In [132]:
import pandas as pd
# Load the video catalogue and the per-user like interactions from the
# CSV files that sit next to this notebook.
videos_df, likes_df = (
    pd.read_csv(csv_path) for csv_path in ("videos_data.csv", "likes.csv")
)

In [133]:
# Merge title and description into one lower-cased, trimmed "text" column so
# later text processing only has to deal with a single field. Missing titles
# and descriptions become empty strings first, so the concatenation never
# produces NaN.
videos_df = (
    videos_df
    .assign(
        title=videos_df['title'].fillna(''),
        description=videos_df['description'].fillna(''),
    )
    .assign(text=lambda frame: (frame["title"] + " " + frame["description"]).str.lower().str.strip())
    .reset_index(drop=True)
)

In [134]:
from sentence_transformers import SentenceTransformer
# Sentence-embedding model used to compare free-text moods semantically.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Canonical mood labels that user input is mapped onto.
group_keywords = [
    'relaxed',
    'happy',
    'focused',
    'sad',
    'inspired',
    'anxious',
    'bored',
    'romantic',
    'energetic',
    'nostalgic',
    'frustrated',
    'lonely',
    'calm',
    'playful',
    'creative',
]

# Embed every canonical mood once up front; lookups then only need to encode
# the user's input.
group_vecs = model.encode(group_keywords)

def map_mood_nlp(user_input, threshold=0.4):
    """Map a free-text mood onto the closest canonical mood keyword.

    The input is embedded with ``model`` and compared (cosine similarity)
    against the pre-computed ``group_vecs`` of ``group_keywords``.

    Args:
        user_input: Mood text typed by the user.
        threshold: Minimum cosine similarity for a keyword to be accepted.

    Returns:
        The best-matching entry of ``group_keywords`` when its similarity is
        at least ``threshold``; otherwise the input itself, lower-cased and
        stripped of surrounding whitespace (a warning is printed).
    """
    # Imported locally because the notebook only imports cosine_similarity in
    # a later cell; without this, calling the function before that cell has
    # run fails with NameError.
    from sklearn.metrics.pairwise import cosine_similarity

    user_vec = model.encode([user_input])
    sims = cosine_similarity(user_vec, group_vecs)[0]
    best_pos = int(sims.argmax())

    if sims[best_pos] >= threshold:
        return group_keywords[best_pos]

    print(f"⚠️ Unrecognized mood '{user_input}' — using as-is.")
    # The fallback value is later compared for equality against
    # likes_df["mood"], so stray padding would make it impossible to match.
    return user_input.strip().lower()

In [135]:
# Preview the first rows of the catalogue, including the derived "text" column.
videos_df.head()

Unnamed: 0,video_id,title,description,channel_title,publish_time,thumbnail_url,video_url,duration_minutes,formatted_duration,text
0,siJkBEk_LWo,Easy Acrylic Painting for Beginners | How to p...,►Visit to my channel :\n https://www.youtube.c...,Farjana Drawing Academy,2022-06-03T12:48:38Z,https://i.ytimg.com/vi/siJkBEk_LWo/mqdefault.jpg,https://www.youtube.com/watch?v=siJkBEk_LWo,0.33,0:20,easy acrylic painting for beginners | how to p...
1,Wla7FB3Vrm0,Basic Acrylic Painting Techniques for Beginners,This video will demonstrate what a wash is and...,Jennifer Funnell,2023-03-16T23:08:22Z,https://i.ytimg.com/vi/Wla7FB3Vrm0/mqdefault.jpg,https://www.youtube.com/watch?v=Wla7FB3Vrm0,4.8,4:48,basic acrylic painting techniques for beginner...
2,ppkv1bmGJUs,Acrylic Painting Tutorial for Beginners | Easy...,Learn how to paint a sunset over the mountains...,Createful Art with Ashley Krieger,2020-07-19T14:34:24Z,https://i.ytimg.com/vi/ppkv1bmGJUs/mqdefault.jpg,https://www.youtube.com/watch?v=ppkv1bmGJUs,16.23,16:14,acrylic painting tutorial for beginners | easy...
3,2YKw7rRuEco,Sunrise / Easy acrylic painting for beginners ...,This is a simple and easy acrylic painting for...,Grace J Art,2020-08-17T13:13:43Z,https://i.ytimg.com/vi/2YKw7rRuEco/mqdefault.jpg,https://www.youtube.com/watch?v=2YKw7rRuEco,12.65,12:39,sunrise / easy acrylic painting for beginners ...
4,Fvkyb4Al0EM,Easy way to paint with acrylics 🎨 #shorts #pai...,,ColorByFeliks,2023-03-06T23:56:20Z,https://i.ytimg.com/vi/Fvkyb4Al0EM/mqdefault.jpg,https://www.youtube.com/watch?v=Fvkyb4Al0EM,0.52,0:31,easy way to paint with acrylics 🎨 #shorts #pai...


In [136]:
# Preview the first rows of the like interactions (user, video, skill level, mood).
likes_df.head()

Unnamed: 0,user_id,video_id,liked,skill_level,mood
0,1,9YM8bbrICzc,1,advanced,relaxed
1,1,lKuzi4DGE84,1,advanced,relaxed
2,1,d5IlXOp4HQ0,1,advanced,relaxed
3,1,_qB7QrBaGfk,1,advanced,relaxed
4,1,mFWbzjuPMns,1,advanced,relaxed


## **Content-Based Filtering**

In [137]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Turn each video's combined title/description text into a TF-IDF vector,
# ignoring English stop words, so distinctive words carry the most weight.
# Pairwise cosine similarity between those vectors is the content-based score.
tfidf = TfidfVectorizer(stop_words = "english")
tfidf_matrix = tfidf.fit_transform(videos_df["text"])

# cosine_sim[i][j] = how similar video i is to video j
# (row/column positions follow the row order of videos_df)
cosine_sim = cosine_similarity(tfidf_matrix)
print(cosine_sim)

[[1.         0.06802562 0.21415211 ... 0.03794666 0.         0.22202231]
 [0.06802562 1.         0.12887599 ... 0.10434611 0.         0.06335573]
 [0.21415211 0.12887599 1.         ... 0.08144966 0.         0.22007605]
 ...
 [0.03794666 0.10434611 0.08144966 ... 1.         0.0496909  0.05222685]
 [0.         0.         0.         ... 0.0496909  1.         0.00989516]
 [0.22202231 0.06335573 0.22007605 ... 0.05222685 0.00989516 1.        ]]


In [138]:
def get_similar_content_videos(video_id, top_k = 5):
  """Return the ``top_k`` videos whose text is most similar to ``video_id``.

  Similarity comes from the pre-computed ``cosine_sim`` matrix, whose rows and
  columns follow the row order of ``videos_df``.

  Args:
    video_id: ID of the seed video; must be present in ``videos_df``.
    top_k: Maximum number of similar videos to return.

  Returns:
    DataFrame with columns ``video_id`` and ``title``, most similar first.
    The seed video itself is never included.

  Raises:
    IndexError: If ``video_id`` is not present in ``videos_df``.
  """
  # Row position (not index label) of the seed video, since cosine_sim is
  # addressed by position.
  matches = (videos_df['video_id'] == video_id).to_numpy().nonzero()[0]
  if len(matches) == 0:
    raise IndexError(f"video_id {video_id!r} not found in videos_df")
  idx = int(matches[0])

  row = cosine_sim[idx]
  # Exclude the seed video explicitly instead of assuming it sorts first:
  # another video with identical text also scores 1.0 and could take the
  # first slot, which would leave the seed in its own recommendations.
  ranked = sorted(
    (pos for pos in range(len(row)) if pos != idx),
    key=lambda pos: row[pos],
    reverse=True,
  )
  top_idxs = ranked[:max(top_k, 0)]
  # grabs videos from database corresponding to top_idxs
  return videos_df.iloc[top_idxs][['video_id', 'title']]

In [139]:
# Demo: the five videos whose title/description text is closest to the seed
# video "yMDzgI4iBuI".
similar_videos = get_similar_content_videos("yMDzgI4iBuI", top_k=5)
print(similar_videos)

        video_id                                              title
162  P4m1bpz2rPg  rainbow landscape painting / easy acrylic pain...
99   d76y08bE5tw  Easy way to paint clouds / acrylic painting id...
62   te9DoQl7yWQ  super easy sunset painting / acrylic painting ...
79   0lw2Rxs-qmY  Easy way to paint a night sky / acrylic painti...
201  _yPw7OihZfc  galaxy painting on black canvas / easy acrylic...


## **Collaborative Filtering**

In [140]:
def get_user_item_matrix(mood, skill_level):
  """Build a user x video count matrix for one (mood, skill level) context.

  Only rows of ``likes_df`` whose ``mood`` and ``skill_level`` both match are
  kept; the result has one row per ``user_id``, one column per ``video_id``,
  and each cell counts the matching rows.
  """
  in_context = likes_df["mood"].eq(mood) & likes_df["skill_level"].eq(skill_level)
  context_likes = likes_df.loc[in_context]
  return pd.crosstab(context_likes['user_id'], context_likes['video_id'])

In [141]:
def get_similar_users(user_id, mood, skill_level, top_k = 3):
  """Return the ``top_k`` users most similar to ``user_id`` in one context.

  Users are compared by cosine similarity of their rows in the user x video
  matrix built for the given mood and skill level.

  Args:
    user_id: User to find neighbours for.
    mood: Mood used to filter interactions.
    skill_level: Skill level used to filter interactions.
    top_k: Maximum number of neighbours to return.

  Returns:
    List of user IDs, most similar first; never contains ``user_id``.
    Empty (with a printed message) when the user has no interactions in
    this context.
  """
  user_item_matrix = get_user_item_matrix(mood, skill_level)
  if user_id not in user_item_matrix.index:
    print("User has no interactions under that mood/skill level.")
    return []

  # gets specific row of user_item matrix with user_id
  user_vector = user_item_matrix.loc[[user_id]]
  # similarity from user_id to every user, kept in a separate Series so the
  # item matrix is not polluted with a non-video column
  sims = pd.Series(
    cosine_similarity(user_vector, user_item_matrix)[0],
    index=user_item_matrix.index,
  )

  # Drop the user explicitly rather than skipping the first sorted row: any
  # other user with an identical like vector also scores 1.0 and may sort
  # ahead, which would return the user as their own neighbour.
  neighbours = (
    sims.drop(user_id)
    .sort_values(ascending=False, kind="stable")
    .head(max(top_k, 0))
  )
  return neighbours.index.tolist()

In [142]:
def recommend_from_users(user_id, mood, skill_level):
  """Collaborative-filtering recommendations for one user in one context.

  The mood is first normalised with ``map_mood_nlp``. Videos liked by the
  user's nearest neighbours (same mood and skill level) are returned, minus
  the ones the user already liked in that context, ordered by how many
  neighbour likes they received. Returns an empty list when there are no
  neighbours.
  """
  mood = map_mood_nlp(mood)
  neighbours = get_similar_users(user_id, mood, skill_level)
  if not neighbours:
    return []

  # all interactions recorded under the same mood and skill level
  same_context = (likes_df['mood'] == mood) & (likes_df['skill_level'] == skill_level)
  context_likes = likes_df[same_context]

  # what the user has already liked vs. what the neighbours liked
  already_liked = context_likes.loc[context_likes['user_id'] == user_id, 'video_id'].tolist()
  neighbour_likes = context_likes[context_likes['user_id'].isin(neighbours)]

  # keep only videos that are new to the user, most frequently liked first
  unseen = neighbour_likes[~neighbour_likes['video_id'].isin(already_liked)]
  return unseen['video_id'].value_counts().index.tolist()

In [143]:
# Synthetic likes used to demonstrate collaborative filtering: three extra
# users in the relaxed/advanced context, each liking a video user 1 has not.
extra_likes = pd.DataFrame([
    {'user_id': 100, 'video_id': 'rec_video_1', 'liked': 1, 'mood': 'relaxed', 'skill_level': 'advanced'},
    {'user_id': 101, 'video_id': 'rec_video_2', 'liked': 1, 'mood': 'relaxed', 'skill_level': 'advanced'},
    {'user_id': 102, 'video_id': 'rec_video_3', 'liked': 1, 'mood': 'relaxed', 'skill_level': 'advanced'}
])

# Append only the rows that are not already in likes_df, so re-running this
# cell does not stack duplicate likes (which would inflate the counts in the
# user x video matrix).
like_key_cols = ['user_id', 'video_id', 'mood', 'skill_level']
already_present = pd.MultiIndex.from_frame(extra_likes[like_key_cols]).isin(
    pd.MultiIndex.from_frame(likes_df[like_key_cols])
)
if not already_present.all():
    likes_df = pd.concat([likes_df, extra_likes[~already_present]], ignore_index=True)

In [144]:
# Demo: collaborative-filtering recommendations for user 1 in the
# relaxed/advanced context.
recommendations = recommend_from_users(user_id=1, mood='relaxed', skill_level='advanced')
print("CF Recommendations for user 1:", recommendations)

CF Recommendations for user 1: ['rec_video_1', 'rec_video_2', 'rec_video_3']


## **Hybrid Recommendations**

In [145]:
def hybrid_recommend(user_id, video_id, mood, skill_level, top_k = 5):
  """Combine content-based and collaborative recommendations.

  Args:
    user_id: User to recommend for (drives collaborative filtering).
    video_id: Seed video (drives content-based filtering).
    mood: Free-text mood; normalised inside ``recommend_from_users``.
    skill_level: Skill level used to filter interactions.
    top_k: Number of content-based neighbours to request.

  Returns:
    Copy of the matching ``videos_df`` rows with an added ``source`` column
    (``'cbf'``, ``'cf'`` or ``'both'``), ordered content-based results first
    and then collaborative ones, each in their own ranking order. IDs that
    are not present in ``videos_df`` are omitted.
  """
  # content based filtering
  cbf_videos = get_similar_content_videos(video_id, top_k = top_k)['video_id'].tolist()

  # collaborative filtering; recommend_from_users maps the mood itself, so it
  # is not mapped here as well (that would repeat the embedding and any
  # "unrecognized mood" warning)
  cf_videos = recommend_from_users(user_id, mood, skill_level)

  # join video ids (no duplicates), remembering each id's rank
  combined_ids = list(dict.fromkeys(cbf_videos + cf_videos))
  rank = {vid: pos for pos, vid in enumerate(combined_ids)}
  cbf_set, cf_set = set(cbf_videos), set(cf_videos)

  final_recs = videos_df[videos_df['video_id'].isin(combined_ids)].copy()

  final_recs['source'] = [
    'both' if vid in cbf_set and vid in cf_set
    else 'cbf' if vid in cbf_set
    else 'cf'
    for vid in final_recs['video_id']
  ]

  # the isin filter returns rows in catalogue order; restore ranking order
  return final_recs.sort_values(
    by='video_id', key=lambda ids: ids.map(rank), kind='stable'
  )

In [149]:
# Demo: hybrid recommendations for user 1, seeded with video "GwrAnsXkxIo",
# in the relaxed/advanced context.
recs = hybrid_recommend(
    user_id=1,
    video_id='GwrAnsXkxIo',
    mood='relaxed',
    skill_level='advanced',
    top_k=5
)

print(recs)

        video_id                                              title  \
241  zSdqkSTfB0Q  Full Moon Painting / Acrylic Painting for Begi...   
339  4jNm_ul8AC0  How to Paint a Morning Forest Scene with Acrylics   
350  7lPm16QXis8  How to Draw a Sunset Seascape / Acrylic Painti...   
355  buFIvYkjJ4E  How to Draw a Sunset Seascape / Acrylic Painti...   
372  l0znKG2EdQo                  Seascape Acrylic Painting #shorts   

                                           description  \
241  Today, I drew a full moon landscape,\nBeginner...   
339  Today, I painted a view of the morning forest....   
350  Today, I painted  a view of Sunset Sea.\nBegin...   
355  Today, I painted  a view of Sunset Sea.\nBegin...   
372  Seascape Acrylic Painting\n#art #acrylicpainti...   

                 channel_title          publish_time  \
241                  Joony art  2020-11-16T14:00:21Z   
339                  Joony art  2025-04-21T14:00:05Z   
350  Acrylic Painting Tutorial  2024-10-14T10:34:00Z   
