In [100]:
import pandas as pd
# Read the two input tables from CSV files in the current working directory.
movies=  pd.read_csv('movies.csv')
credits = pd.read_csv('credits.csv')

In [101]:
# Join the credits columns onto the movie rows by matching 'title'
# (pandas' default inner join).
# NOTE(review): if a title is not unique in either file, this join yields one
# row per matching pair and therefore duplicates movies — confirm whether an
# id-based key is available in both files.
movies = movies.merge(credits,on='title')

In [102]:
# Keep only the columns that later cells read.
movies  = movies[['genres','id','keywords','title','original_language','overview','cast','crew']]

In [103]:
# Drop every row with a missing value in any of the kept columns.
# dropna() keeps the original index labels, so labels stop matching row
# positions as soon as a row is removed.
movies = movies.dropna()

In [104]:
import ast
def convertGenre(obj):
    """Extract the ``'name'`` value of every entry in a stringified list of dicts.

    Args:
        obj: String containing a Python literal list of dicts, each having a
            ``'name'`` key, e.g. ``"[{'id': 28, 'name': 'Action'}]"``.

    Returns:
        list: The ``'name'`` values, in the order they appear in ``obj``.

    Raises:
        ValueError, SyntaxError: If ``obj`` is not a valid Python literal.
        KeyError: If an entry has no ``'name'`` key.
    """
    # literal_eval only evaluates literals, never arbitrary expressions.
    return [entry['name'] for entry in ast.literal_eval(obj)]

# Replace each stringified genre record list with a plain list of genre names.
movies.loc[:,'genres'] = movies['genres'].apply(convertGenre)

In [105]:
# convertGenre is reused here to pull the 'name' of every keyword entry.
movies.loc[:,'keywords'] = movies['keywords'].apply(convertGenre)

In [106]:
def convertCast(obj, limit=3):
    """Return the ``'name'`` of the first ``limit`` entries of a stringified list.

    Args:
        obj: String containing a Python literal list of dicts, each having a
            ``'name'`` key.
        limit: Maximum number of names to return, taken from the start of the
            list. Must be non-negative. Defaults to 3.

    Returns:
        list: Up to ``limit`` names, in the order they appear in ``obj``.

    Raises:
        ValueError, SyntaxError: If ``obj`` is not a valid Python literal.
        KeyError: If one of the first ``limit`` entries has no ``'name'`` key.
    """
    entries = ast.literal_eval(obj)
    # Slicing replaces the manual counter/break; entries past `limit` are
    # never inspected, exactly as before.
    return [entry['name'] for entry in entries[:limit]]

In [107]:
# Keep only the first three cast names per movie (see convertCast).
movies.loc[:,'cast'] = movies['cast'].apply(convertCast)

In [108]:
def get_Director(obj):
    """Find the first crew entry whose ``'job'`` is ``'Director'``.

    Args:
        obj: String containing a Python literal list of dicts, each having
            ``'job'`` and ``'name'`` keys.

    Returns:
        list: A one-element list holding that entry's ``'name'``, or an empty
        list when no entry has the job ``'Director'``.
    """
    for member in ast.literal_eval(obj):
        if member['job'] == 'Director':
            # Only the first director is reported.
            return [member['name']]
    return []
# Reduce each crew record list to a list holding (at most) the director's name.
movies.loc[:,'crew'] = movies['crew'].apply(get_Director)    

In [109]:
# Split each overview into a list of whitespace-separated words so it can be
# concatenated with the other list-valued columns later on.
movies.loc[:,'overview'] = movies['overview'].apply(lambda x:x.split())
def removeSpace(lst):
    """Return a new list in which every string has its space characters removed.

    Only the plain space character (" ") is removed; other whitespace is kept.
    The input list is not modified.
    """
    squeezed = []
    for text in lst:
        # Splitting on " " and re-joining with "" deletes every space.
        squeezed.append("".join(text.split(" ")))
    return squeezed

# Remove the spaces inside each name so that a multi-word value becomes a
# single token (e.g. 'Science Fiction' -> 'ScienceFiction').
movies.loc[:,'genres'] = movies['genres'].apply(removeSpace)
movies.loc[:,'keywords'] = movies['keywords'].apply(removeSpace)
movies.loc[:,'cast'] = movies['cast'].apply(removeSpace)
movies.loc[:,'crew'] = movies['crew'].apply(removeSpace)

In [110]:
# Lookup table from genre name to mood label.
# Keys are written without spaces ("ScienceFiction", "TVMovie") because the
# 'genres' column has already been passed through removeSpace.
genre_to_mood = {
    "Action": "Excited",
    "Adventure": "Excited",
    "Animation": "Happy",
    "Comedy": "Happy",
    "Crime": "Thrilled",
    "Documentary": "Calm",
    "Drama": "Emotional",
    "Family": "Happy",
    "Fantasy": "Curious",
    "Foreign": "Curious",
    "History": "Curious",
    "Horror": "Thrilled",
    "Music": "Happy",
    "Mystery": "Curious",
    "Romance": "Romantic",
    "ScienceFiction": "Curious",
    "TVMovie": "Happy",
    "Thriller": "Thrilled",
    "War": "Emotional",
    "Western": "Excited"
}


In [111]:
def mood_from_genres(genres):
    """Pick the mood of the first genre that has an entry in ``genre_to_mood``.

    Args:
        genres: Iterable of genre names.

    Returns:
        The mood mapped to the earliest genre with a (truthy) entry in
        ``genre_to_mood``; ``"Curious"`` when no genre matches or ``genres``
        is empty.
    """
    candidate_moods = (genre_to_mood.get(name) for name in genres)
    # Lazily scan in order and stop at the first usable mood.
    return next((mood for mood in candidate_moods if mood), "Curious")

# Tag each row with the mood of its first genre found in genre_to_mood
# ("Curious" when none of its genres is listed there).
movies['mood_tag'] = movies['genres'].apply(mood_from_genres)

In [112]:
def getMov(genre):
    """Return the titles of all movies whose ``mood_tag`` equals ``genre``.

    Args:
        genre: Mood label to match against the ``'mood_tag'`` column (despite
            the parameter name, the comparison is against moods, not genres).

    Returns:
        list: Matching titles in the row order of ``movies``; empty when
        nothing matches.
    """
    # A boolean mask replaces the per-row .iloc lookups of a Python loop.
    return movies.loc[movies['mood_tag'] == genre, 'title'].tolist()

In [113]:
# Concatenate the per-movie token lists (list + list, row by row) into one
# combined list of tags.
movies['tags'] =  movies['genres'] + movies['keywords']+movies['overview']+movies['cast']+movies['crew']

In [114]:
# Working frame for the similarity model.
# .copy(): make df independent of `movies`, so the `df.loc[:, 'tags'] = ...`
#   assignments that follow do not write into a slice of another frame
#   (SettingWithCopyWarning).
# .reset_index(drop=True): rows were dropped from `movies` earlier, so its
#   index labels have gaps. Later cells mix label-based access (iterrows(),
#   df.loc) with row positions of the similarity matrix; renumbering makes
#   label == position so both agree.
df = movies[['id','title','mood_tag','tags']].copy().reset_index(drop=True)

In [115]:
# Flatten each list of tag tokens into one space-separated string.
df.loc[:,'tags'] = df['tags'].apply(lambda x:" ".join(x))

In [116]:
import nltk
from nltk.stem.porter import PorterStemmer
# Single stemmer instance shared by convertStem.
ps = PorterStemmer()

In [117]:
def convertStem(obj):
    """Stem every whitespace-separated token of ``obj``.

    Args:
        obj: Text to process.

    Returns:
        str: The stemmed tokens (via the module-level ``ps`` stemmer) joined
        by single spaces, in their original order.
    """
    return " ".join(ps.stem(token) for token in obj.split())

In [118]:
# Replace every tag string with its stemmed form.
df.loc[:,'tags'] = df['tags'].apply(convertStem)

In [119]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vocabulary over the tag strings: at most 5000 features, with
# scikit-learn's built-in English stop-word list removed.
cv = CountVectorizer(max_features=5000,stop_words='english')
cv.fit(df['tags'])

In [120]:
# Dense count matrix: one row per row of df, in df's row order.
vectors = cv.transform(df['tags']).toarray()

In [121]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between all rows of `vectors`.
# similar_movies[i][j] compares row i with row j, where i and j are row
# positions (not index labels) of df.
similar_movies = cosine_similarity(vectors)

In [122]:
# Map each movie id to its row POSITION in df.
# The rows of similar_movies follow df's row order, so the map has to store
# positions; df's index labels can have gaps (rows were dropped earlier) and
# would point at the wrong similarity row or past the end of the matrix.
# If an id occurs more than once, the position of its last occurrence is kept.
movies_map = {movie_id: position for position, movie_id in enumerate(df['id'])}

In [123]:
def getMovies(movie, n=5):
    """Return the ``n`` rows most similar to the given movie.

    Args:
        movie: Movie id; must be a key of ``movies_map``.
        n: Number of neighbours to return. Defaults to 5.

    Returns:
        list: ``(row_position, similarity)`` tuples, most similar first. The
        queried movie's own row is never included.

    Raises:
        KeyError: If ``movie`` is not a key of ``movies_map``.
    """
    index = movies_map[movie]
    ranked = sorted(enumerate(similar_movies[index]), key=lambda x: x[1], reverse=True)
    # Exclude the query row by position instead of assuming it sorts first:
    # another row with an identical score (e.g. a duplicate row) may precede
    # it, in which case dropping element 0 would keep the query itself.
    return [pair for pair in ranked if pair[0] != index][:n]

In [124]:
def getName(movie):
    """Print the titles of the movies returned by ``getMovies(movie)``.

    Args:
        movie: Movie id, passed through to ``getMovies``.

    Returns:
        None. One title is printed per line.
    """
    for position, _score in getMovies(movie):
        # getMovies yields row positions of the similarity matrix, so the
        # title must be looked up by position (.iloc); a label lookup (.loc)
        # hits a different row, or raises, when index labels have gaps.
        print(df['title'].iloc[position])

In [125]:
# Lookup from movie title to movie id.
# If a title occurs more than once, the id of its last occurrence is kept.
movie_id_map = dict(zip(df['title'], df['id']))

# Sample Data

In [126]:
# Sample watch histories: one list of three titles per scenario; the trailing
# comment on each row names the scenario.
user_history = [
    ['The Dark Knight Rises','Avatar','Iron Man'],              #Every Movie ever
    ['Avatar','Iron Man','Jurassic World'],                     #Excited
    ['Cars 2','Meet the Fockers','Ted 2'],                      #Happy
    ['Now You See Me','The Conjuring 2','Final Destination 5'], #Thrilled
    ['The Square','Ayurveda: Art of Being','Super Size Me'],    #Calm
    ['Titanic','The Martian','Unbroken'],                       #Emotional
    ['Gravity','The Time Machine','The Last Days on Mars'],     #Curious
    ['Titanic','Friends with Benefits','To Rome with Love'],    #Romantic
]

# The same scenarios expressed as movie ids, which is the form that
# getRecommendation consumes.
# NOTE(review): presumably these are the ids of the titles listed above, in the
# same order — verify against movie_id_map.
user_history_indices = [
    [49026, 19995, 1726],    #Every Movie ever
    [19995, 1726, 135397],   #Excited
    [49013, 693, 214756],    #Happy
    [75656, 259693, 55779],  #Thrilled
    [159037, 73981, 9372],   #Calm
    [597, 286217, 227306],   #Emotional
    [49047, 2135, 190847],   #Curious
    [597, 50544, 81836]      #Romantic
]

# Recommendation

In [127]:
# Recommend movies (10 by default) based on the most recently watched ones
# (the last 3 by default).
def getRecommendation(recent_watched, n_recommendations=10, n_recent=3, n_candidates=19):
    """Recommend movie ids similar to the most recently watched movies.

    For each of the last ``n_recent`` watched movies, the ``n_candidates``
    most similar rows are collected. All candidates are then ranked by
    similarity and returned as ids, without repeats and without the watched
    movies themselves.

    Args:
        recent_watched: Sequence of movie ids, oldest first. Ids that are not
            keys of ``movies_map`` are ignored as seeds.
        n_recommendations: Maximum number of ids to return. Defaults to 10.
        n_recent: How many trailing entries of ``recent_watched`` act as
            seeds. Defaults to 3.
        n_candidates: Neighbours collected per seed. Defaults to 19.

    Returns:
        list: Up to ``n_recommendations`` movie ids, best match first. Empty
        when no seed is known.
    """
    # A non-positive n_recent must not turn into "the whole list" via [-0:].
    top3 = recent_watched[-n_recent:] if n_recent > 0 else []
    seed_rows = [movies_map[i] for i in top3 if i in movies_map]

    recommended = []
    for row in seed_rows:
        ranked = sorted(enumerate(similar_movies[row]), key=lambda x: x[1], reverse=True)
        # Remove the seed by position rather than assuming it sorts first;
        # a row with an identical score may otherwise take its place.
        neighbours = [pair for pair in ranked if pair[0] != row]
        recommended.extend(neighbours[:n_candidates])

    recommended.sort(key=lambda x: x[1], reverse=True)

    watched = set(top3)
    seen = set()
    final_recs = []
    for idx, _score in recommended:
        if len(final_recs) >= n_recommendations:
            break
        # Candidates are row positions of the similarity matrix.
        movie_id = df.iloc[idx]['id']
        if movie_id in seen or movie_id in watched:
            continue
        final_recs.append(movie_id)
        seen.add(movie_id)

    return final_recs

In [134]:
# Example: recommendations for the sample history at index 5 (the row marked
# "Emotional" in user_history_indices).
getRecommendation(user_history_indices[5])

[281230, 72431, 1251, 853, 113464, 13922, 9075, 12100, 10364, 10105]

# Moods

In [129]:
# NOTE: getMov is also defined in an earlier cell; this cell re-defines it.
def getMov(genre):
    """Return the titles of all movies whose ``mood_tag`` equals ``genre``.

    Args:
        genre: Mood label to match against the ``'mood_tag'`` column (despite
            the parameter name, the comparison is against moods, not genres).

    Returns:
        list: Matching titles in the row order of ``movies``; empty when
        nothing matches.
    """
    # A boolean mask replaces the per-row .iloc lookups of a Python loop.
    return movies.loc[movies['mood_tag'] == genre, 'title'].tolist()

# Location

In [130]:
# Display names for the language codes.
# Keys are two-letter codes, values are human-readable language names.
# This table is not referenced by any other cell visible in this notebook.
language_mapping = {
    'en': 'English',
    'ja': 'Japanese',
    'fr': 'French',
    'zh': 'Chinese',
    'es': 'Spanish',
    'de': 'German',
    'hi': 'Hindi',
    'ru': 'Russian',
    'ko': 'Korean',
    'te': 'Telugu',
    'cn': 'Chinese (alternative code)',  # non-standard, often used in datasets
    'it': 'Italian',
    'nl': 'Dutch',
    'ta': 'Tamil',
    'sv': 'Swedish',
    'th': 'Thai',
    'da': 'Danish',
    'hu': 'Hungarian',
    'cs': 'Czech',
    'pt': 'Portuguese',
    'is': 'Icelandic',
    'tr': 'Turkish',
    'nb': 'Norwegian Bokmål',
    'af': 'Afrikaans',
    'pl': 'Polish',
    'he': 'Hebrew',
    'ar': 'Arabic',
    'vi': 'Vietnamese',
    'ky': 'Kyrgyz',
    'id': 'Indonesian',
    'ro': 'Romanian',
    'fa': 'Persian (Farsi)',
    'no': 'Norwegian',
    'sl': 'Slovenian',
    'ps': 'Pashto',
    'el': 'Greek'
}

In [131]:
# Maps a country name to ONE language code; country_movies compares that code
# with movies['original_language'].
# NOTE(review): each country has a single code, so movies stored under a
# related code are not matched — e.g. 'China' -> 'zh' misses 'cn' and
# 'Norway' -> 'no' misses 'nb', both of which appear in language_mapping.
# Confirm whether that is intended.
# NOTE(review): the language chosen for multilingual countries (e.g.
# 'Sri Lanka' -> 'ta', 'Pakistan' -> 'ps') looks like a simplification — verify.
country_language_mapping = {
    'United States': 'en',
    'United Kingdom': 'en',
    'India': 'hi',
    'Japan': 'ja',
    'France': 'fr',
    'China': 'zh',
    'Spain': 'es',
    'Mexico': 'es',
    'Germany': 'de',
    'Russia': 'ru',
    'South Korea': 'ko',
    'Italy': 'it',
    'Netherlands': 'nl',
    'Sri Lanka': 'ta',
    'Sweden': 'sv',
    'Thailand': 'th',
    'Denmark': 'da',
    'Hungary': 'hu',
    'Czech Republic': 'cs',
    'Portugal': 'pt',
    'Brazil': 'pt',
    'Iceland': 'is',
    'Turkey': 'tr',
    'Norway': 'no',
    'South Africa': 'af',
    'Poland': 'pl',
    'Israel': 'he',
    'Saudi Arabia': 'ar',
    'Egypt': 'ar',
    'Vietnam': 'vi',
    'Kyrgyzstan': 'ky',
    'Indonesia': 'id',
    'Romania': 'ro',
    'Iran': 'fa',
    'Slovenia': 'sl',
    'Afghanistan': 'ps',
    'Pakistan': 'ps',
    'Greece': 'el',
}

In [132]:
#fetch random country specific movies 
import random
def country_movies(country, n=10, seed=None):
    """Return up to ``n`` ids of movies whose original language matches a country.

    Args:
        country: Country name; must be a key of ``country_language_mapping``
            to yield any result.
        n: Maximum number of ids to return. Must be non-negative.
            Defaults to 10.
        seed: Optional seed for the random choice. When given, the same
            inputs always yield the same sample; when ``None`` the shared
            ``random`` module state is used.

    Returns:
        list: All matching ids when there are at most ``n`` of them (in row
        order), otherwise ``n`` ids chosen at random without replacement.
        Empty when the country is unknown or nothing matches.
    """
    language_code = country_language_mapping.get(country)
    if not language_code:
        return []  # country not in mapping

    matching_ids = movies.loc[movies['original_language'] == language_code, 'id'].tolist()
    # An id can occur on more than one row (e.g. after a join on a non-unique
    # key); drop repeats, keeping first-seen order, so that no movie is
    # returned twice.
    movie_ids = list(dict.fromkeys(matching_ids))

    if len(movie_ids) <= n:
        return movie_ids

    rng = random if seed is None else random.Random(seed)
    return rng.sample(movie_ids, n)

In [135]:
# Example call. When more than 10 ids match, the result is a random sample,
# so the ids shown below differ between runs.
country_movies('India')

[480, 157293, 32740, 7913, 132316, 7504, 4251, 56666, 14165, 191714]