In [185]:
import pandas as pd
# Load the two raw tables from CSV files in the working directory.
# Later cells read 'title' from both frames (merge key) and 'cast'/'crew'
# after the merge — presumably those two come from credits.csv; confirm.
movies=  pd.read_csv('movies.csv')
credits = pd.read_csv('credits.csv')

In [186]:
# Inner-join movie metadata with credits on the title text.
# NOTE(review): titles are not guaranteed unique; if two films share a title,
# a join on 'title' pairs every matching row with every other — confirm this
# is acceptable or join on an id column instead.
movies = movies.merge(credits,on='title')

In [187]:
# Keep only the columns used by the feature-building cells below.
movies  = movies[['genres','id','keywords','title','original_language','overview','cast','crew']]

In [188]:
# Drop every row that has a missing value in any of the selected columns.
# NOTE: dropna() keeps the original index labels, so once any row is removed
# the labels no longer equal row positions — downstream positional lookups
# must use .iloc (or the index must be reset).
movies = movies.dropna()

In [189]:
import ast
def convertGenre(obj):
    """Parse a stringified list of dicts and return each entry's 'name'.

    Parameters
    ----------
    obj : str
        Python-literal text such as "[{'id': 28, 'name': 'Action'}]".

    Returns
    -------
    list
        The 'name' value of every entry, in source order.
    """
    return [entry['name'] for entry in ast.literal_eval(obj)]

# Replace each stringified genre list with a plain list of genre names.
movies.loc[:,'genres'] = movies['genres'].apply(convertGenre)

In [190]:
# Same parsing for keywords: convertGenre only reads each entry's 'name',
# so it is reused unchanged here.
movies.loc[:,'keywords'] = movies['keywords'].apply(convertGenre)

In [191]:
def convertCast(obj, limit=3):
    """Return the names of the first ``limit`` entries of a stringified cast list.

    Parameters
    ----------
    obj : str
        Python-literal text holding a sequence of dicts, each with a 'name' key.
    limit : int, optional
        Maximum number of names to keep (default 3, the previously
        hard-coded value).

    Returns
    -------
    list
        Up to ``limit`` names, in source order.
    """
    entries = ast.literal_eval(obj)
    # zip() against range(limit) stops after `limit` entries without touching
    # the remaining ones, which mirrors the original early `break`.
    return [entry['name'] for _, entry in zip(range(limit), entries)]

In [192]:
# Reduce each stringified cast list to the names of its leading entries
# (convertCast keeps at most the first three).
movies.loc[:,'cast'] = movies['cast'].apply(convertCast)

In [193]:
def get_Director(obj):
    """Return the first crew member whose job is 'Director', wrapped in a list.

    Parameters
    ----------
    obj : str
        Python-literal text holding a sequence of dicts with 'job' and
        'name' keys.

    Returns
    -------
    list
        ``[name]`` for the first director found, or ``[]`` when there is none.
    """
    for member in ast.literal_eval(obj):
        if member['job'] == 'Director':
            # Only the first director is kept.
            return [member['name']]
    return []
# Reduce the crew column to a list holding the first director's name
# (empty list when no director is present).
movies.loc[:,'crew'] = movies['crew'].apply(get_Director)

In [194]:
# Split the overview text on whitespace so it becomes a list of words,
# like the other feature columns.
movies.loc[:,'overview'] = movies['overview'].apply(lambda x:x.split())
def removeSpace(lst):
    """Return a new list with every space character removed from each string.

    Parameters
    ----------
    lst : iterable of str

    Returns
    -------
    list
        One string per input item, e.g. 'Science Fiction' -> 'ScienceFiction'.
    """
    squeezed = []
    for text in lst:
        squeezed.append(text.replace(" ", ""))
    return squeezed

# Strip spaces inside every genre / keyword / cast / director entry, so a
# multi-word entry becomes one token (presumably so names stay intact when the
# tags are later joined and split on whitespace).
movies.loc[:,'genres'] = movies['genres'].apply(removeSpace)
movies.loc[:,'keywords'] = movies['keywords'].apply(removeSpace)
movies.loc[:,'cast'] = movies['cast'].apply(removeSpace)
movies.loc[:,'crew'] = movies['crew'].apply(removeSpace)

In [195]:
# Lookup table: genre name -> mood label.
# Keys are written without spaces ("ScienceFiction", "TVMovie") because the
# genres column has its spaces removed before this table is consulted.
genre_to_mood = {
    "Action": "Excited",
    "Adventure": "Excited",
    "Animation": "Happy",
    "Comedy": "Happy",
    "Crime": "Thrilled",
    "Documentary": "Calm",
    "Drama": "Emotional",
    "Family": "Happy",
    "Fantasy": "Curious",
    "Foreign": "Curious",
    "History": "Curious",
    "Horror": "Thrilled",
    "Music": "Happy",
    "Mystery": "Curious",
    "Romance": "Romantic",
    "ScienceFiction": "Curious",
    "TVMovie": "Happy",
    "Thriller": "Thrilled",
    "War": "Emotional",
    "Western": "Excited"
}


In [196]:
def mood_from_genres(genres):
    """Return the mood of the first genre that has a non-empty mapping.

    Parameters
    ----------
    genres : iterable of str
        Genre names, checked in order against ``genre_to_mood``.

    Returns
    -------
    str
        The first truthy mood found, or "Curious" when no genre matches.
    """
    candidates = (genre_to_mood.get(name) for name in genres)
    # Generators are lazy, so lookup stops at the first truthy mood.
    return next((mood for mood in candidates if mood), "Curious")

# Tag every movie with the mood of its first genre that appears in
# genre_to_mood ("Curious" when none of its genres is mapped).
movies['mood_tag'] = movies['genres'].apply(mood_from_genres)

In [197]:
# Spot-check one assigned mood. NOTE(review): [0] on a Series with an integer
# index is a label lookup, not "first row" — confirm label 0 survived dropna().
movies['mood_tag'][0]

'Excited'

In [198]:
def getMov(genre):
    """Return the titles of all movies whose ``mood_tag`` equals ``genre``.

    Parameters
    ----------
    genre : str
        Despite the name, this is compared against the ``mood_tag`` column
        (e.g. "Happy", "Excited"), not against the genres column.

    Returns
    -------
    list
        Matching titles in row order; empty when nothing matches.
    """
    # One vectorized boolean mask instead of a Python loop with two .iloc
    # lookups per row.
    return movies.loc[movies['mood_tag'] == genre, 'title'].tolist()

In [199]:
# Each of these columns holds a list per row, so `+` concatenates them into one
# combined token list per movie.
movies['tags'] =  movies['genres'] + movies['keywords']+movies['overview']+movies['cast']+movies['crew']

In [200]:
# Keep only the columns the recommender needs. .copy() makes df an independent
# frame, so the in-place rewrites of `tags` that follow do not operate on a
# slice of `movies` (avoids SettingWithCopyWarning / view-vs-copy ambiguity).
# NOTE: df keeps the index labels of `movies`; use .iloc for positional access.
df = movies[['id','title','mood_tag','tags']].copy()

In [201]:
# Turn each token list into a single space-separated string.
df.loc[:,'tags'] = df['tags'].apply(lambda x:" ".join(x))

In [202]:
import nltk
from nltk.stem.porter import PorterStemmer
# Single shared Porter stemmer instance, used by convertStem.
ps = PorterStemmer()

In [203]:
def convertStem(obj):
    """Stem every whitespace-separated word of ``obj`` with the shared stemmer.

    Parameters
    ----------
    obj : str
        Text to process; it is split on any whitespace.

    Returns
    -------
    str
        The stemmed words re-joined with single spaces.
    """
    return " ".join(ps.stem(word) for word in obj.split())

In [204]:
# Stem every word of every tag string so inflected forms share one token.
df.loc[:,'tags'] = df['tags'].apply(convertStem)

In [205]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words vectorizer: vocabulary capped at 5000 terms, with scikit-learn's
# built-in English stop-word list removed. fit() only learns the vocabulary.
cv = CountVectorizer(max_features=5000,stop_words='english')
cv.fit(df['tags'])

In [206]:
# Term-count matrix with one row per movie, in df row order. .toarray()
# converts the sparse result to a dense array (memory grows with rows x vocab).
vectors = cv.transform(df['tags']).toarray()

In [207]:
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between all movie vectors: similar_movies[i][j] is
# the similarity between the movies at row positions i and j of df.
similar_movies = cosine_similarity(vectors)

In [208]:
# Title -> row position in df (enumerate order, i.e. positional, not the index
# label). If a title occurs more than once, the last occurrence wins.
movies_map = { title: idx for idx, title in enumerate(df['title']) }

In [209]:
def getMovies(movie, top_n=5):
    """Return the ``top_n`` movies most similar to ``movie``.

    Parameters
    ----------
    movie : str
        Title to look up in ``movies_map``.
    top_n : int, optional
        Number of neighbours to return (default 5, the previously
        hard-coded value).

    Returns
    -------
    list of (int, float)
        ``(row_position, similarity)`` pairs, most similar first. Positions
        are positional indices into the similarity matrix.

    Raises
    ------
    KeyError
        If ``movie`` is not a key of ``movies_map``.
    """
    index = movies_map[movie]
    # Drop the query row explicitly instead of assuming it sorts first: with
    # tied scores (or an all-zero vector) the old `[1:6]` slice could return
    # the query itself and discard a real neighbour.
    scored = [
        (position, score)
        for position, score in enumerate(similar_movies[index])
        if position != index
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_n]

In [210]:
def getName(movie):
    """Print the titles of the movies most similar to ``movie``, one per line.

    Parameters
    ----------
    movie : str
        Title passed through to ``getMovies``.

    Returns
    -------
    None
        Titles are written to stdout.
    """
    for position, _score in getMovies(movie):
        # getMovies yields row positions (from enumerate over the similarity
        # row), so the lookup must be positional. Label-based .loc returns the
        # wrong row, or raises KeyError, whenever df's index has gaps.
        print(df.iloc[position]['title'])

In [211]:
# Demo: print the nearest neighbours of one title.
getName('Ghost Ship')

Poseidon
The Boy
Supernova
5 Days of War
Titanic


# Sample Data

In [212]:
# Sample watch histories: one inner list of three titles per sample user.
# The trailing comment on each row is the author's label for that sample
# (presumably the mood it is meant to represent — confirm).
user_history = [
    ['The Dark Knight Rises','Avatar','Iron Man'],              # "Every movie ever" (generic sample)
    ['Avatar','Iron Man','Jurassic World'],                     # Excited
    ['Cars 2','Meet the Fockers','Ted 2'],                      # Happy
    ['Now You See Me','The Conjuring 2','Final Destination 5'], # Thrilled
    ['The Square','Ayurveda: Art of Being','Super Size Me'],    # Calm
    ['Titanic','The Martian','Unbroken'],                       # Emotional
    ['Gravity','The Time Machine','The Last Days on Mars'],     # Curious
    ['Titanic','Friends with Benefits','To Rome with Love'],    # Romantic
]

# Recommendation

In [213]:
def getRecommendation(recent_watched):
    """Print up to 10 recommendations based on the last three watched titles.

    For each of the last three titles, the 19 entries ranked right after the
    top-scoring one are collected. The pooled candidates are ordered by
    similarity, and the first 10 distinct movies whose title is not one of the
    three seeds are printed as a list.

    Parameters
    ----------
    recent_watched : list of str
        Watch history, oldest first; only the last three entries are used.

    Returns
    -------
    None
        The recommended titles are printed, not returned.

    Raises
    ------
    KeyError
        If one of the last three titles is not a key of ``movies_map``.
    """
    top3 = recent_watched[-3:]
    seed_positions = [movies_map[title] for title in top3]

    candidates = []
    for seed in seed_positions:
        ranked = sorted(
            enumerate(similar_movies[seed]),
            key=lambda pair: pair[1],
            reverse=True,
        )
        # Skip rank 0 (normally the seed itself) and keep the next 19.
        candidates.extend(ranked[1:20])

    candidates.sort(key=lambda pair: pair[1], reverse=True)

    picked = set()
    final_recs = []
    for position, _score in candidates:
        if len(final_recs) == 10:
            break
        if position in picked:
            continue
        title = df.iloc[position]['title']
        if title in top3:
            continue
        final_recs.append(title)
        picked.add(position)

    print(final_recs)

In [214]:
# Demo: recommendations for the sixth sample history (the row labelled "Emotional").
getRecommendation(user_history[5])

['Fort McCoy', 'Red Tails', 'Letters from Iwo Jima', 'Enemy at the Gates', 'Inchon', 'The Great Raid', 'Black Book', 'Windtalkers', 'Catch-22', 'Saints and Soldiers']


# Moods

In [215]:
def getMov(genre):
    """Return the titles of all movies whose ``mood_tag`` equals ``genre``.

    NOTE: this re-defines ``getMov`` from an earlier cell of this notebook;
    the later definition is the one in effect after Run All. Consider keeping
    a single definition.

    Parameters
    ----------
    genre : str
        Despite the name, this is compared against the ``mood_tag`` column
        (e.g. "Happy", "Excited"), not against the genres column.

    Returns
    -------
    list
        Matching titles in row order; empty when nothing matches.
    """
    # One vectorized boolean mask instead of a Python loop with two .iloc
    # lookups per row.
    return movies.loc[movies['mood_tag'] == genre, 'title'].tolist()

# Location

In [216]:
# Language code -> human-readable language name.
# Codes are two-letter strings of the kind stored in movies['original_language'];
# most are ISO 639-1 codes, with the exception flagged inline.
language_mapping = {
    'en': 'English',
    'ja': 'Japanese',
    'fr': 'French',
    'zh': 'Chinese',
    'es': 'Spanish',
    'de': 'German',
    'hi': 'Hindi',
    'ru': 'Russian',
    'ko': 'Korean',
    'te': 'Telugu',
    'cn': 'Chinese (alternative code)',  # non-ISO code; NOTE(review): some movie datasets use 'cn' for Cantonese — confirm
    'it': 'Italian',
    'nl': 'Dutch',
    'ta': 'Tamil',
    'sv': 'Swedish',
    'th': 'Thai',
    'da': 'Danish',
    'hu': 'Hungarian',
    'cs': 'Czech',
    'pt': 'Portuguese',
    'is': 'Icelandic',
    'tr': 'Turkish',
    'nb': 'Norwegian Bokmål',
    'af': 'Afrikaans',
    'pl': 'Polish',
    'he': 'Hebrew',
    'ar': 'Arabic',
    'vi': 'Vietnamese',
    'ky': 'Kyrgyz',
    'id': 'Indonesian',
    'ro': 'Romanian',
    'fa': 'Persian (Farsi)',
    'no': 'Norwegian',
    'sl': 'Slovenian',
    'ps': 'Pashto',
    'el': 'Greek'
}

In [217]:
# Country name -> a single language code used to pick that country's movies.
# Each country maps to exactly one code, and country_movies() matches it with
# strict equality against 'original_language'. Consequences to be aware of:
#   - 'China' -> 'zh' will not match rows coded 'cn';
#   - 'Norway' -> 'no' will not match rows coded 'nb';
#   - 'India' -> 'hi' will not match rows coded 'te' or 'ta'.
# NOTE(review): several choices are approximations (e.g. 'Pakistan' -> 'ps',
# 'Sri Lanka' -> 'ta'); confirm they reflect the intended behaviour.
country_language_mapping = {
    'United States': 'en',
    'United Kingdom': 'en',
    'India': 'hi',
    'Japan': 'ja',
    'France': 'fr',
    'China': 'zh',
    'Spain': 'es',
    'Mexico': 'es',
    'Germany': 'de',
    'Russia': 'ru',
    'South Korea': 'ko',
    'Italy': 'it',
    'Netherlands': 'nl',
    'Sri Lanka': 'ta',
    'Sweden': 'sv',
    'Thailand': 'th',
    'Denmark': 'da',
    'Hungary': 'hu',
    'Czech Republic': 'cs',
    'Portugal': 'pt',
    'Brazil': 'pt',
    'Iceland': 'is',
    'Turkey': 'tr',
    'Norway': 'no',
    'South Africa': 'af',
    'Poland': 'pl',
    'Israel': 'he',
    'Saudi Arabia': 'ar',
    'Egypt': 'ar',
    'Vietnam': 'vi',
    'Kyrgyzstan': 'ky',
    'Indonesia': 'id',
    'Romania': 'ro',
    'Iran': 'fa',
    'Slovenia': 'sl',
    'Afghanistan': 'ps',
    'Pakistan': 'ps',
    'Greece': 'el',
}

In [218]:
#fetch random country specific movies 
import random
def country_movies(country, n=10):
    """Return up to ``n`` movie titles whose original language matches ``country``.

    Parameters
    ----------
    country : str
        Country name; must be a key of ``country_language_mapping``.
    n : int, optional
        Maximum number of titles to return (default 10, the previously
        hard-coded value).

    Returns
    -------
    list of str
        ``[]`` when the country is unknown. All matching titles (in row order)
        when there are at most ``n`` of them. Otherwise a random sample of
        ``n`` titles — unseeded, so results vary between calls.
    """
    language_code = country_language_mapping.get(country)
    if not language_code:
        return []  # country not in mapping

    matches = movies.loc[movies['original_language'] == language_code, 'title']
    titles = matches.tolist()

    if len(titles) <= n:
        return titles
    return random.sample(titles, n)

    

In [219]:
# Demo: the result is a random sample when more than ten titles match, so the
# displayed list changes from run to run.
country_movies('United States')

['Saint Ralph',
 'Hollywood Homicide',
 'The Hunger Games: Mockingjay - Part 1',
 'The Theory of Everything',
 'Road to Perdition',
 'Legally Blonde',
 'Dracula Untold',
 'Saw II',
 'A Thin Line Between Love and Hate',
 "My Boss's Daughter"]