In [6]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import get_close_matches

# Load datasets
movies = pd.read_csv('Dataset/tmdb_5000_movies.csv')
credits = pd.read_csv('Dataset/tmdb_5000_credits.csv')

# Merge on ID.
# Titles are not unique (different films can share a name), so joining on
# 'title' pairs a film with every same-named film's cast and crew and
# produces extra, mismatched rows. Join on the numeric id instead and drop the
# credits copy of 'title' so the merged frame keeps a single 'title' column.
# NOTE(review): assumes the TMDB 5000 schema -- `id` in the movies file and
# `movie_id` in the credits file; confirm against the CSV headers.
movies = movies.merge(
    credits.drop(columns='title'),
    left_on='id',
    right_on='movie_id',
)

# Keep necessary columns
movies = movies[['movie_id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

# Preprocessing function
import ast

def convert(obj):
    """Parse a string holding a list of dicts and return each dict's 'name'.

    `obj` is evaluated with `ast.literal_eval`, so it must be a valid Python
    literal; the names are returned in their original order.
    """
    return [entry['name'] for entry in ast.literal_eval(obj)]

def get_director(obj):
    """Return the name of the first entry whose 'job' is 'Director'.

    `obj` is a string holding a list of dicts, parsed with
    `ast.literal_eval`. An empty string is returned when no entry has the
    'Director' job.
    """
    crew_members = ast.literal_eval(obj)
    # Lazy generator: entries after the first director are never inspected.
    director_names = (
        member['name'] for member in crew_members if member['job'] == 'Director'
    )
    return next(director_names, '')

# Clean the data
# Drop incomplete rows, then reset the index so row labels equal row
# positions. `similarity` below is a plain positional array; without the reset
# any lookup that goes through a DataFrame label (e.g. `.index[0]`) would read
# the wrong row of it for every film located after a dropped row.
movies = movies.dropna().reset_index(drop=True)
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)
# Only the first three cast names are kept.
movies['cast'] = movies['cast'].apply(lambda x: convert(x)[:3])
movies['crew'] = movies['crew'].apply(get_director)

# Create 'tags' column
# Every component is turned into a list of strings so the columns can be
# concatenated element-wise with `+`.
movies['overview'] = movies['overview'].apply(lambda x: x.split())
movies['crew'] = movies['crew'].apply(lambda x: [x])
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']

# Explicit copy: `new` gets its own 'tags' column (a lowercase string), while
# `movies['tags']` stays a list.
new = movies[['movie_id', 'title', 'tags']].copy()
new['tags'] = new['tags'].apply(lambda x: " ".join(x).lower())

# Vectorize
# Bag-of-words counts over the tags, limited to 5000 features with English
# stop words removed; row i of `similarity` holds the cosine similarity of
# row i of `new` to every row.
cv = CountVectorizer(max_features=5000, stop_words='english')
vectors = cv.fit_transform(new['tags']).toarray()
similarity = cosine_similarity(vectors)

# Recommendation by title
def recommend(movie):
    """Return the titles of the five movies most similar to *movie*.

    The title is matched case-insensitively, ignoring surrounding whitespace.
    When no title matches, a single-element list is returned that holds
    either a "Did you mean" message with up to three close (lowercased)
    titles or a plain "Movie not found." message.
    """
    movie = movie.lower().strip()
    titles = new['title'].str.lower()
    # Row *positions* of the matching titles (not DataFrame labels).
    hits = (titles.str.strip() == movie).to_numpy().nonzero()[0]

    if hits.size == 0:
        close = get_close_matches(movie, titles, n=3, cutoff=0.6)
        return [f"Movie not found. Did you mean: {', '.join(close)}?"] if close else ["Movie not found."]

    # `similarity` is a positional array, so it must be indexed by row
    # position. A DataFrame label only equals the position while the index is
    # an unbroken 0..n-1 range, which stops being true once rows are dropped.
    position = int(hits[0])
    ranked = sorted(
        enumerate(similarity[position]),
        key=lambda pair: pair[1],
        reverse=True,
    )

    # Exclude the query film explicitly instead of assuming it sorts first;
    # another row with an equal score could otherwise push it into the result.
    neighbours = [pos for pos, _ in ranked if pos != position][:5]
    return [new['title'].iloc[pos] for pos in neighbours]

# Mood-based genre mapping
# Keys are the lowercase mood names accepted by recommend_by_mood(); each
# value lists the genre names whose movies are considered for that mood.
mood_genre_map = {
    "happy": ["Comedy", "Family", "Adventure"],
    "sad": ["Drama", "Romance"],
    "excited": ["Action", "Thriller"],
    "scared": ["Horror", "Mystery"],
    "chill": ["Animation", "Fantasy"]
}

# Mood-based recommendation
def recommend_by_mood(mood):
    """Return up to five movie titles for the given *mood*.

    *mood* is matched case-insensitively (surrounding whitespace ignored)
    against the keys of `mood_genre_map`. Movies having at least one of the
    mapped genres are vectorized on overview + keywords + cast, and the five
    with the highest summed cosine similarity to the other matching movies
    are returned, best first.

    Returns a single-element list with an error message for an unknown mood,
    and an empty list when no movie has any of the mapped genres.
    """
    mood = mood.lower().strip()
    if mood not in mood_genre_map:
        return ["Invalid mood. Try happy, sad, excited, scared, or chill."]

    # Lowercase the target genres once instead of once per row and per genre.
    wanted = {genre.lower() for genre in mood_genre_map[mood]}

    def genre_match(glist):
        return any(g.lower() in wanted for g in glist)

    filtered = movies[movies['genres'].apply(genre_match)]
    # Explicit copy: the 'tags' column written below must stay local and never
    # touch the module-level `movies` frame.
    filtered = filtered.dropna(subset=['overview']).copy()

    # Nothing to rank; fitting a vectorizer on zero documents would fail.
    if filtered.empty:
        return []

    filtered['tags'] = filtered['overview'] + filtered['keywords'] + filtered['cast']
    filtered['tags'] = filtered['tags'].apply(lambda x: " ".join(x).lower())

    cv_local = CountVectorizer(max_features=5000, stop_words='english')
    vectors = cv_local.fit_transform(filtered['tags']).toarray()
    similarity_local = cosine_similarity(vectors)

    # Column sums score each movie by its total similarity to the whole
    # filtered set; argsort is ascending, so take the last five and reverse.
    top_indices = similarity_local.sum(axis=0).argsort()[-5:][::-1]
    return filtered.iloc[top_indices]['title'].values.tolist()

# Example usage
if __name__ == "__main__":
    # The emoji in these labels had decayed into mojibake (UTF-8 bytes decoded
    # as cp1252). They are written as escapes so the source stays pure ASCII
    # and cannot be corrupted again by a wrong file encoding.
    print("\U0001F3AC Recommend based on movie name:")
    print(recommend("Avatar"))

    print("\n\U0001F3AD Recommend based on mood (e.g., happy, sad, chill):")
    print(recommend_by_mood("happy"))


🎬 Recommend based on movie name:
['Aliens', 'Moonraker', 'Alien', 'Alien³', 'Silent Running']

🎭 Recommend based on mood (e.g., happy, sad, chill):
['Mambo Italiano', 'Raising Helen', 'Party Monster', 'In the Name of the King: A Dungeon Siege Tale', 'Tiny Furniture']


In [7]:
import pickle

In [8]:
# Write through a context manager so the file is flushed and closed as soon as
# the dump finishes, rather than whenever the handle is garbage-collected.
with open('model/movies_list.pkl', 'wb') as f:
    pickle.dump(new, f)

In [9]:
# Write through a context manager so the file is flushed and closed as soon as
# the dump finishes, rather than whenever the handle is garbage-collected.
with open('model/similarity.pkl', 'wb') as f:
    pickle.dump(similarity, f)