In [10]:
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder, MultiLabelBinarizer
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
import joblib



In [11]:
df=pd.read_csv('/content/MoviesOnStreamingPlatforms_updated.csv')

In [58]:
df.isna().sum()

Unnamed: 0,0
ID,0
Title,0
Year,0
Age,9390
IMDb,571
Rotten Tomatoes,11586
Netflix,0
Hulu,0
Prime Video,0
Disney+,0


In [59]:
# Clean only necessary fields
df = df.dropna(subset=["IMDb", "Genres"])

# Optionally fill less important fields
df["Directors"] = df["Directors"].fillna("Unknown")
df["Country"] = df["Country"].fillna("Unknown")
df["Language"] = df["Language"].fillna("Unknown")
df["Runtime"] = df["Runtime"].fillna(df["Runtime"].mean())
# Fill missing Age with 'Unknown' or a placeholder
df["Age"] = df["Age"].fillna("Unknown")

# Fill Rotten Tomatoes with 0 or average rating (if you prefer to keep the numeric type)
df["Rotten Tomatoes"] = df["Rotten Tomatoes"].str.rstrip('%')  # Remove % if present
df["Rotten Tomatoes"] = pd.to_numeric(df["Rotten Tomatoes"], errors="coerce")
df["Rotten Tomatoes"] = df["Rotten Tomatoes"].fillna(df["Rotten Tomatoes"].mean())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Directors"] = df["Directors"].fillna("Unknown")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Country"] = df["Country"].fillna("Unknown")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Language"] = df["Language"].fillna("Unknown")
A value is trying to be set on a copy of a slice from a

In [60]:
df.isna().sum()

Unnamed: 0,0
ID,0
Title,0
Year,0
Age,0
IMDb,0
Rotten Tomatoes,0
Netflix,0
Hulu,0
Prime Video,0
Disney+,0


In [61]:
#  New mood → therapeutic genre mapping
healing_genre_mapping = {
    "Sad": ["Comedy", "Adventure", "Animation", "Family"],
    "Scared": ["Romance", "Family", "Drama"],
    "Angry": ["Fantasy", "Sci-Fi", "Comedy", "Musical"],
    "Happy": ["Drama", "Biography", "Documentary"],
    "Neutral": ["Mystery", "Thriller", "Action"]
}

In [62]:
# Flatten healing genres into a list of allowed genres
allowed_genres = sorted(set(g for genres in healing_genre_mapping.values() for g in genres))


In [63]:
# Function to clean genres
def filter_genres(genres):
    if pd.isna(genres): return []
    genre_list = [g.strip() for g in genres.split(",")]
    return [g for g in genre_list if g in allowed_genres]

In [64]:
# Filter dataset to only include movies with allowed genres
df["FilteredGenres"] = df["Genres"].apply(filter_genres)
df = df[df["FilteredGenres"].map(len) > 0]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["FilteredGenres"] = df["Genres"].apply(filter_genres)


In [65]:
# Duplicate rows for each mood to create mood→genre training pairs
rows = []
for mood, genres_for_mood in healing_genre_mapping.items():
    for _, row in df.iterrows():
        if any(g in genres_for_mood for g in row["FilteredGenres"]):
            rows.append({"Mood": mood, "Genres": row["FilteredGenres"]})
healing_df = pd.DataFrame(rows)

In [66]:
# Encode mood (X) and genre list (y)
le = LabelEncoder()
X_encoded = le.fit_transform(healing_df["Mood"])

mlb = MultiLabelBinarizer()
y_encoded = mlb.fit_transform(healing_df["Genres"])

In [67]:
# Train model
model = OneVsRestClassifier(LogisticRegression(max_iter=200))
model.fit(X_encoded.reshape(-1, 1), y_encoded)


In [68]:
# Save model and encoders
joblib.dump(model, "mood_to_genre_model.pkl")
joblib.dump(le, "mood_label_encoder.pkl")
joblib.dump(mlb, "genre_binarizer.pkl")

print("✅ Model trained and saved: mood_to_genre_model.pkl")


✅ Model trained and saved: mood_to_genre_model.pkl


In [69]:
# Load everything (you can skip if it's already in memory)
model = joblib.load("mood_to_genre_model.pkl")
le = joblib.load("mood_label_encoder.pkl")
mlb = joblib.load("genre_binarizer.pkl")
df = pd.read_csv("/content/MoviesOnStreamingPlatforms_updated.csv")


In [70]:
#  INPUT: User mood (change this to test different moods)
user_mood = "Scared"  # Try: "Angry", "Sad", "Happy", etc.


In [71]:
# Predict healing genres
mood_encoded = le.transform([user_mood])
predicted_genres_binary = model.predict(mood_encoded.reshape(-1, 1))
predicted_genres = mlb.inverse_transform(predicted_genres_binary)[0]


In [72]:
print(f"\n For mood '{user_mood}', the model recommends genres: {predicted_genres}")


 For mood 'Scared', the model recommends genres: ('Drama',)


In [73]:
def match_genres(genre_str):
    if pd.isna(genre_str):
        return False
    genre_list = [g.strip() for g in genre_str.split(",")]
    return any(g in genre_list for g in predicted_genres)

recommended_movies = df[df["Genres"].apply(match_genres)]
recommended_movies = recommended_movies.sort_values(by="IMDb", ascending=False).head(10)

In [80]:
def recommend_by_mood_auto_platform(user_mood):
    """Recommends movies based on user mood and available platforms."""
    # Platform URL mapping
    platform_links = {
        "Netflix": "https://www.netflix.com",
        "Hulu": "https://www.hulu.com",
        "Prime Video": "https://www.primevideo.com",
        "Disney+": "https://www.disneyplus.com"
    }

    # Predict healing genres
    mood_encoded = le.transform([user_mood])
    predicted_genres_binary = model.predict(mood_encoded.reshape(-1, 1))
    predicted_genres = mlb.inverse_transform(predicted_genres_binary)[0]

    print(f"\n For mood '{user_mood}', the model recommends genres: {predicted_genres}")

    # Filter movies based on predicted genres
    def match_genres(genre_str):
        if pd.isna(genre_str):
            return False
        genre_list = [g.strip() for g in genre_str.split(",")]
        return any(g in genre_list for g in predicted_genres)

    recommended_movies = df[df["Genres"].apply(match_genres)]
    recommended_movies = recommended_movies.sort_values(by="IMDb", ascending=False).head(10)

    # Remove movies with no known platform
    def has_platform(row):
        return any(str(row.get(platform, 0)) == "1" for platform in ["Netflix", "Hulu", "Prime Video", "Disney+"])

    recommended_movies = recommended_movies[recommended_movies.apply(has_platform, axis=1)]

    if recommended_movies.empty:
        print("⚠️ No matching movies found with known platforms.")
        return


    # Show movie recommendations
    print("\n Top 10 Movie Recommendations:\n")
    for _, row in recommended_movies.iterrows():
        print(f" Title: {row['Title']}")
        print(f" IMDb: {row['IMDb']}")
        print(f" Genre: {row['Genres']}")

        # Create clickable platform links
        available = []
        for platform, link in platform_links.items():
            if str(row.get(platform, 0)) == "1":
                available.append(f"[{platform}]({link})")

        print(f"📺 Available on: {' | '.join(available)}")
        print("------------------------------------------------\n")

In [81]:
recommend_by_mood_auto_platform("Scared")


 For mood 'Scared', the model recommends genres: ('Drama',)

 Top 10 Movie Recommendations:

 Title: Bounty
 IMDb: 9.3
 Genre: Adventure,Drama,History,Romance
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: Square One
 IMDb: 9.3
 Genre: Documentary,Drama,Music
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: Love on a Leash
 IMDb: 9.3
 Genre: Comedy,Drama,Fantasy,Romance
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: The Dark Knight
 IMDb: 9.0
 Genre: Action,Crime,Drama,Thriller
📺 Available on: [Hulu](https://www.hulu.com)
------------------------------------------------

 Title: A Dog Named Gucci
 IMDb: 9.0
 Genre: Documentary,Biography,Drama,News
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: The Jones Family Will 

In [82]:
recommend_by_mood_auto_platform("Happy")


 For mood 'Happy', the model recommends genres: ('Drama',)

 Top 10 Movie Recommendations:

 Title: Bounty
 IMDb: 9.3
 Genre: Adventure,Drama,History,Romance
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: Square One
 IMDb: 9.3
 Genre: Documentary,Drama,Music
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: Love on a Leash
 IMDb: 9.3
 Genre: Comedy,Drama,Fantasy,Romance
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: The Dark Knight
 IMDb: 9.0
 Genre: Action,Crime,Drama,Thriller
📺 Available on: [Hulu](https://www.hulu.com)
------------------------------------------------

 Title: A Dog Named Gucci
 IMDb: 9.0
 Genre: Documentary,Biography,Drama,News
📺 Available on: [Prime Video](https://www.primevideo.com)
------------------------------------------------

 Title: The Jones Family Will M