In [1]:

import pandas as pd

df = pd.read_csv("all_ott_cleaned.csv")

df = df.sample(frac=1).reset_index(drop=True)
df.to_csv("all_ott_cleaned.csv", index=False)

# Fill NaNs
for col in ["genre", "description", "cast"]:
    if col in df.columns:
        df[col] = df[col].fillna("")

# Combine into one column
df["combined"] = df["title"] + " " + df["genre"] + " " + df["description"] + " " + df["cast"]


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommend_show(title):
    shows_df = df[df['type'].str.lower() == "tv show"].copy()

    if title not in shows_df['title'].values:
        return f"'{title}' not found in TV Shows."

    # Re-index for internal use
    shows_df = shows_df.reset_index(drop=True)

    # make TF-IDF matrix
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(shows_df["combined"])

    # index of the title become 0
    idx = shows_df[shows_df['title'] == title].index[0]

    # using cosine similarity
    sim_scores = list(enumerate(cosine_similarity(tfidf_matrix[idx], tfidf_matrix)[0]))

    # Sorted similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:6]

    top_indices = [i[0] for i in sim_scores]

    return shows_df.loc[top_indices, ["title", "platform", "genre", "type"]]


In [9]:
recommend_show("ZeroZeroZero")

Unnamed: 0,title,platform,genre,type
80,Atlantic: A Year in the Wild,Amazon,"Documentary, Special Interest",TV Show
6139,Cocaine,Netflix,"British TV Shows, Crime TV Shows, Docuseries",TV Show
27,Harold Shipman - Driven to Kill,Netflix,"British TV Shows, Crime TV Shows, Docuseries",TV Show
1883,Secret State,Amazon,"Drama, Suspense",TV Show
3548,Dark Desire,Netflix,"Crime TV Shows, International TV Shows, Spanis...",TV Show


In [10]:
recommend_show("Queens")
recommend_show("Hawkeye")

Unnamed: 0,title,platform,genre,type
1680,Mission Force One,Disney,"Action-Adventure, Animation, Kids",TV Show
4490,Halston,Netflix,TV Dramas,TV Show
1611,Archer,Hulu,"Action, Adult Animation, Adventure",TV Show
4413,Criminal Minds,Hulu,"Crime, Drama, Mystery",TV Show
1649,Unknown Waters with Jeremy Wade,Disney,"Animals & Nature, Docuseries",TV Show


In [17]:
def recommend_movies(title):
    movies_df = df[df['type'].str.lower() == "movie"].copy()
    if title not in movies_df['title'].values:
        return f"'{title}' not found in Movies on selected platform."

    movies_df = movies_df.reset_index(drop=True)

    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(movies_df["combined"])

    idx = movies_df[movies_df['title'] == title].index[0]
    sim_scores = list(enumerate(cosine_similarity(tfidf_matrix[idx], tfidf_matrix)[0]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:6]

    top_indices = [i[0] for i in sim_scores]
    return movies_df.loc[top_indices, ["title", "platform", "genre", "type"]]


In [18]:
recommend_movies("Silent Night")

Unnamed: 0,title,platform,genre,type
5477,Lucky Day,Hulu,"Action, Thriller",Movie
6238,Death in Texas,Hulu,"Action, Thriller",Movie
3507,Meat,Amazon,"Drama, Romance, Suspense",Movie
6973,The Artist,Hulu,"Comedy, Drama, Romance",Movie
14799,Sweet Virginia,Hulu,"Drama, Thriller",Movie


In [21]:
def find_your_mind():
    choice = input("What do you want to watch? (1 for Movie/2 for TV Show): ").strip().lower()
    if choice not in ["movie", "tv show"]:
        return "Invalid input. Choose 'Movie' or 'TV Show'."

    year_pref = input("📅 Recent (2020+) or Old (Before 2020)? (recent/old): ").strip().lower()
    if year_pref == "recent":
        year_filter = df["release_year"] >= 2020
    elif year_pref == "old":
        year_filter = df["release_year"] < 2020
    else:
        return "Invalid input for year preference."

    age = input("Your age (for content filtering): ").strip()
    try:
        age = int(age)
        if age < 13:
            rating_filter = df["rating"].isin(["G", "TV-G", "PG", "TV-Y", "TV-Y7"])
        elif age < 18:
            rating_filter = df["rating"].isin(["G", "PG", "PG-13", "TV-PG", "TV-14"])
        else:
            rating_filter = df["rating"].notna()
    except:
        return "Invalid age input."

    genres = input("🎭 Enter up to 3 genres (comma-separated): ").lower().split(",")
    genres = [g.strip() for g in genres if g.strip() != ""]

    platform = input("📺 Preferred platform? (Netflix, Prime, Hulu, Disney+) (Optional): ").strip().lower()

    # Filtering
    filtered_df = df[
        (df["type"].str.lower() == choice) &
        year_filter &
        rating_filter
    ].copy()

    if platform:
        filtered_df = filtered_df[filtered_df["platform"].str.lower() == platform]

    # Genre filtering
    filtered_df = filtered_df[
        filtered_df["genre"].str.lower().apply(lambda g: any(gen in g for gen in genres))
    ]

    if filtered_df.empty:
        return "No recommendations found for your preferences."

    return filtered_df[["title", "genre", "release_year", "platform"]].head(10)



In [22]:
find_your_mind()

Unnamed: 0,title,genre,release_year,platform
0,Toy Story 3,"Animation, Comedy, Drama",2010,Disney
3,Cars Toon: Air Mater,"Animation, Comedy, Family",2011,Disney
4,Tellur Aliens,Children & Family Movies,2016,Netflix
24,Deva Shapath Khota Saangen Khara Sangnar Nahi,Comedy,2006,Amazon
26,Porky's II: The Next Day,"Comedy, Young Adult Audience",1983,Amazon
33,Get Santa,"Children & Family Movies, Comedies",2014,Netflix
35,Jeff Dunham: Unhinged in Hollywood,Stand-Up Comedy,2015,Netflix
40,Donald Glover: Weirdo,Stand-Up Comedy,2012,Netflix
41,Mickey and the Seal,"Animation, Family",1948,Disney
42,UnAuthorized,"Arthouse, Comedy",2010,Amazon
