In [1]:

import pandas as pd

# Seed for the row shuffle, so the in-memory row order is reproducible for a
# given input file.
RANDOM_SEED = 42

# Text columns that are concatenated with the title into the "combined" field.
TEXT_COLUMNS = ["genre", "description", "cast"]

df = pd.read_csv("all_ott_cleaned.csv")

df = df.sample(frac=1, random_state=RANDOM_SEED).reset_index(drop=True)
# NOTE(review): this overwrites the input file with the shuffled rows, so the
# file's row order changes on every run of this cell. Kept to preserve the
# existing side effect; consider writing to a separate path instead.
df.to_csv("all_ott_cleaned.csv", index=False)

# Fill NaNs. A column that is absent from the file is created empty so the
# concatenation below cannot fail with a KeyError.
for col in TEXT_COLUMNS:
    if col in df.columns:
        df[col] = df[col].fillna("")
    else:
        df[col] = ""

# Combine into one column. The title is NaN-filled only for this purpose (the
# "title" column itself is left untouched): a single missing title would
# otherwise make the whole combined value NaN, which TfidfVectorizer rejects.
df["combined"] = (
    df["title"]
    .fillna("")
    .astype(str)
    .str.cat([df[col].astype(str) for col in TEXT_COLUMNS], sep=" ")
)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def recommend_show(title, top_n=5):
    """Recommend TV shows whose text is most similar to the given show.

    Rows of the notebook-level ``df`` whose ``type`` equals "tv show"
    (case-insensitive) are vectorised with TF-IDF over their ``combined``
    text and ranked by cosine similarity to the first row whose ``title``
    equals ``title`` exactly.

    Parameters
    ----------
    title : str
        Exact (case-sensitive) title of the show to start from.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        The ``title``, ``platform``, ``genre`` and ``type`` columns of the
        best matches, most similar first; or a message string when the title
        is not among the TV shows.
    """
    shows_df = df[df['type'].str.lower() == "tv show"].copy()
    if title not in shows_df['title'].values:
        return f"'{title}' not found in TV Shows."

    shows_df = shows_df.reset_index(drop=True)

    # make TF-IDF matrix; NaN documents would make the vectoriser raise
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(shows_df["combined"].fillna(""))

    query_idx = shows_df.index[shows_df['title'] == title][0]

    # using cosine similarity of the query row against every row
    scores = cosine_similarity(tfidf_matrix[query_idx], tfidf_matrix)[0]

    # Exclude the query row explicitly instead of assuming it sorts first:
    # with tied scores (duplicate rows, or an all-zero query vector) the
    # query is not guaranteed to be at rank 0. sorted() is stable, so ties
    # keep their original row order.
    ranked = sorted(
        (i for i in range(len(scores)) if i != query_idx),
        key=lambda i: scores[i],
        reverse=True,
    )
    top_indices = ranked[:max(top_n, 0)]

    return shows_df.loc[top_indices, ["title", "platform", "genre", "type"]]


In [7]:
# Example: recommendations for the TV show "ZeroZeroZero".
recommend_show("ZeroZeroZero")

Unnamed: 0,title,platform,genre,type
4229,Atlantic: A Year in the Wild,Amazon,"Documentary, Special Interest",TV Show
3438,Cocaine,Netflix,"British TV Shows, Crime TV Shows, Docuseries",TV Show
3807,Harold Shipman - Driven to Kill,Netflix,"British TV Shows, Crime TV Shows, Docuseries",TV Show
4447,Secret State,Amazon,"Drama, Suspense",TV Show
426,Dark Desire,Netflix,"Crime TV Shows, International TV Shows, Spanis...",TV Show


In [8]:
# Example: recommendations for the TV show "Queens".
recommend_show("Queens")

Unnamed: 0,title,platform,genre,type
1346,The Real Queens of Hip-Hop: The Women Who Chan...,Hulu,"Music, News",TV Show
4356,Hip Hop Uncovered,Hulu,"Black Stories, Documentaries, Music",TV Show
270,Gabru: Hip Hop Revolution,Netflix,"International TV Shows, TV Dramas",TV Show
4276,Love & Hip Hop Atlanta,Hulu,"Music, Reality",TV Show
541,Hip-Hop Evolution,Netflix,"Docuseries, International TV Shows",TV Show


In [None]:
def recommend_movies(title, top_n=5):
    """Recommend movies whose text is most similar to the given movie.

    Rows of the notebook-level ``df`` whose ``type`` equals "movie"
    (case-insensitive) are vectorised with TF-IDF over their ``combined``
    text and ranked by cosine similarity to the first row whose ``title``
    equals ``title`` exactly.

    Parameters
    ----------
    title : str
        Exact (case-sensitive) title of the movie to start from.
    top_n : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame or str
        The ``title``, ``platform``, ``genre`` and ``type`` columns of the
        best matches, most similar first; or a message string when the title
        is not among the movies.
    """
    movies_df = df[df['type'].str.lower() == "movie"].copy()
    if title not in movies_df['title'].values:
        # No platform is selected anywhere in this function, so the message
        # only mentions the movie catalogue.
        return f"'{title}' not found in Movies."

    movies_df = movies_df.reset_index(drop=True)

    # NaN documents would make the vectoriser raise, so blank them out.
    tfidf = TfidfVectorizer(stop_words="english")
    tfidf_matrix = tfidf.fit_transform(movies_df["combined"].fillna(""))

    query_idx = movies_df.index[movies_df['title'] == title][0]
    scores = cosine_similarity(tfidf_matrix[query_idx], tfidf_matrix)[0]

    # Exclude the query row explicitly instead of assuming it sorts first:
    # with tied scores (duplicate rows, or an all-zero query vector) the
    # query is not guaranteed to be at rank 0. sorted() is stable, so ties
    # keep their original row order.
    ranked = sorted(
        (i for i in range(len(scores)) if i != query_idx),
        key=lambda i: scores[i],
        reverse=True,
    )
    top_indices = ranked[:max(top_n, 0)]

    return movies_df.loc[top_indices, ["title", "platform", "genre", "type"]]


In [18]:
# Example: recommendations for the movie "Silent Night".
recommend_movies("Silent Night")

Unnamed: 0,title,platform,genre,type
5477,Lucky Day,Hulu,"Action, Thriller",Movie
6238,Death in Texas,Hulu,"Action, Thriller",Movie
3507,Meat,Amazon,"Drama, Romance, Suspense",Movie
6973,The Artist,Hulu,"Comedy, Drama, Romance",Movie
14799,Sweet Virginia,Hulu,"Drama, Thriller",Movie


In [9]:
def find_your_mind():
    """Interactively filter the catalogue by the user's stated preferences.

    Prompts on standard input for content type, recency, age, genres and an
    optional platform, then filters the notebook-level ``df`` accordingly.

    Returns
    -------
    pandas.DataFrame or str
        Up to 10 rows with the ``title``, ``genre``, ``release_year`` and
        ``platform`` columns; or a message string when an answer is invalid
        or nothing matches.
    """
    choice = input("What you want to watch?(Movie or TV Show): ").strip().lower()
    if choice not in ["movie", "tv show"]:
        return "Invalid input. write 'Movie' or 'TV Show'."

    year_pref = input("Recent (2020+) or Old (Before 2020)?(recent/old): ").strip().lower()
    if year_pref == "recent":
        year_filter = df["release_year"] >= 2020
    elif year_pref == "old":
        year_filter = df["release_year"] < 2020
    else:
        return "Invalid input."

    # Only the integer conversion is guarded, so unrelated errors (e.g. a
    # missing "rating" column) are no longer reported as a bad age.
    age_text = input("Your age: ").strip()
    try:
        age = int(age_text)
    except ValueError:
        return "Invalid age input."
    if age < 0:
        return "Invalid age input."

    # The under-18 list extends the under-13 list, so a title allowed for a
    # child is never hidden from a teenager.
    kids_ratings = ["G", "TV-G", "PG", "TV-Y", "TV-Y7"]
    teen_ratings = kids_ratings + ["PG-13", "TV-PG", "TV-14"]
    if age < 13:
        rating_filter = df["rating"].isin(kids_ratings)
    elif age < 18:
        rating_filter = df["rating"].isin(teen_ratings)
    else:
        # Adults see every title that has a rating at all.
        rating_filter = df["rating"].notna()

    genres = input("Enter up to 3 genres(comma-separated): ").lower().split(",")
    genres = [g.strip() for g in genres if g.strip() != ""]

    platform = input("Preferred platform? (Netflix, Prime, Hulu, Disney+)(Optional): ").strip().lower()

    # Filtering
    filtered_df = df[
        (df["type"].str.lower() == choice) &
        year_filter &
        rating_filter
    ].copy()

    if platform:
        # The prompt offers "Prime" while the recommendation outputs in this
        # notebook show the stored label "Amazon", so accept the common
        # spellings. The stored spelling for Disney is not visible here, so
        # both forms are accepted -- TODO confirm against the data.
        accepted = {platform}
        if "prime" in platform or "amazon" in platform:
            accepted |= {"amazon", "prime", "amazon prime", "prime video", "amazon prime video"}
        elif "disney" in platform:
            accepted |= {"disney", "disney+"}
        stored_platform = filtered_df["platform"].str.strip().str.lower()
        filtered_df = filtered_df[stored_platform.isin(accepted)]

    # Genre filtering: keep rows whose genre text contains any requested
    # genre as a substring. An empty answer means "no genre preference"
    # rather than "match nothing".
    if genres:
        genre_text = filtered_df["genre"].fillna("").astype(str).str.lower()
        # astype(bool) keeps the mask boolean even when no rows are left.
        genre_mask = genre_text.apply(
            lambda text: any(gen in text for gen in genres)
        ).astype(bool)
        filtered_df = filtered_df[genre_mask]

    if filtered_df.empty:
        return "No recommendations found for your preferences."

    return filtered_df[["title", "genre", "release_year", "platform"]].head(10)



In [10]:
# Interactive: prompts for content type, recency, age, genres and platform.
find_your_mind()

Unnamed: 0,title,genre,release_year,platform
117,Is there anybody there?,"Documentary, Drama, Faith and Spirituality",2020,Amazon
153,Charmed,"TV Dramas, TV Sci-Fi & Fantasy",2021,Netflix
308,3%,"International TV Shows, TV Dramas, TV Sci-Fi &...",2020,Netflix
356,Outer Banks,"TV Action & Adventure, TV Dramas, Teen TV Shows",2021,Netflix
396,Grace and Frankie,"TV Comedies, TV Dramas",2021,Netflix
493,Brazilian Empire,Drama,2020,Amazon
532,A Series of Light,"Drama, LGBTQ, Suspense",2020,Amazon
562,One Lane Bridge,Drama,2020,Amazon
587,"Nevertheless,","International TV Shows, Romantic TV Shows, TV ...",2021,Netflix
589,Racket Boys,"International TV Shows, TV Comedies, TV Dramas",2021,Netflix
