### Imports


In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

### Reading the Data

In [2]:
# Load the raw show catalogue; later cells read its 'Title', 'Genre(s)',
# 'Rating' and 'Votes' columns.
df = pd.read_csv(filepath_or_buffer="data.csv")

### Data Modification

In [3]:
# Minimum number of votes a show needs in order to be kept.
# A lower value lets more niche shows through.
VOTE_THRESHOLD = 99

# Drop the shows that too few people have rated, then renumber rows from 0.
has_enough_votes = df['Votes'].ge(VOTE_THRESHOLD)
df = df.loc[has_enough_votes].reset_index(drop=True)

### TF-IDF Vectorization

In [4]:
# Turn each show's genre string into a TF-IDF weighted term vector.
vectorizer = TfidfVectorizer(stop_words='english')

# The vectorizer rejects NaN documents, so a show with no genre listed is
# given an empty string (an all-zero vector) instead of aborting the fit.
tfidf_matrix = vectorizer.fit_transform(df['Genre(s)'].fillna(''))

### Modeling

In [5]:
# Brute-force nearest-neighbour search over the TF-IDF rows, comparing
# shows by cosine distance.
nn_model = NearestNeighbors(algorithm='brute', metric='cosine')
nn_model.fit(X=tfidf_matrix)

### Finding shows based on user input

Input a list of shows

In [6]:
# Shows the user already likes; recommendations are seeded from these titles.
my_shows = [
    'Sherlock',
    'Hannibal',
    'How to Get Away with Murder',
]

Attempt to find each provided show in the dataset; any title that is not found is added to `missing_titles`.

In [7]:
# Resolve every title in `my_shows` to a row of `df`.
#   liked_show_indices -- index labels of the rows that were matched
#   missing_titles     -- input titles for which no row was found
liked_show_indices = []
missing_titles = []

# Lower-case the title column once instead of once per query.
titles_lower = df['Title'].str.lower()

for title in my_shows:
    query = title.lower()

    # Prefer an exact (case-insensitive) title so that a short query such as
    # 'You' is not resolved to whichever longer title contains it first.
    match = titles_lower[titles_lower == query]

    if match.empty and query:
        # Fall back to a literal substring match. regex=False keeps
        # punctuation in titles ('.', '(', '?', ...) from being interpreted
        # as a regular expression. An empty query is skipped because it
        # would match every row.
        match = titles_lower[titles_lower.str.contains(query, regex=False, na=False)]

    if match.empty:
        missing_titles.append(title)
        continue

    index = match.index[0]
    # Two inputs can resolve to the same show; keep it only once.
    if index not in liked_show_indices:
        liked_show_indices.append(index)

Report the shows that could not be found in the dataset

In [8]:
# Tell the user which of their titles had no match in the dataset.
if len(missing_titles) > 0:
    skipped = ", ".join(missing_titles)
    print("Skipped (not found in dataset):", skipped)

If none of the liked shows were found, fall back to a list of popular shows

In [9]:
# Fallback: with nothing to seed the neighbour search, list the ten most
# popular shows instead.
if len(liked_show_indices) == 0:
    print("\nℹ No liked shows found in dataset. Showing top popular shows instead:\n")

    # popularity score = Rating x log10(Votes), stored as a new column on df
    df['popularity_score'] = df['Rating'].mul(np.log10(df['Votes']))

    top_shows = df.sort_values(by='popularity_score', ascending=False).iloc[:10]
    print(top_shows.loc[:, ['Title', 'Rating']])

### Finally: generating recommendations

In [10]:
# Collect the nearest genre-neighbours of every liked show, drop the liked
# shows themselves, and print the 15 best-rated candidates.
if liked_show_indices:

    # Rows the user already likes. The labels in liked_show_indices are used
    # as row positions here, which holds as long as df still has the 0..n-1
    # index produced by reset_index(drop=True) in the filtering cell.
    liked_positions = {int(i) for i in liked_show_indices}

    # kneighbors raises when asked for more neighbours than fitted rows,
    # so cap the request on small (heavily filtered) datasets.
    n_neighbors = min(8, tfidf_matrix.shape[0])

    recommendations = set()

    for i in liked_show_indices:

        distances, indices = nn_model.kneighbors(tfidf_matrix[i], n_neighbors=n_neighbors)

        for j in indices.flatten():
            # Skip every liked show, not only the one being queried: a liked
            # show can be a neighbour of a different liked show, and a
            # partially matched title differs from the raw input string.
            if int(j) not in liked_positions:
                recommendations.add(int(j))

    # create recommendations DataFrame (sorted positions give a stable row order)
    rec_df = df.iloc[sorted(recommendations)].copy()

    # also exclude any other row whose title equals one of the inputs
    rec_df = rec_df[~rec_df['Title'].str.lower().isin([s.lower() for s in my_shows])]

    # sort by Rating; a stable sort keeps tied shows in a reproducible order
    rec_df = rec_df.sort_values('Rating', ascending=False, kind='mergesort')

    print("\n Top Recommendations:")
    print(rec_df[['Title', 'Rating']].head(15))



 Top Recommendations:
                  Title  Rating
560       The Mentalist   8.397
2480                BMF   8.200
2777  Dexter: New Blood   8.164
1907                You   8.106
1455             Narcos   7.989
2432           Get Even   7.800
1675           El Chema   7.652
1898             Harrow   7.600
531             Damages   7.576
75          Law & Order   7.574
64    Murder, She Wrote   7.569
1515            Trapped   7.400
1852            McMafia   7.299
2820       Kaleidoscope   7.200
2801         Stay Close   6.974
