In [27]:
import pandas as pd
import numpy as np

#loading and rating data

In [28]:
df= pd.read_csv("../data/netflix_titles.csv")

In [29]:
df = df[['title', 'type', 'listed_in', 'description']].dropna()
#keeping only the columns we need

In [30]:
df['combined_features'] = df['listed_in'] + ' ' + df['description']
#combining the columns we need to create a new column

In [31]:
df.head()

Unnamed: 0,title,type,listed_in,description,combined_features
0,Dick Johnson Is Dead,Movie,Documentaries,"As her father nears the end of his life, filmm...",Documentaries As her father nears the end of h...
1,Blood & Water,TV Show,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t...","International TV Shows, TV Dramas, TV Mysterie..."
2,Ganglands,TV Show,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...,"Crime TV Shows, International TV Shows, TV Act..."
3,Jailbirds New Orleans,TV Show,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo...","Docuseries, Reality TV Feuds, flirtations and ..."
4,Kota Factory,TV Show,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...,"International TV Shows, Romantic TV Shows, TV ..."


# filter WANT movies or Tv shows

In [32]:
content_type = 'Movie'

In [33]:
df_filtered = df[df['type']== content_type].reset_index(drop=True)

In [34]:
# Show how many titles are available of the selected type
print(f"Total {content_type}s available: {len(df_filtered)}")


Total Movies available: 6131


In [35]:
movie_count = len(df[df['type'] == 'Movie'])
tv_show_count = len(df[df['type'] == 'TV Show'])
print(f"Total Movies available: {movie_count}")
print(f"Total TV Shows available: {tv_show_count}")

Total Movies available: 6131
Total TV Shows available: 2676


In [36]:
df_filtered['combined_features'] = df_filtered['listed_in'] + ' ' + df_filtered['description']

In [37]:
from sklearn.feature_extraction.text import TfidfVectorizer
# Tfidf = term frequency-inverse document frequency

In [38]:
# Create TF-IDF vectorizer ignoring English stop word
tfidf = TfidfVectorizer(stop_words='english')

In [39]:
# Fit and transform the combined features into TF-IDF matrix
tfidf_matrix = tfidf.fit_transform(df_filtered['combined_features'])

fit means: learn all the words and their patterns.

transform means: convert the actual text to a matrix of numbers (one row per title)

now compute the cosine similer matrix


In [40]:

from sklearn.metrics.pairwise import cosine_similarity

In [41]:
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
# Get the index of the movie that matches the title

Recommendation function

In [43]:
def get_recommendations(title, cosine_sim=cosine_sim, df=df_filtered):
    """
    Return top 10 similar titles for a given title.
    """
    df_loc = df.reset_index(drop=True)
    title_to_index = pd.Series(df_loc.index, index=df_loc['title'])

    if title not in title_to_index:
        print(f"Title '{title}' not found. Please check spelling and try again.")
        return []

    idx = title_to_index[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:11]
    indices = [i[0] for i in sim_scores]

    return df_loc['title'].iloc[indices].tolist()


In [44]:
def get_recommendations_detailed(title, cosine_sim=cosine_sim, df=df_filtered):
    """
    Return a DataFrame with detailed info (title, type, genres, description) for top 10 similar titles.
    """
    df_loc = df.reset_index(drop=True)
    title_to_index = pd.Series(df_loc.index, index=df_loc['title'])

    if title not in title_to_index:
        print(f"Title '{title}' not found. Please check spelling and try again.")
        return pd.DataFrame()

    idx = title_to_index[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1:11]
    indices = [i[0] for i in sim_scores]

    return df_loc.loc[indices, ['title', 'type', 'listed_in', 'description']]

In [45]:
if __name__ == "__main__":
    while True:
        content_type = input("Enter content type to get recommendations (Movie/TV Show) or 'exit' to quit: ").strip()
        if content_type.lower() == 'exit':
            print("Goodbye!")
            break
        if content_type not in ['Movie', 'TV Show']:
            print("Please enter either 'Movie' or 'TV Show'.")
            continue
        
        # Filter dataframe based on user choice
        df_filtered = df[df['type'] == content_type].reset_index(drop=True)
        df_filtered['combined_features'] = df_filtered['listed_in'] + ' ' + df_filtered['description']
        
        # Vectorize and compute cosine similarity
        tfidf = TfidfVectorizer(stop_words='english')
        tfidf_matrix = tfidf.fit_transform(df_filtered['combined_features'])
        cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
        
        print(f"\nTotal {content_type}s available: {len(df_filtered)}")
        print("Here are some sample titles you can try:")
        print(df_filtered['title'].sample(10).tolist())
        
        # Ask for a title
        title_to_test = input(f"\nEnter the exact {content_type} title for recommendations (or 'back' to choose content type): ").strip()
        if title_to_test.lower() == 'back':
            continue
        
        recommendations = get_recommendations_detailed(title_to_test, cosine_sim=cosine_sim, df=df_filtered)

        if not recommendations.empty:
            print(f"\n🎯 You selected: {title_to_test}\n")
            print("🎥 Recommended titles:\n")
            for i, row in recommendations.iterrows():
                print(f"🎬 Title       : {row['title']}")
                print(f"📂 Genre       : {row['listed_in']}")
                print(f"📝 Description : {row['description'][:150]}...\n")
        else:
            print("❌ No recommendations found. Try another title.\n")



Total TV Shows available: 2676
Here are some sample titles you can try:
['Deadly Sins', 'Derry Girls', 'The Business of Drugs', 'Mighty Morphin Power Rangers', 'Together', 'B: The Beginning', 'SHAMAN KING', 'Rhyme Time Town Singalongs', 'Motown Magic', 'Miraculous: Tales of Ladybug & Cat Noir']

🎯 You selected: Kota Factory

🎥 Recommended titles:

🎬 Title       : Cheese in the Trap
📂 Genre       : International TV Shows, Korean TV Shows, Romantic TV Shows
📝 Description : In this adaptation of a popular webtoon, a poor student trying to navigate college life gains the attention of a wealthy upperclassman with a dark sid...

🎬 Title       : Racket Boys
📂 Genre       : International TV Shows, TV Comedies, TV Dramas
📝 Description : A city kid is brought to the countryside by his father's new coaching gig: reviving a ragtag middle school badminton team on the brink of extinction....

🎬 Title       : The Creative Indians
📂 Genre       : Docuseries, International TV Shows
📝 Description : Fro

KeyboardInterrupt: Interrupted by user