**Import libraries**

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics.pairwise import cosine_similarity

**Load and clean dataset**

In [4]:
df = pd.read_csv("netflix_titles.csv.zip")
df.fillna('', inplace=True)

**Prepare features **

In [5]:
df['text_features'] = (
    df['title'] + ' ' +
    df['director'] + ' ' +
    df['cast'] + ' ' +
    df['description'] + ' ' +
    df['listed_in']
)


** Preprocess genres**

In [6]:
df['genres'] = df['listed_in'].apply(lambda x: [i.strip() for i in x.split(',')])


** Multi-label encoding of genres**

In [7]:
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(df['genres'])


** TF-IDF vectorization of text features **

In [8]:
tfidf = TfidfVectorizer(stop_words='english', max_features=5000)
X = tfidf.fit_transform(df['text_features'])


**Train-test split **

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**Train Random Forest model for genre prediction**

In [10]:
model = RandomForestClassifier(n_estimators=120, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)


**Predict genres for test set**

In [11]:
y_pred = model.predict(X_test)

**Evaluate performance **

In [12]:
subset_acc = accuracy_score(y_test, y_pred)
f1_macro = f1_score(y_test, y_pred, average='macro')

print(f"\n‚úÖ Genre Prediction Model Trained Successfully!")
print(f"Subset Accuracy: {subset_acc*100:.2f}%")
print(f"Macro F1-score: {f1_macro*100:.2f}%")



‚úÖ Genre Prediction Model Trained Successfully!
Subset Accuracy: 83.43%
Macro F1-score: 71.49%


**Show sample genre predictions**

In [13]:
inv_labels = mlb.inverse_transform(y_pred[:5])
for i, title in enumerate(df['title'].iloc[:5]):
    print(f"\nüé¨ Title: {title}")
    print("Predicted Genres:", inv_labels[i])



üé¨ Title: Dick Johnson Is Dead
Predicted Genres: ('Action & Adventure', 'Comedies')

üé¨ Title: Blood & Water
Predicted Genres: ('Stand-Up Comedy',)

üé¨ Title: Ganglands
Predicted Genres: ("Kids' TV",)

üé¨ Title: Jailbirds New Orleans
Predicted Genres: ('Docuseries', 'Reality TV')

üé¨ Title: Kota Factory
Predicted Genres: ('Crime TV Shows', 'International TV Shows', 'Spanish-Language TV Shows')


** Content-Based Recommendation System**

In [16]:
cosine_sim = cosine_similarity(X, X)
indices = pd.Series(df.index, index=df['title'].str.lower()).drop_duplicates()

def recommend(title, n=5):
    title = title.lower()
    if title not in indices:
        return f"‚ùå '{title}' not found in dataset."

    idx = indices[title]
    sim_scores = list(enumerate(cosine_sim[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:n+1]  # Skip itself
    rec_indices = [i[0] for i in sim_scores]
    return df[['title', 'type', 'listed_in', 'release_year']].iloc[rec_indices]


** Test Recommendation System**

In [18]:
movie_name = "Kota Factory"
print(f"\nüéØ Recommended titles similar to '{movie_name}':\n")
print(recommend(movie_name, 5))


üéØ Recommended titles similar to 'Kota Factory':

                                   title     type  \
2353                       Chaman Bahaar    Movie   
7918                      Sadqay Tumhare  TV Show   
3464                   Engineering Girls  TV Show   
2472                              Betaal  TV Show   
7632  O-Negative, Love Can‚Äôt Be Designed  TV Show   

                                              listed_in  release_year  
2353             Comedies, Dramas, International Movies          2020  
7918  International TV Shows, Romantic TV Shows, TV ...          2014  
3464                International TV Shows, TV Comedies          2018  
2472  International TV Shows, TV Action & Adventure,...          2020  
7632  International TV Shows, Romantic TV Shows, TV ...          2016  
