<a href="https://colab.research.google.com/github/scienstien/Basics_Task/blob/main/spider_tasks_1B.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import ast
import nltk
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem.porter import PorterStemmer

# Step 2: Load the Datasets
# Both CSV files are read via relative paths, i.e. from the current working
# directory of the notebook.
movies = pd.read_csv('tmdb_5000_movies.csv')
credits = pd.read_csv('tmdb_5000_credits.csv')

# Step 3: Merge the Datasets
# Rename the credits key in place so both frames share an 'id' column to join on.
credits.rename(columns={'movie_id': 'id'}, inplace=True)

# Merge on id (pandas' default join type is inner, so only ids present in
# both frames survive)
movies = movies.merge(credits, on='id')

# Check columns
print(movies.columns)

# Both inputs carry a 'title' column, so the merge suffixes them as 'title_x'
# (from movies) and 'title_y' (from credits). Keep the movies one under the
# plain name 'title'; 'title_y' is dropped by the column selection in Step 4.
movies.rename(columns={'title_x': 'title'}, inplace=True)

# Now test:
print(movies['title'].head())


# Step 4: Select Useful Columns
# Only these columns feed the tag construction and the recommender below.
movies = movies[['id', 'title', 'overview', 'genres', 'keywords', 'cast', 'crew']]

# Step 5: Data Preprocessing and Feature Extraction

# Function to convert JSON-like string columns to list of strings
def convert(obj):
    """Extract every 'name' value from a stringified list of dicts.

    Args:
        obj: String containing a Python-literal list of dicts, each of which
            has a 'name' key (e.g. '[{"id": 1, "name": "Action"}]').

    Returns:
        A list with the 'name' value of each entry, in the original order.
    """
    # ast.literal_eval only parses literals, so the string is never executed.
    return [entry['name'] for entry in ast.literal_eval(obj)]

# Function to get the first-listed actors (top 3 by default)
def convert_cast(obj, limit=3):
    """Return the names of the first `limit` entries of a stringified cast list.

    Args:
        obj: String containing a Python-literal list of dicts, each of which
            has a 'name' key.
        limit: Maximum number of names to return. Defaults to 3, which keeps
            the behaviour of the original one-argument call (and therefore of
            ``Series.apply(convert_cast)``).

    Returns:
        A list of at most `limit` names, in the order they appear in `obj`.

    Raises:
        ValueError: If `limit` is negative.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from itertools import islice

    if limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    # islice stops after `limit` entries, so later entries are never inspected
    # (same short-circuit as the original counter/break loop).
    return [member['name'] for member in islice(ast.literal_eval(obj), limit)]

# Function to extract director name
def fetch_director(obj):
    """Return the first crew member whose job is 'Director', as a list.

    Args:
        obj: String containing a Python-literal list of dicts, each with
            'job' and 'name' keys.

    Returns:
        A one-element list holding the name of the first entry whose 'job'
        equals 'Director', or an empty list when there is no such entry.
    """
    # Lazy generator: entries after the first director are never examined.
    director_names = (
        member['name']
        for member in ast.literal_eval(obj)
        if member['job'] == 'Director'
    )
    for name in director_names:
        return [name]
    return []

# Apply these functions to the columns
# Each column is overwritten in place: the stringified list of dicts is
# replaced by a plain Python list of name strings.
movies['genres'] = movies['genres'].apply(convert)
movies['keywords'] = movies['keywords'].apply(convert)
movies['cast'] = movies['cast'].apply(convert_cast)
movies['crew'] = movies['crew'].apply(fetch_director)

# Process 'overview' column
# Split the overview into whitespace-separated words. Non-string values
# (presumably missing overviews read as NaN) become an empty list so the list
# concatenation below does not fail.
movies['overview'] = movies['overview'].apply(lambda x: x.split() if isinstance(x, str) else [])


# Step 6: Create 'tags' column
# `+` on these columns concatenates the per-row lists; the result is then
# flattened into one space-separated string per movie.
# NOTE(review): because items are joined with spaces, a multi-word name such
# as a two-word actor or genre name becomes several independent tokens once
# the string is tokenized again — confirm this is intended.
movies['tags'] = movies['overview'] + movies['genres'] + movies['keywords'] + movies['cast'] + movies['crew']
movies['tags'] = movies['tags'].apply(lambda x: " ".join(x))

# Step 7: Text Preprocessing — Lowercasing and Stemming
# Single stemmer instance shared by every stem() call.
ps = PorterStemmer()

def stem(text):
    """Stem every whitespace-separated word of `text`.

    Args:
        text: The string to process.

    Returns:
        The stemmed words re-joined with single spaces (the module-level
        stemmer `ps` does the per-word stemming).
    """
    return " ".join(ps.stem(token) for token in text.split())

# Lowercase each tag string, then stem every word in it.
movies['tags'] = movies['tags'].apply(lambda x: x.lower())
movies['tags'] = movies['tags'].apply(stem)

# Step 8: Vectorization
# Bag-of-words counts over the tag strings, capped at 5000 features with
# English stop words removed; .toarray() converts the result to a dense array
# with one row per movie.
cv = CountVectorizer(max_features=5000, stop_words='english')
vectors = cv.fit_transform(movies['tags']).toarray()

# Step 9: Compute Cosine Similarity
# Pairwise cosine similarity between all rows of `vectors`, so
# similarity[i][j] compares movie i with movie j.
similarity = cosine_similarity(vectors)

# Step 10: Build Recommender Function
def recommend(movie, top_n=10):
    """Print the titles of the movies most similar to `movie`.

    Looks `movie` up by exact title in the module-level `movies` frame, ranks
    every other movie by its score in the matching row of the module-level
    `similarity` matrix, and prints the best `top_n` titles, best first.

    Args:
        movie: Exact title to look up in ``movies['title']``.
        top_n: Number of recommendations to print. Defaults to 10, which
            reproduces the original output.

    Returns:
        None. Results (or a "not found" message) are written to stdout.

    Raises:
        ValueError: If `top_n` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n}")

    # Use the row *position* rather than the index label: `similarity` and
    # `.iloc` are positional, so this stays correct even if the frame's index
    # is not 0..n-1.
    titles = movies['title'].tolist()
    try:
        movie_pos = titles.index(movie)
    except ValueError:
        print(f"Movie '{movie}' not found in the database.")
        return

    distances = similarity[movie_pos]

    # Exclude the query movie explicitly instead of assuming it sorts first:
    # with tied scores (e.g. an all-zero tag vector) it is not guaranteed to
    # be at the head of the ranking. sorted() is stable, so ties keep their
    # original row order.
    candidates = (
        (pos, score) for pos, score in enumerate(distances) if pos != movie_pos
    )
    ranked = sorted(candidates, key=lambda pair: pair[1], reverse=True)[:top_n]

    print(f"\nTop {top_n} movies similar to **{movie}**:\n")
    for pos, _score in ranked:
        print(titles[pos])

# Step 11: Test the Recommender
# Each call prints its recommendations (or a "not found" message) to stdout;
# titles must match the 'title' column exactly.
recommend('Avatar')
recommend('The Dark Knight')
recommend('Interstellar')


Index(['budget', 'genres', 'homepage', 'id', 'keywords', 'original_language',
       'original_title', 'overview', 'popularity', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title_x', 'vote_average',
       'vote_count', 'title_y', 'cast', 'crew'],
      dtype='object')
0                                      Avatar
1    Pirates of the Caribbean: At World's End
2                                     Spectre
3                       The Dark Knight Rises
4                                 John Carter
Name: title, dtype: object

Top 10 movies similar to **Avatar**:

Aliens
Silent Running
Alien
Moonraker
Mission to Mars
Alien³
Spaceballs
Lifeforce
Treasure Planet
Planet of the Apes

Top 10 movies similar to **The Dark Knight**:

The Dark Knight Rises
Batman Begins
Batman
Batman Forever
Batman Returns
Batman & Robin
Kick-Ass
Batman v Superman: Dawn of Justice
Gangster's Paradise: Jerusalema
The God