# Movie Recommender System

### Package import

In [None]:
# !pip install pandas
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

### Data Preparation
Use Pandas to load and preprocess movie data, including creating a combined 'tag' column from 'overview' and 'genres'.

In [3]:
# Load the TMDB export and keep only the columns the recommender needs.
# .copy() makes the selection an independent frame, so adding the 'tag'
# column below cannot trigger pandas' SettingWithCopyWarning.
movie_data = pd.read_csv('tmdb_movies_data.csv')
movie_data = movie_data[['id', 'original_title', 'overview', 'genres']].copy()

# Build the free-text 'tag' that is vectorized later.
# - fillna(''): string concatenation propagates NaN, so without it a movie
#   missing either field would lose its whole tag.
# - ' ' separator: keeps the last word of the overview from fusing with the
#   first genre into a single token.
movie_data['tag'] = (
    movie_data['overview'].fillna('') + ' ' + movie_data['genres'].fillna('')
)

# The source columns are now redundant; only id, original_title and tag remain.
movie_data = movie_data.drop(columns=['overview', 'genres'])

### Feature Extraction
`CountVectorizer` is used to convert the text data into a numerical vector, which is later used for similarity computation.

In [89]:
# Bag-of-words vectorizer for the movie tags:
# - stop_words='english' drops scikit-learn's built-in English stop words.
# - max_features caps the vocabulary at the most frequent terms in the corpus.
# NOTE(review): 10866 may have been picked to match the dataset's row count
# rather than as a tuned vocabulary size -- confirm.
cv = CountVectorizer(
    stop_words='english',
    max_features=10866,
)

In [90]:
# Learn the vocabulary from the tags and build the term-count matrix.
# astype('U') casts every entry to a unicode string so that non-string
# entries (e.g. NaN) are passed to the vectorizer as text.
# .toarray() turns the sparse result into a dense NumPy array.
vector = (
    cv.fit_transform(movie_data['tag'].values.astype('U'))
    .toarray()
)

### Similarity Computation

In [92]:
# Pairwise cosine similarity between the rows of `vector`. With a single
# argument every row is compared against every row of the same matrix, so the
# result is square with one row and one column per movie.
similarity = cosine_similarity(vector)

In [96]:
# Persist the movie table (id, title, tag) for later use.
# The context manager guarantees the file is flushed and closed even if
# pickling raises; the bare open() call left the handle to the garbage
# collector.
with open('movie_list.pkl', 'wb') as movie_list_file:
    pickle.dump(movie_data, movie_list_file)

In [97]:
# Persist the precomputed similarity matrix so it does not have to be
# recomputed at load time.
# The context manager guarantees the file is flushed and closed even if
# pickling raises; the bare open() call left the handle to the garbage
# collector.
with open('similarity.pkl', 'wb') as similarity_file:
    pickle.dump(similarity, similarity_file)