<a href="https://colab.research.google.com/github/nitish-pandey/Recommendation-System/blob/main/Content_Based_Filtering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re

In [2]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Loading prepared data

In [3]:
# Read the prepared feature table; each row holds an id and a text
# `features` column (see the preview below).
features = pd.read_csv('features.csv', encoding='latin-1')
features.head()

Unnamed: 0,id,features
0,862,toy story john lasseter animation comedy famil...
1,8844,jumanji joe johnston adventure fantasy family ...
2,15602,grumpier old men howard deutch romance comedy ...
3,31357,waiting to exhale forest whitaker comedy drama...
4,11862,father of the bride part ii charles shyer come...


# Feature Extraction

In [4]:
# Turn the text in the `features` column into TF-IDF vectors.
tf = TfidfVectorizer()
vectors = tf.fit_transform(features['features'])

# Pairwise cosine similarity between every pair of rows; the diagonal
# (each item's similarity with itself) is zeroed out.
cosine = cosine_similarity(vectors)
np.fill_diagonal(cosine, 0)

# Label both axes with the ids so rows/columns can be looked up by id.
similarity = pd.DataFrame(cosine, index=features['id'], columns=features['id'])

similarity.iloc[:10, :10]

id,862,8844,15602,31357,11862,949,11860,45325,9091,710
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
862,0.0,0.019921,0.007096,0.006289,0.007109,0.0,0.006885,0.056408,0.0,0.0
8844,0.019921,0.0,0.0,0.0,0.0,0.0,0.0,0.122181,0.013154,0.015464
15602,0.007096,0.0,0.0,0.014461,0.00634,0.0,0.015832,0.0,0.0,0.0
31357,0.006289,0.0,0.014461,0.0,0.005619,0.004195,0.014032,0.003673,0.0,0.0
11862,0.007109,0.0,0.00634,0.005619,0.0,0.0,0.006152,0.0,0.0,0.037729
949,0.0,0.0,0.0,0.004195,0.0,0.0,0.0,0.016955,0.021025,0.024717
11860,0.006885,0.0,0.015832,0.014032,0.006152,0.0,0.0,0.0,0.0,0.0
45325,0.056408,0.122181,0.0,0.003673,0.0,0.016955,0.0,0.0,0.046266,0.026828
9091,0.0,0.013154,0.0,0.0,0.0,0.021025,0.0,0.046266,0.0,0.033376
710,0.0,0.015464,0.0,0.0,0.037729,0.024717,0.0,0.026828,0.033376,0.0


# Recommending

In [5]:
def recommend(id, similarity, n=9):
    """Return the ids of the ``n`` entries most similar to ``id``.

    The ids are read from the row index of ``similarity`` itself, so the
    function depends only on its arguments and not on any notebook-level
    variable.

    Parameters
    ----------
    id : hashable
        Column label of the query item in ``similarity``. (The name shadows
        the ``id`` builtin; it is kept so existing calls keep working.)
    similarity : pandas.DataFrame
        Table of similarity scores. Column ``id`` holds the scores of the
        query item against every row; the row index holds the ids returned.
    n : int, default 9
        Maximum number of ids to return.

    Returns
    -------
    list
        Row labels of ``similarity`` ordered from highest to lowest score in
        column ``id``. Rows with equal scores keep their original relative
        order. At most ``n`` labels are returned.

    Raises
    ------
    KeyError
        If ``id`` is not a column of ``similarity``.
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative, got {}".format(n))
    if id not in similarity.columns:
        raise KeyError("id {!r} not found in similarity columns".format(id))

    scores = similarity[id].values
    # Negating turns the ascending argsort into a descending one; the stable
    # kind keeps tied scores in their original row order.
    top_positions = np.argsort(-scores, kind="stable")[:n]
    return similarity.index[top_positions].tolist()

# Generate top similar movies for each one

In [6]:
def get_similar_movies(data, n=20):
    """Build a table of the ``n`` highest-scoring column labels for each row.

    Parameters
    ----------
    data : pandas.DataFrame
        Score table; for every row the column labels are ranked by that
        row's values, highest first.
    n : int, default 20
        Number of labels to keep per row. Values larger than the number of
        columns are clamped to the number of columns.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``data``, with columns ``top1`` .. ``top<n>`` holding the
        column labels of the largest values in each row, in descending order.

    Raises
    ------
    ValueError
        If ``n`` is negative.
    """
    if n < 0:
        raise ValueError("n must be non-negative, got {}".format(n))
    # A row cannot yield more labels than there are columns.
    n = min(n, data.shape[1])
    # Built once instead of once per row.
    labels = ['top{}'.format(i) for i in range(1, n + 1)]

    def top_labels(row):
        # Column labels of the n largest values in this row.
        ranked = row.sort_values(ascending=False)
        return pd.Series(ranked.index[:n], index=labels)

    return data.apply(top_labels, axis=1)

In [7]:
# Keep the 25 best-scoring ids for every row of the similarity table.
similar_movies = get_similar_movies(similarity, n=25)
similar_movies.head()

Unnamed: 0_level_0,top1,top2,top3,top4,top5,top6,top7,top8,top9,top10,...,top16,top17,top18,top19,top20,top21,top22,top23,top24,top25
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
862,863,869,15875,40820,8916,25447,8469,10306,9446,9663,...,12312,71881,47504,68924,926,6521,22073,5548,24645,21661
8844,2171,812,200,11005,199,177,18975,24100,193,47018,...,8494,13567,10249,879,2493,13466,331,15139,9354,11335
15602,11520,27472,1888,18080,11356,9504,23752,11575,52961,284,...,25697,2690,32488,13818,110643,30285,11857,32308,11077,14475
31357,1883,26149,12158,281,10384,21539,801,35868,15765,20565,...,33644,24679,9715,37667,10397,4816,93946,210307,12335,6028
11862,10385,242,24113,18417,696,11561,11686,703,10440,18254,...,20857,67307,52856,31955,2176,15867,9482,17908,104301,9819


In [8]:
# Example lookup for id 862 (the first row of the feature table above).
recommend(862, similarity)

[863, 869, 15875, 40820, 8916, 25447, 8469, 10306, 9446]

# Saving the similarities

In [9]:
# Persist both tables as CSV: the full similarity matrix and the
# per-row top-N lookup.
similarity.to_csv("similarities.csv")
similar_movies.to_csv('similar_movies.csv')