## YouTube Recomendation system

### Content-based filtering:
### is a recommendation system that makes recommendations to users based on their past interactions with content. The idea behind content-based filtering is to recommend content that is similar to what the user has already shown an interest in.

In [1]:
#pip install --upgrade pandas==1.3.4

In [2]:
import joblib
import pickle

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from ast import literal_eval
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from nltk.stem.snowball import SnowballStemmer
from nltk.stem.wordnet import WordNetLemmatizer
from nltk.corpus import wordnet
from nltk.corpus import stopwords
import re
import nltk
from surprise import Reader, Dataset, SVD, SVDpp, model_selection, NormalPredictor, KNNBasic, KNNWithMeans, KNNWithZScore, KNNBaseline, BaselineOnly, NMF, SlopeOne, CoClustering, accuracy
from surprise.accuracy import rmse
from surprise.model_selection import cross_validate, train_test_split

# Set up inline plotting
%matplotlib inline

# Silence warnings
import warnings
warnings.simplefilter('ignore')

# Uncomment if needed:
# nltk.download('stopwords')

In [4]:
pd.__version__

'1.3.4'

In [5]:
youtube= pd.read_csv("IN_youtube_trending_data.csv")

In [6]:
youtube=youtube[:30000]

In [7]:
youtube.head(2)

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,description
0,Iot0eF6EoNA,Sadak 2 | Official Trailer | Sanjay | Pooja | ...,2020-08-12T04:31:41Z,UCGqvJPRcv7aVFun-eTsatcA,FoxStarHindi,24,2020-08-12T00:00:00Z,sadak|sadak 2|mahesh bhatt|vishesh films|pooja...,9885899,224925,3979409,350210,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,False,False,Three Streams. Three Stories. One Journey. Sta...
1,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tan...,2020-08-11T09:00:11Z,UCm9SZAl03Rev9sFwloCdz1g,Rehaan Records,10,2020-08-12T00:00:00Z,[None],11308046,655450,33242,405146,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,False,False,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...


In [8]:
quartiles = youtube['likes'].quantile([0, 0.2, 0.4, 0.6, 0.8, 1.0])

In [9]:
# Create a new column for ratings based on the quartiles
youtube['Rating'] = pd.cut(youtube['likes'], bins=quartiles, labels=[0,1,2,3,4])

In [10]:
youtube.head(5)

Unnamed: 0,video_id,title,publishedAt,channelId,channelTitle,categoryId,trending_date,tags,view_count,likes,dislikes,comment_count,thumbnail_link,comments_disabled,ratings_disabled,description,Rating
0,Iot0eF6EoNA,Sadak 2 | Official Trailer | Sanjay | Pooja | ...,2020-08-12T04:31:41Z,UCGqvJPRcv7aVFun-eTsatcA,FoxStarHindi,24,2020-08-12T00:00:00Z,sadak|sadak 2|mahesh bhatt|vishesh films|pooja...,9885899,224925,3979409,350210,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,False,False,Three Streams. Three Stories. One Journey. Sta...,4
1,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tan...,2020-08-11T09:00:11Z,UCm9SZAl03Rev9sFwloCdz1g,Rehaan Records,10,2020-08-12T00:00:00Z,[None],11308046,655450,33242,405146,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,False,False,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,4
2,KX06ksuS6Xo,Diljit Dosanjh: CLASH (Official) Music Video |...,2020-08-11T07:30:02Z,UCZRdNleCgW-BGUJf-bbjzQg,Diljit Dosanjh,10,2020-08-12T00:00:00Z,clash diljit dosanjh|diljit dosanjh|diljit dos...,9140911,296533,6179,30058,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,False,False,CLASH official music video performed by DILJIT...,4
3,UsMRgnTcchY,Dil Ko Maine Di Kasam Video | Amaal M Ft.Ariji...,2020-08-10T05:30:49Z,UCq-Fj5jknLsUf-MWSy4_brA,T-Series,10,2020-08-12T00:00:00Z,hindi songs|2020 hindi songs|2020 new songs|t-...,23564512,743931,84162,136942,https://i.ytimg.com/vi/UsMRgnTcchY/default.jpg,False,False,Gulshan Kumar and T-Series presents Bhushan Ku...,4
4,WNSEXJJhKTU,"Baarish (Official Video) Payal Dev,Stebin Ben ...",2020-08-11T05:30:13Z,UCye6Oz0mg46S362LwARGVcA,VYRLOriginals,10,2020-08-12T00:00:00Z,VYRL Original|Mohsin Khan|Shivangi Joshi|Payal...,6783649,268817,8798,22984,https://i.ytimg.com/vi/WNSEXJJhKTU/default.jpg,False,False,VYRL Originals brings to you ‘Baarish’ - the b...,4


In [11]:
youtube['Rating'].unique()

[4, 2, 1, 3, 0, NaN]
Categories (5, int64): [0 < 1 < 2 < 3 < 4]

In [12]:
youtube.dropna(inplace=True)

In [13]:
# Get the number of rows in the youtube DataFrame
num_rows = len(youtube)

# Create a list of values for the new column
values = list(range(1, num_rows + 1))

# Assign the new column to 'userID' in the youtube DataFrame
youtube['userID'] = values

In [14]:
youtube.shape

(29400, 18)

In [15]:
youtube.columns

Index(['video_id', 'title', 'publishedAt', 'channelId', 'channelTitle',
       'categoryId', 'trending_date', 'tags', 'view_count', 'likes',
       'dislikes', 'comment_count', 'thumbnail_link', 'comments_disabled',
       'ratings_disabled', 'description', 'Rating', 'userID'],
      dtype='object')

In [16]:
youtube.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 29400 entries, 0 to 29999
Data columns (total 18 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   video_id           29400 non-null  object  
 1   title              29400 non-null  object  
 2   publishedAt        29400 non-null  object  
 3   channelId          29400 non-null  object  
 4   channelTitle       29400 non-null  object  
 5   categoryId         29400 non-null  int64   
 6   trending_date      29400 non-null  object  
 7   tags               29400 non-null  object  
 8   view_count         29400 non-null  int64   
 9   likes              29400 non-null  int64   
 10  dislikes           29400 non-null  int64   
 11  comment_count      29400 non-null  int64   
 12  thumbnail_link     29400 non-null  object  
 13  comments_disabled  29400 non-null  bool    
 14  ratings_disabled   29400 non-null  bool    
 15  description        29400 non-null  object  
 16  Rati

In [17]:
YouTube_df = youtube[['userID','video_id','title','channelTitle','categoryId','tags','likes','description','channelId','thumbnail_link','Rating']]

In [18]:
YouTube_df.head(2)

Unnamed: 0,userID,video_id,title,channelTitle,categoryId,tags,likes,description,channelId,thumbnail_link,Rating
0,1,Iot0eF6EoNA,Sadak 2 | Official Trailer | Sanjay | Pooja | ...,FoxStarHindi,24,sadak|sadak 2|mahesh bhatt|vishesh films|pooja...,224925,Three Streams. Three Stories. One Journey. Sta...,UCGqvJPRcv7aVFun-eTsatcA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,2,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tan...,Rehaan Records,10,[None],655450,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,UCm9SZAl03Rev9sFwloCdz1g,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4


In [19]:
YouTube_df.title

0        Sadak 2 | Official Trailer | Sanjay | Pooja | ...
1        Kya Baat Aa : Karan Aujla (Official Video) Tan...
2        Diljit Dosanjh: CLASH (Official) Music Video |...
3        Dil Ko Maine Di Kasam Video | Amaal M Ft.Ariji...
4        Baarish (Official Video) Payal Dev,Stebin Ben ...
                               ...                        
29995    India celebrate a win for the ages at the Gabb...
29996    Jethalal Ke Haath Ki Chai | Taarak Mehta Ka Oo...
29997    Bruised, abused but conquered! India stun Aust...
29998    Kumkum Bhagya | Premiere Episode 1751 Preview ...
29999    SISTERS Season 2 | Episode 5 | Girl Formula | ...
Name: title, Length: 29400, dtype: object

In [20]:
YouTube_df["title"] = YouTube_df["title"].str.split('|').str[0].str.strip()

In [21]:
YouTube_df.head(2)

Unnamed: 0,userID,video_id,title,channelTitle,categoryId,tags,likes,description,channelId,thumbnail_link,Rating
0,1,Iot0eF6EoNA,Sadak 2,FoxStarHindi,24,sadak|sadak 2|mahesh bhatt|vishesh films|pooja...,224925,Three Streams. Three Stories. One Journey. Sta...,UCGqvJPRcv7aVFun-eTsatcA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,2,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tania,Rehaan Records,10,[None],655450,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,UCm9SZAl03Rev9sFwloCdz1g,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4


In [22]:
YouTube_df['tags'] = YouTube_df['tags'].str.replace('|', ' ')

In [23]:
YouTube_df.head(3)

Unnamed: 0,userID,video_id,title,channelTitle,categoryId,tags,likes,description,channelId,thumbnail_link,Rating
0,1,Iot0eF6EoNA,Sadak 2,FoxStarHindi,24,sadak sadak 2 mahesh bhatt vishesh films pooja...,224925,Three Streams. Three Stories. One Journey. Sta...,UCGqvJPRcv7aVFun-eTsatcA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,2,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tania,Rehaan Records,10,[None],655450,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,UCm9SZAl03Rev9sFwloCdz1g,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4
2,3,KX06ksuS6Xo,Diljit Dosanjh: CLASH (Official) Music Video,Diljit Dosanjh,10,clash diljit dosanjh diljit dosanjh diljit dos...,296533,CLASH official music video performed by DILJIT...,UCZRdNleCgW-BGUJf-bbjzQg,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,4


In [24]:
YouTube_df.isnull().sum()

userID            0
video_id          0
title             0
channelTitle      0
categoryId        0
tags              0
likes             0
description       0
channelId         0
thumbnail_link    0
Rating            0
dtype: int64

In [25]:
YouTube_df.dropna(inplace=True)

In [26]:
YouTube_df.drop_duplicates(subset=['title'], inplace=True)

In [27]:
YouTube_df['description'].apply(lambda x:x.split())

0        [Three, Streams., Three, Stories., One, Journe...
1        [Singer/Lyrics:, Karan, Aujla, Feat, Tania, Mu...
2        [CLASH, official, music, video, performed, by,...
3        [Gulshan, Kumar, and, T-Series, presents, Bhus...
4        [VYRL, Originals, brings, to, you, ‘Baarish’, ...
                               ...                        
29984    [Voilà!, Digi, presents, the, official, music,...
29987    [#AUSvIND, #India, #Australia, Aakash, Chopra,...
29990    [Click, here, to, watch, the, full, episode, o...
29994    [Click, here, to, Subscribe, to, SAB, TV, Chan...
29996    [Click, here, to, Subscribe, to, Taarak, Mehta...
Name: description, Length: 7312, dtype: object

In [28]:
YouTube_df.head(2)

Unnamed: 0,userID,video_id,title,channelTitle,categoryId,tags,likes,description,channelId,thumbnail_link,Rating
0,1,Iot0eF6EoNA,Sadak 2,FoxStarHindi,24,sadak sadak 2 mahesh bhatt vishesh films pooja...,224925,Three Streams. Three Stories. One Journey. Sta...,UCGqvJPRcv7aVFun-eTsatcA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,2,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tania,Rehaan Records,10,[None],655450,Singer/Lyrics: Karan Aujla Feat Tania Music/ D...,UCm9SZAl03Rev9sFwloCdz1g,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4


In [29]:
YouTube_df['joint_tags'] = YouTube_df['channelTitle'] + YouTube_df['description'] + YouTube_df['tags'] 


In [30]:
YouTube_df['joint_tags'][:5]

0    FoxStarHindiThree Streams. Three Stories. One ...
1    Rehaan RecordsSinger/Lyrics: Karan Aujla Feat ...
2    Diljit DosanjhCLASH official music video perfo...
3    T-SeriesGulshan Kumar and T-Series presents Bh...
4    VYRLOriginalsVYRL Originals brings to you ‘Baa...
Name: joint_tags, dtype: object

In [31]:
YouTube_df.isnull().sum()

userID            0
video_id          0
title             0
channelTitle      0
categoryId        0
tags              0
likes             0
description       0
channelId         0
thumbnail_link    0
Rating            0
joint_tags        0
dtype: int64

In [32]:
YouTube_df['joint_tags']=YouTube_df['joint_tags'].apply(lambda x:x.split())

In [33]:
YouTube_df['joint_tags'][1]

['Rehaan',
 'RecordsSinger/Lyrics:',
 'Karan',
 'Aujla',
 'Feat',
 'Tania',
 'Music/',
 'Desi',
 'Crew',
 'Mix',
 '&',
 'Master',
 '/',
 'Dc',
 'Studio’sVideo/',
 'Sukh',
 'Sanghera',
 'Project',
 'By',
 '/',
 'Deep',
 'Rehaan',
 'Sukh',
 'Bajwa',
 '&',
 'Jeewan',
 'Chahal',
 'Produced',
 'By',
 '/',
 'Sandeep',
 'RehaanLabel',
 '/',
 'Rehaan',
 'RecordsOnline',
 'Promotions',
 '-',
 'Global',
 'Digital',
 'SolutionDigital',
 'Partner',
 '/',
 'Coin',
 'Digital',
 'Social',
 'media',
 'Promotions:',
 'Gk',
 'DigitialSpotify:',
 'https://spoti.fi/3kDfnNuYoutube',
 'Music:',
 'https://bit.ly/31N2tUkGaana:',
 'https://bit.ly/2PHrUkLhttps://wynk.in/u/SSWNC5VO3Website:',
 'WWW.RehaanRecords.CAFB:',
 'https://m.facebook.com/RehaanRecords/INSTA:',
 'Instagram/rehaanrecords[None]']

In [34]:
new = YouTube_df.drop(columns=['channelTitle','categoryId','tags','description','channelId'])#'likes'
#new.head()

In [35]:
new['joint_tags'] = new['joint_tags'].apply(lambda x: " ".join(x))
new.head()

Unnamed: 0,userID,video_id,title,likes,thumbnail_link,Rating,joint_tags
0,1,Iot0eF6EoNA,Sadak 2,224925,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4,FoxStarHindiThree Streams. Three Stories. One ...
1,2,x-KbnJ9fvJc,Kya Baat Aa : Karan Aujla (Official Video) Tania,655450,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4,Rehaan RecordsSinger/Lyrics: Karan Aujla Feat ...
2,3,KX06ksuS6Xo,Diljit Dosanjh: CLASH (Official) Music Video,296533,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,4,Diljit DosanjhCLASH official music video perfo...
3,4,UsMRgnTcchY,Dil Ko Maine Di Kasam Video,743931,https://i.ytimg.com/vi/UsMRgnTcchY/default.jpg,4,T-SeriesGulshan Kumar and T-Series presents Bh...
4,5,WNSEXJJhKTU,"Baarish (Official Video) Payal Dev,Stebin Ben",268817,https://i.ytimg.com/vi/WNSEXJJhKTU/default.jpg,4,VYRLOriginalsVYRL Originals brings to you ‘Baa...


In [36]:
#from nltk.stem.porter import PorterStemme
from nltk.stem import PorterStemmer
ps = PorterStemmer()

In [37]:
def preprocessing(data):
    stemmer = nltk.stem.RSLPStemmer()
    all_stopwords = stopwords.words('english')
    all_stopwords.remove('not')
    corpus = []
    for text in data:
                
        review = re.sub(r"@[A-Za-z0-9_]+", " ", text)
        review = re.sub(r"https?://[A-Za-z0-9./]+", " ", review)
        review = re.sub(r"https?", " ", review)
        review = re.sub('[^a-zA-Z]', ' ', review)
        review = review.lower()
        review = review.split()
        ps = PorterStemmer()
        review = [ps.stem(word) for word in review if not word in set(all_stopwords) if len(word) > 2]
        review = ' '.join(review)
        corpus.append(review)

    return np.array(corpus)

In [38]:
new['cleen_joint_tags'] = preprocessing(new['joint_tags'] .values)

In [39]:
new['cleen_joint_tags'][1]

'rehaan recordssing lyric karan aujla feat tania music desi crew mix master studio svideo sukh sanghera project deep rehaan sukh bajwa jeewan chahal produc sandeep rehaanlabel rehaan recordsonlin promot global digit solutiondigit partner coin digit social media promot digitialspotifi music wynk sswnc websit www rehaanrecord cafb instagram rehaanrecord none'

In [40]:
#new['joint_tags'] = new['joint_tags'].astype('str').str.lower()

In [41]:
from sklearn.feature_extraction.text import CountVectorizer,TfidfVectorizer
# Define additional parameters
ngram_range = (2,3)
max_features = 4000

# Create the TF-IDF vectorizer with the specified parameters
tfidf_vectorizer = TfidfVectorizer(
    ngram_range=ngram_range,
    max_features=max_features
)


In [42]:
vector = tfidf_vectorizer.fit_transform(new['cleen_joint_tags']).toarray()

In [43]:
vector.shape

(7312, 4000)

In [44]:
from sklearn.preprocessing import MinMaxScaler ,StandardScaler
# Apply min-max scaling to your data
scaler = StandardScaler()
scaled_data = scaler.fit_transform(vector)

In [45]:
scaled_data

array([[-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
        -0.10583266, -0.10583266],
       [-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
        -0.10583266, -0.10583266],
       [-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
        -0.10583266, -0.10583266],
       ...,
       [-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
        -0.10583266, -0.10583266],
       [-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
        -0.10583266, -0.10583266],
       [-0.05242819, -0.04969901, -0.05194276, ..., -0.06944292,
         9.69054177,  9.69054177]])

In [46]:
from sklearn.decomposition import PCA

pca = PCA(n_components=0.99)

vector_reduced = pca.fit_transform(scaled_data)

In [47]:
vector.shape

(7312, 4000)

In [48]:
from sklearn.metrics.pairwise import cosine_similarity
# Compute cosine similarity on the reduced vectors
similarity = cosine_similarity(vector_reduced)

In [49]:
new[new['title'] == 'Dil Ko Maine Di Kasam Video'].index[0]

3

### Prediction 

In [50]:
def recommend(movie):
    index = new[new['title'] == movie].index[0]
    distances = sorted(list(enumerate(similarity[index])),reverse=True,key = lambda x: x[1])
    for i in distances[1:6]:
        print(new.iloc[i[0]].title)
        
    

In [51]:
recommend('Diljit Dosanjh: CLASH (Official) Music Video')

Diljit Dosanjh: Born To Shine (Official Music Video) G.O.A.T
PEED: Diljit Dosanjh (Official) Music Video
Peed Diljit Dosanjh Lyrical Video Song
HABIT Lyric Video: Diljit Dosanjh
Diljit Dosanjh: Whiskey lyrical Video Song


In [52]:
recommend('NEW! Ep 2967 - Jethalal Drops Babita!')

NEW! Ep 2975 - Taarak's Boss!
NEW! Ep 3038 - महीनों बाद Soda Shop पे स्वागत
NEW! Ep 3041 - Popatlal के बच्चे!
NEW! Ep 3042 - Jetha Tries To Get Popatlal a Job
NEW! Ep 3043 - कुंवारा पोपटलाल सब्ज़ीवाला


In [53]:
recommend("first Crazy RIDE on Kawasaki NINJA H2")

Finally bought my DreamBike - Ninja H2
I Surprised my Friends With NINJA H2!!
bass Life mai HAYABUSA honi chaheye
baba ka dhaba KYU ? band ho gaya
25 Gifts to Myself on my 25th Bday #AnishkaBirthdayWeek


In [54]:
recommend('IPL 2020 - Patanjali IPL As Sponsor With 10 Big News')

Armenia vs Azerbaijan
Why are Farmers Protesting?
How Joe Biden Won?
New Strain of Coronavirus
Bangalore Violence 2020


### Some advantages of content-based filtering are that it does not rely on other users' behavior, it can handle new items without a large user base, and it can provide personalized recommendations even for niche interests. However, one limitation is that it may not be able to capture the diversity of user interests, as it tends to recommend similar items to what the user has already consumed.

In [55]:
import pickle
with open("YouTube_similarity.pkl", "wb") as f: 
    joblib.dump(similarity, f) 

## colaborative recomendation ststem

### Scikit-learn's Surprise library is a popular Python library for building collaborative filtering recommendation systems. Here's a general overview of how to build a collaborative recommendation system using the Surprise library:

#### Data preparation: The first step is to prepare your data in a format that can be used by the Surprise library. This typically involves creating a user-item matrix, where each row represents a user and each column represents an item. The values in the matrix represent the user's rating or interaction with the item.

In [56]:
new = new.reset_index()
titles = new[['title']]
indices = pd.Series(new.index, index=new['title'])

In [57]:
indices[100:150]

title
TSP's Rabish ki report Unlock 2.0                                                                       100
BADSHAH – BKL (Official Lyrical Video)                                                                  101
அட இவ்வளவு நாள் இது தெரியாம போச்சே                                                                      102
GEOMETRIC SHAPE FOOD CHALLENGE                                                                          103
DIPIKA’ S BIRTHDAY CELEBRATION                                                                          104
DNA: Russia August 12 को लॉन्च करेगा Corona Vaccine                                                     105
MASTER CHEF                                                                                             106
We Made 4 Wheeler Quad-Cycle                                                                            107
Yeh Rishta Kya Kehlata Hai: Meet Naira's daughter!                                                      108
રણછોડ રંગીલા ( ગોવાળીય

In [58]:
#pickle.dump(indices,open('indices.pkl','wb'))
with open("indices.pkl", "wb") as f: 
    joblib.dump(indices, f) 

### Create the dataset: Use Surprise's Dataset module to create a dataset object from your Pandas DataFrame.

In [59]:
reader=Reader(rating_scale=(0, 4))
data = Dataset.load_from_df(youtube[['categoryId','video_id','Rating']], reader)#  'categoryId','video_id','channelId'

### Select an algorithm: Choose a collaborative filtering algorithm to use with the Surprise library. Some popular algorithms include Singular Value Decomposition (SVD), k-Nearest Neighbors (k-NN), and Non-negative Matrix Factorization (NMF).

In [60]:
benchmark = []
# Iterate over all algorithms
for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic()]:
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['MAE'], verbose=False,cv = 3)  # cv = 1 onluy try
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.


In [61]:
surprise_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_mae')

In [62]:
surprise_results

Unnamed: 0_level_0,test_mae,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
KNNBaseline,0.270243,0.052261,0.129253
KNNBasic,0.282383,0.009542,0.098435
SlopeOne,0.284448,4.527798,19.118627
NMF,0.291707,0.689223,0.127234
SVDpp,0.352229,179.602633,53.183991
SVD,0.416784,0.449113,0.37811
NormalPredictor,1.525325,0.028897,0.104447


### Hyperparameter tuning: Finally, you may want to fine-tune the hyperparameters of your model to improve its performance. The Surprise library includes functions for performing grid search or random search to find the optimal hyperparameters for your model.

In [63]:
from surprise import Dataset, SVD
from surprise.model_selection import GridSearchCV

In [64]:
knn = KNNBaseline()
param_grid = {"n_epochs": [250,500,650],"k" :[10,20,30,40,50]}
gs = GridSearchCV(KNNBaseline, param_grid, measures=["rmse", "mae"], cv=3)

In [65]:
gs.fit(data)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matr

In [66]:

# combination of parameters that gave the best RMSE score
print(gs.best_params["mae"])

{'n_epochs': 250, 'k': 10}


### Model training & Model evaluation: After training the model, evaluate its performance using metrics such as Root Mean Squared Error (RMSE), Mean Absolute Error (MAE)

In [67]:
knn = KNNBaseline(n_epochs= 250,k=10, min_k=1,verbose=False)
cross_validate(knn, data, measures=['RMSE','MAE'],cv=3)

{'test_rmse': array([0.54973351, 0.53705205, 0.55870675]),
 'test_mae': array([0.27003157, 0.2626658 , 0.27666686]),
 'fit_time': (0.1046593189239502, 0.03428816795349121, 0.04688096046447754),
 'test_time': (0.31397104263305664, 0.12499380111694336, 0.14093565940856934)}

In [68]:
trainset = data.build_full_trainset()
knn.fit(trainset)

<surprise.prediction_algorithms.knns.KNNBaseline at 0x20aa30e38e0>

In [69]:
#youtube[youtube['likes'] >= 5000]

### Generate recommendations: Use the trained model to generate personalized recommendations for users based on their past interactions with items.

In [70]:
knn.predict(1, 302, 3)

Prediction(uid=1, iid=302, r_ui=3, est=2.0041456740223786, details={'was_impossible': False})

In [71]:
# Save the SVD object as a pickle file
with open('KNNBaseline.pkl', 'wb') as f:
    joblib.dump(knn, f)


In [72]:
#youtube[youtube['categoryId'] ==1]

### Hybrid :
### A hybrid recommendation system combines multiple recommendation techniques, such as content-based and collaborative filtering, to provide more accurate and diverse recommendations to users. The system involves collecting user behavior and preference data, selecting and training the recommendation algorithms, combining the recommendations, and evaluating and refining the system. Additional features, such as user demographics, can be incorporated for further personalization. The key is to fine-tune the system to meet the specific needs of the application.

In [73]:
smd =YouTube_df[['title','likes','categoryId','video_id','thumbnail_link','Rating']]
smd.head(5)

Unnamed: 0,title,likes,categoryId,video_id,thumbnail_link,Rating
0,Sadak 2,224925,24,Iot0eF6EoNA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,Kya Baat Aa : Karan Aujla (Official Video) Tania,655450,10,x-KbnJ9fvJc,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4
2,Diljit Dosanjh: CLASH (Official) Music Video,296533,10,KX06ksuS6Xo,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,4
3,Dil Ko Maine Di Kasam Video,743931,10,UsMRgnTcchY,https://i.ytimg.com/vi/UsMRgnTcchY/default.jpg,4
4,"Baarish (Official Video) Payal Dev,Stebin Ben",268817,10,WNSEXJJhKTU,https://i.ytimg.com/vi/WNSEXJJhKTU/default.jpg,4


In [74]:
id_map= YouTube_df[['video_id','title','categoryId','channelId']].set_index('title')
id_map.head(2)#	movieId	id  title	

Unnamed: 0_level_0,video_id,categoryId,channelId
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sadak 2,Iot0eF6EoNA,24,UCGqvJPRcv7aVFun-eTsatcA
Kya Baat Aa : Karan Aujla (Official Video) Tania,x-KbnJ9fvJc,10,UCm9SZAl03Rev9sFwloCdz1g


In [75]:
indices_map = id_map.set_index('categoryId')

In [76]:
indices_map.head(2)

Unnamed: 0_level_0,video_id,channelId
categoryId,Unnamed: 1_level_1,Unnamed: 2_level_1
24,Iot0eF6EoNA,UCGqvJPRcv7aVFun-eTsatcA
10,x-KbnJ9fvJc,UCm9SZAl03Rev9sFwloCdz1g


In [77]:
#pickle.dump(indices_map,open('indices_map.pkl','wb'))
#indices_map=indices_map.to_csv('indices_map.csv')
with open('indices_map.pkl', 'wb') as f:
    joblib.dump(indices_map, f)

In [78]:
indices_map

Unnamed: 0_level_0,video_id,channelId
categoryId,Unnamed: 1_level_1,Unnamed: 2_level_1
24,Iot0eF6EoNA,UCGqvJPRcv7aVFun-eTsatcA
10,x-KbnJ9fvJc,UCm9SZAl03Rev9sFwloCdz1g
10,KX06ksuS6Xo,UCZRdNleCgW-BGUJf-bbjzQg
10,UsMRgnTcchY,UCq-Fj5jknLsUf-MWSy4_brA
10,WNSEXJJhKTU,UCye6Oz0mg46S362LwARGVcA
...,...,...
10,fDzf3eB6R4E,UCLsMef624nZ4ME6WoSElPTg
17,q3ydfiwIN9U,UCujuVKmt_utAQZJghxlRMIQ
24,kFG5YliTlLU,UC6-F5tO8uklgE9Zy8IvbdFw
24,5lb4DUPXTao,UC6-F5tO8uklgE9Zy8IvbdFw


In [79]:
id_map.loc['Sadak 2']['categoryId']

24

### Prediction 

In [80]:
def hybrid(userId, title):
    idx = indices[title]
    sim_scores = list(enumerate(similarity[int(idx)]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]
    video_indices = [i[0] for i in sim_scores]
    
    video = smd.iloc[video_indices][['title','categoryId','video_id','likes']] # 'Rating'
    video['est'] = video['categoryId'].apply(lambda x: knn.predict(userId, indices_map.iloc[x]['video_id']).est)
   
    video = video.sort_values('likes', ascending=False)
    return video.head(5)

In [81]:
 knn.predict(1, indices_map.iloc[24]['video_id'])

Prediction(uid=1, iid='lw4ZqLZSBow', r_ui=None, est=2.793735047079548, details={'actual_k': 0, 'was_impossible': False})

In [82]:
hybrid(1,'Ahmedabad welcomes PM Modi!')

Unnamed: 0,title,categoryId,video_id,likes,est
2281,Precious moments: PM Modi feeding peacocks at ...,25,axbpbQTIiZo,248007,2.121955
13394,PM Modi's address to the nation,25,ClSk5l6OBx4,57306,2.121955
22694,Details of the New Parliament Building,25,_AOzpFXocKI,49356,2.121955
8842,"PM Modi's Mann Ki Baat with the Nation, Septem...",25,qSCxH_pjQos,23769,2.121955
14302,"PM Modi's Mann Ki Baat with the Nation, Octobe...",25,pqozg9Tchc4,13881,2.121955


In [83]:
hybrid(1,"BTS (방탄소년단) ‘Life Goes On’ Official MV : on my pillow")

Unnamed: 0,title,categoryId,video_id,likes,est
1676,BTS (방탄소년단) 'Dynamite' Official MV,10,gdZLi9oWNZg,7046197,2.636435
1277,BTS (방탄소년단) 'Dynamite' Official Teaser,10,oxoWhyS9buA,3859014,2.636435
19224,BTS (방탄소년단) ‘Life Goes On’ Official MV : on my...,10,yIvb4csSgcs,3161656,2.636435
19831,BTS (방탄소년단) 'Life Goes On' Official MV : in th...,10,RvcP6V4h_q4,2218612,2.636435
8630,BTS (방탄소년단) 'Dynamite' Official MV (Choreograp...,10,BflFNMl_UWY,1736724,2.636435


In [84]:
hybrid(1,'Apple iPhone 12 Pro')

Unnamed: 0,title,categoryId,video_id,likes,est
13402,iPhone 12 / 12 Pro Unboxing - ft MKBHD!,28,b5h4a_O5E44,171969,3.403106
12337,iPhone 12 - What were Apple thinking?,28,nZMUzaLV4X0,153766,3.403106
12026,iPhone 12 - The iPhone is New Again,28,g0AOf2AZ-8w,105332,3.403106
17354,iPhone 12 Pro Max,28,s-INp0FxjXA,102540,3.403106
22413,Shifting from iPhone 12 to Nokia 3310 for a Day !,28,8bglQlBonD4,98990,3.403106


In [85]:
hybrid(1,'Baarish (Official Video) Payal Dev,Stebin Ben')

Unnamed: 0,title,categoryId,video_id,likes,est
16121,Waada Hai (Official Video) Arjun Kanungo,10,tqBTGJ3FXRE,280409,2.636435
28722,Faraar (Official Video) Akull,10,yGRo8lFjZlA,66380,2.636435
18272,Bahot Roye - Official Video,10,CJJGDKn37kU,17370,2.636435
9473,"Hum Tum (Teaser1) Sukriti Kakar, Prakriti Kakar",10,SBaHS0H85ys,6851,2.636435
28388,Faraar (Official Teaser) Akull,10,SWsDGuACGMI,6483,2.636435


In [86]:
hybrid(1,'Types of Ex Girlfriends')

Unnamed: 0,title,categoryId,video_id,likes,est
4020,Rich Mom Vs Normal Mom,22,cWewnrC1qUk,16719,2.316475
13001,Desi vs Firangi,22,hrm0pbrIhAg,12583,2.316475
9622,Couples,22,3Oo0oROejbM,11236,2.316475
1704,Indian Maids,22,3szVkjPhs_Y,6788,2.316475
19897,Before Marriage Vs After Marriage,22,XQNNu7EcjTU,6597,2.316475


In [87]:
hybrid(1,'NEW! Ep 3006 - Abdul Missing?!')

Unnamed: 0,title,categoryId,video_id,likes,est
561,NEW! Ep 2971 - Relaxation With Babita,24,ajmPYL8l7-U,96895,2.793735
3161,NEW! Ep 2979 - Will Jetha Escape Goli?,24,QdgOmsP3OaU,61423,2.793735
1833,NEW! Ep 2974 - Taraak Mehta Gets Late!,24,sf7NDiQfSIo,40777,2.793735
1419,NEW! Ep 2973 - Jetha Invites Babita Over For B...,24,m0tdMLJWxb8,39433,2.793735
3364,NEW! Ep 2981 - Jetha Ki Masti!,24,-UfpObaqoLM,34649,2.793735


In [88]:
hybrid(1,'When 10R meets NINJA H2 !!')

Unnamed: 0,title,categoryId,video_id,likes,est
59,how JS FILMS bought KAWASAKI H2,22,lh7QPjPpXas,123335,2.316475
26,When 10R meets NINJA H2 !!,22,8QiOt2oxyTQ,58734,2.316475
367,Accidental Wheelie on NINJA H2,22,zkArAMNUyAY,53613,2.316475
14219,Surprising my Friends with NEW Bike!!,22,FG9MXlvkUkc,16773,2.316475
16568,SUBSCRIBERS ne Keh ke LELI 😭😂,22,r9RE9eO_qYs,12130,2.316475


In [89]:
hybrid(1,'FULL MATCH - Roman Reigns vs. Murphy: SmackDown LIVE, August 13, 2019')

Unnamed: 0,title,categoryId,video_id,likes,est
6971,FULL MATCH - Roman Reigns vs. Drew McIntyre: W...,17,1pb8jp8uOp4,43634,3.203106
13570,FULL MATCH - Shinsuke Nakamura vs. Roman Reign...,17,UG4Wpk5kD0s,40442,3.203106
5469,FULL MATCH: Roman Reigns vs. John Cena: WWE No...,17,hjUsCZ5i1yc,38493,3.203106
2883,FULL MATCH: Roman Reigns vs. Braun Strowman: W...,17,HpqHxoYJ-sQ,35990,3.203106
4625,FULL MATCH: Rusev vs. Roman Reigns – U.S. Titl...,17,1e1-xibMDkU,27412,3.203106


In [90]:
hybrid(1,'IPL 2020 - Patanjali IPL As Sponsor With 10 Big News')

Unnamed: 0,title,categoryId,video_id,likes,est
7300,IPL 2020 - Match 01,17,lmfk_6ZVoBI,19438,3.203106
19229,IPL 2021 - First List Of All 82 Released Playe...,17,qb_QJVXC38o,13401,3.203106
115,IPL 2020 - Patanjali IPL As Sponsor With 10 Bi...,17,ABrm1mQXj38,11941,3.203106
13860,CSK NEW CAPTAIN,24,zwxx85E6Rjo,7413,2.793735
17317,IPL 2020,27,hHvXxuB2pNU,2099,2.605738


In [91]:
hybrid(1,"DNA: Russia August 12 को लॉन्च करेगा Corona Vaccine")

Unnamed: 0,title,categoryId,video_id,likes,est
1606,DNA: नौकरियों के लिए अलग-अलग प्रवेश परीक्षाओं ...,25,ceIiCY8MWxY,45259,2.121955
11159,"बुजुर्ग को रोता देख मटर पनीर खाने उमड़ी भीड़, ...",25,-7QMtlNPWi4,44454,2.121955
799,PM Modi के ऐतिहासिक भाषण की बड़ी बातें,25,kyNGoxY2eIQ,19603,2.121955
8187,"Film City को लेकर फुल एक्शन मोड में Yogi, बॉली...",25,IPUUafZ4XIw,18130,2.121955
9550,Hathras Case: पीएम मोदी ने ली हाथरस मामले की ज...,25,mzLyIXJg3Ic,10244,2.121955


In [92]:
hybrid(1,"KOHLI & CO start with a fabulous WIN!")

Unnamed: 0,title,categoryId,video_id,likes,est
7972,Back to back WINS for CHENNAI?,17,JZbs3i_HyR4,45784,3.203106
9535,THRILLING WIN for BENGALURU in the SUPER OVER!,17,YJyWEp1SaNE,44152,3.203106
9272,Will DELHI dominate HYDERABAD?,17,YJyWEp1SaNE,43242,3.203106
8035,STELLAR STOINIS seals the deal for DELHI,17,1yUg1L-OYFY,42786,3.203106
7812,Will KOHLI ki Toli DOMINATE over the Orange Army?,17,1yUg1L-OYFY,41485,3.203106


In [93]:
hybrid(1,"Finally bought my DreamBike - Ninja H2")

Unnamed: 0,title,categoryId,video_id,likes,est
12,first Crazy RIDE on Kawasaki NINJA H2,22,KrXRnIESDxM,110209,2.316475
42,I Surprised my Friends With NINJA H2!!,22,7Mz71xlUlw8,89781,2.316475
23909,25 Gifts to Myself on my 25th Bday #AnishkaBir...,26,Ns5IRhJ4R8k,65561,2.320023
11167,baba ka dhaba KYU ? band ho gaya,22,X-Nq5_zL_x0,51009,2.316475
22908,bass Life mai HAYABUSA honi chaheye,22,2TGgdI1L0Hw,48490,2.316475


In [94]:
hybrid(1,"Gujarati song pucho to khara")

Unnamed: 0,title,categoryId,video_id,likes,est
734,Gaman Santhal - દેવ કરે એ કોઈ ના કરે - Dev Kar...,10,fy6Cx0mFDDE,16518,2.636435
11701,Rakesh barot Riddhi Vyash song,10,RO9FKd3am24,15352,2.636435
1699,Choy Ji Mari Hambhad Rakhnar,10,JmRt1IjwjjA,13177,2.636435
1508,Jignesh Barot ( Kaviraj) ચોય જ્યાં મારી હંભાળ ...,10,QwUw7Wb0I-A,12893,2.636435
21270,Chhodi Ne Gaya Tame Mane Je Halat Ma,10,WzHJe62sGzY,12786,2.636435


In [95]:
hybrid(1,"BADSHAH – BKL (Official Lyrical Video)")

Unnamed: 0,title,categoryId,video_id,likes,est
26457,"HOT LAUNDE - Badshah Ft. Fotty Seven, Bali",10,iuqfU9Ll300,108643,2.636435
18799,AWAARA I OFFICIAL MUSIC VIDEO I BADSHAH FT. RE...,10,Mi-Q1KM9Xtk,93698,2.636435
17469,AWAARA - TEASER I BADSHAH FT. REET TALWAR,10,3dxR0aXqcVw,24546,2.636435
27343,BADSHAH - A Day in Mumbai,10,-QtFcNNl_ac,13573,2.636435
10066,THE POWER OF DREAMS - Badshah ft. Lisa Mishra,10,WdktZ2fNQC4,11819,2.636435


In [96]:
hybrid(1,'Diljit Dosanjh: CLASH (Official) Music Video')

Unnamed: 0,title,categoryId,video_id,likes,est
4576,Diljit Dosanjh: Born To Shine (Official Music ...,10,dCmp56tSSmA,229701,2.636435
1700,PEED: Diljit Dosanjh (Official) Music Video,10,cXUndHRKmXQ,162861,2.636435
20332,Diljit Dosanjh: Whiskey lyrical Video Song,10,Hlgy6KvZGug,23572,2.636435
9746,Peed Diljit Dosanjh Lyrical Video Song,10,tq3BdjJZW84,20032,2.636435
10595,HABIT Lyric Video: Diljit Dosanjh,10,0Y72I0Sd_3k,18592,2.636435


In [97]:
smd.head()


Unnamed: 0,title,likes,categoryId,video_id,thumbnail_link,Rating
0,Sadak 2,224925,24,Iot0eF6EoNA,https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg,4
1,Kya Baat Aa : Karan Aujla (Official Video) Tania,655450,10,x-KbnJ9fvJc,https://i.ytimg.com/vi/x-KbnJ9fvJc/default.jpg,4
2,Diljit Dosanjh: CLASH (Official) Music Video,296533,10,KX06ksuS6Xo,https://i.ytimg.com/vi/KX06ksuS6Xo/default.jpg,4
3,Dil Ko Maine Di Kasam Video,743931,10,UsMRgnTcchY,https://i.ytimg.com/vi/UsMRgnTcchY/default.jpg,4
4,"Baarish (Official Video) Payal Dev,Stebin Ben",268817,10,WNSEXJJhKTU,https://i.ytimg.com/vi/WNSEXJJhKTU/default.jpg,4


In [98]:
#smd.to_csv('smd.csv')

In [99]:
#pickle.dump(smd,open('smd.pkl','wb'))
with open("smd.pkl", "wb") as f: 
    joblib.dump(smd, f) 

In [100]:
smd['thumbnail_link'][0][:]

'https://i.ytimg.com/vi/Iot0eF6EoNA/default.jpg'

In [101]:
video_list=smd[['title','likes','thumbnail_link']]

In [102]:
with open("video_list.pkl", "wb") as f: 
    joblib.dump(video_list, f) 

In [103]:
video_list['title'][100:150]

104                    TSP's Rabish ki report Unlock 2.0
105               BADSHAH – BKL (Official Lyrical Video)
106                   அட இவ்வளவு நாள் இது தெரியாம போச்சே
107                       GEOMETRIC SHAPE FOOD CHALLENGE
108                       DIPIKA’ S BIRTHDAY CELEBRATION
110    DNA: Russia August 12 को लॉन्च करेगा Corona Va...
111                                          MASTER CHEF
112                         We Made 4 Wheeler Quad-Cycle
113    Yeh Rishta Kya Kehlata Hai: Meet Naira's daugh...
114                      રણછોડ રંગીલા ( ગોવાળીયો ભાગ-૩ )
115    IPL 2020 - Patanjali IPL As Sponsor With 10 Bi...
116                                        Gaadi Parking
117    CHICKEN UPPU Kari Prepared by Daddy Arumugam /...
118                                           FilterCopy
119        सबसे बड़ी कोका कोला World's Biggest Coca Cola
120                       World's Most Expensive Country
121                                        Manasu Mamata
122          Asi Oh Hunne Aa (O