#Recommendation System

Submitted By,

NIDIN V NANDAN

Reg No:223039

In [89]:
#importing the Cornell Movie-Dialogs Corpus Dataset from convikit"
!pip install convokit
from convokit import Corpus, download
corpus = Corpus(filename=download("movie-corpus"))

Downloading movie-corpus to /root/.convokit/downloads/movie-corpus
Downloading movie-corpus from http://zissou.infosci.cornell.edu/convokit/datasets/movie-corpus/movie-corpus.zip (40.9MB)... Done


In [90]:
corpus.print_summary_stats()

Number of Speakers: 9035
Number of Utterances: 304713
Number of Conversations: 83097


In [92]:
#installing the surprise library
!pip install scikit-surprise



In [93]:
#importing the necessary libraries required for building the Recommendation System
import pandas as pd
from surprise import Dataset,Reader,SVD
from surprise.model_selection import train_test_split
from surprise import accuracy

In [94]:
# Display basic statistics of the data corpus
print("Number of conversations:", len(corpus.conversations))
print("Number of users:", len(corpus.speakers))
print("Number of utterances:", len(corpus.utterances))

Number of conversations: 83097
Number of users: 9035
Number of utterances: 304713


In [95]:
# Display information about users and their conversations for the first user
count_users = 0

for user_id, user in corpus.speakers.items():
    if count_users >= 1:
        break

    print("User ID:", user_id)

    # Iterate through conversations associated with the user
    for convo_id in user.get_conversation_ids():
        convo = corpus.get_conversation(convo_id)
        print("Conversation ID:", convo_id)
        print("Metadata:", convo.meta)
        print("Number of utterances in conversation:", len(convo.get_utterance_ids()))

    count_users += 1  # Increment the user counter after printing information


User ID: u0
Conversation ID: L1044
Metadata: ConvoKitMeta({'movie_idx': 'm0', 'movie_name': '10 things i hate about you', 'release_year': '1999', 'rating': '6.90', 'votes': '62847', 'genre': "['comedy', 'romance']"})
Number of utterances in conversation: 2
Conversation ID: L984
Metadata: ConvoKitMeta({'movie_idx': 'm0', 'movie_name': '10 things i hate about you', 'release_year': '1999', 'rating': '6.90', 'votes': '62847', 'genre': "['comedy', 'romance']"})
Number of utterances in conversation: 2
Conversation ID: L924
Metadata: ConvoKitMeta({'movie_idx': 'm0', 'movie_name': '10 things i hate about you', 'release_year': '1999', 'rating': '6.90', 'votes': '62847', 'genre': "['comedy', 'romance']"})
Number of utterances in conversation: 2
Conversation ID: L870
Metadata: ConvoKitMeta({'movie_idx': 'm0', 'movie_name': '10 things i hate about you', 'release_year': '1999', 'rating': '6.90', 'votes': '62847', 'genre': "['comedy', 'romance']"})
Number of utterances in conversation: 3
Conversatio

similarly we have this for the remaining 9034 users


Now building a sepearte Dataframe for the purpose of building a recommendation system from the data corpus

The new Dataframe will consist of three columns namely

1) User_Id-display the id of the user

2)Movie_Name-display the name of the movie

3)Rating-display the rating given for the movie by the user

In [96]:

import pandas as pd
# Create an empty list to store the data
data = []
# Iterate through users and conversations
for user_id, user in corpus.speakers.items():
    for convo_id in user.get_conversation_ids():
        convo = corpus.get_conversation(convo_id)
        metadata = convo.meta

        # Extract relevant information
        user_id = user.id
        movie_name = metadata['movie_name']
        rating = metadata['rating']

        # Append the data to the list
        data.append([user_id, movie_name, rating])

# Create a DataFrame from the collected data
df = pd.DataFrame(data, columns=['User_ID', 'Movie_Name', 'Rating'])



In [97]:
df

Unnamed: 0,User_ID,Movie_Name,Rating
0,u0,10 things i hate about you,6.90
1,u0,10 things i hate about you,6.90
2,u0,10 things i hate about you,6.90
3,u0,10 things i hate about you,6.90
4,u0,10 things i hate about you,6.90
...,...,...,...
166189,u9031,zulu dawn,6.40
166190,u9031,zulu dawn,6.40
166191,u9034,zulu dawn,6.40
166192,u9034,zulu dawn,6.40


In [98]:
#chekcing for duplicate values
df.duplicated().any()

True

In [99]:
#dropping the duplicate values
df = df.drop_duplicates()

In [114]:
df.duplicated().any()

False

In [101]:
#so the final dataframe for building the recommendation system
df

Unnamed: 0,User_ID,Movie_Name,Rating
0,u0,10 things i hate about you,6.90
65,u2,10 things i hate about you,6.90
120,u3,10 things i hate about you,6.90
127,u4,10 things i hate about you,6.90
151,u5,10 things i hate about you,6.90
...,...,...,...
166178,u9029,zulu dawn,6.40
166183,u9033,zulu dawn,6.40
166184,u9028,zulu dawn,6.40
166188,u9031,zulu dawn,6.40


In [102]:
#setting the reader here for our dataset the rating was between 1-10
reader = Reader(rating_scale=(1, 10))

In [103]:
data = Dataset.load_from_df(df, reader)

In [104]:
#splitting the dataset for training and testing
trainset, testset = train_test_split(data, test_size=0.8,random_state=42)


In [105]:
#creating the SVD model
model=SVD()

In [106]:
#fitting the model on the training data
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x79048fdbb940>

In [107]:
#predicting for the test set
predictions=model.test(testset)

In [108]:
#checking the accuracy of the model using rmse test
rmse=accuracy.rmse(predictions)
print(f'RMSE:{rmse:.2f}')

RMSE: 0.9548
RMSE:0.95


here the rmse values are small so we can say that the model is performing well


In [109]:
top_n = 3
user_recommendations = []

In [110]:
item_column_name = 'Movie_Name'
all_item_ids = df[item_column_name].unique()

In [111]:
#now predicting the recommendations for a particular user and storing it in the user_recommendations
user_id='u04'
for item_id in all_item_ids:
    predicted_rating = model.predict(user_id, item_id).est
    user_recommendations.append((item_id, predicted_rating))

In [112]:
#sorting the recommendations
user_recommendations.sort(key=lambda x: x[1], reverse=True)


In [113]:
#now displaying the top 3 recommendations for the particular user
top_n = 3
print(f'Top {top_n} recommendations for user {user_id} :')
for item_id, predicted_rating in user_recommendations[:top_n]:
  print(f'Item {item_id}: Predicted Rating ={predicted_rating:.2f}')


Top 3 recommendations for user u04 :
Item one flew over the cuckoo's nest: Predicted Rating =7.88
Item the godfather: part ii: Predicted Rating =7.84
Item schindler's list: Predicted Rating =7.80


So above gives the top 3 recommendation for the user with id "u04" similarly we can give recommendations for the other users also