<a href="https://colab.research.google.com/github/s0013/CODSOFT/blob/main/MOVIE_RATING_PREDICTION_WITH_PYTHON.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Load the IMDb India movie table; the CSV is decoded as Latin-1.
movies_data = pd.read_csv(
    'IMDb Movies India.csv',
    encoding='latin1',
)


In [None]:
# Preview the top five rows of the loaded table.
movies_data.head(5)

Unnamed: 0,Name,Year,Duration,Genre,Rating,Votes,Director,Actor 1,Actor 2,Actor 3
0,,,,Drama,,,J.S. Randhawa,Manmauji,Birbal,Rajendra Bhatia
1,#Gadhvi (He thought he was Gandhi),(2019),109 min,Drama,7.0,8.0,Gaurav Bakshi,Rasika Dugal,Vivek Ghamande,Arvind Jangid
2,#Homecoming,(2021),90 min,"Drama, Musical",,,Soumyajit Majumdar,Sayani Gupta,Plabita Borthakur,Roy Angana
3,#Yaaram,(2019),110 min,"Comedy, Romance",4.4,35.0,Ovais Khan,Prateik,Ishita Raj,Siddhant Kapoor
4,...And Once Again,(2010),105 min,Drama,,,Amol Palekar,Rajat Kapoor,Rituparna Sengupta,Antara Mali


In [None]:
# (rows, columns) of the loaded data frame; left as the bare last expression
# so the notebook displays the tuple.

movies_data.shape

(15509, 10)

In [None]:
# Columns whose values are combined into the text describing each movie.
selected_features = [
    'Year',
    'Duration',
    'Genre',
    'Rating',
    'Votes',
    'Director',
]
print(selected_features)

['Year', 'Duration', 'Genre', 'Rating', 'Votes', 'Director']


In [None]:
# Replace missing values in the selected columns with empty strings so the
# columns can later be concatenated as text.
filled_columns = {
    name: movies_data[name].fillna('')
    for name in selected_features
}
movies_data = movies_data.assign(**filled_columns)

In [None]:
# Build one space-separated text per movie from the selected columns.
# Year, Duration, Rating and Votes are cast to str first; Genre and Director
# are concatenated as they are.
year_text = movies_data['Year'].astype(str)
duration_text = movies_data['Duration'].astype(str)
rating_text = movies_data['Rating'].astype(str)
votes_text = movies_data['Votes'].astype(str)

combined_features = (
    year_text + ' '
    + duration_text + ' '
    + movies_data['Genre'] + ' '
    + rating_text + ' '
    + votes_text + ' '
    + movies_data['Director']
)


In [None]:
# Show the combined text built for each movie (pandas abbreviates long Series).
print(combined_features)

0                                    Drama   J.S. Randhawa
1                 (2019) 109 min Drama 7.0 8 Gaurav Bakshi
2        (2021) 90 min Drama, Musical   Soumyajit Majumdar
3         (2019) 110 min Comedy, Romance 4.4 35 Ovais Khan
4                      (2010) 105 min Drama   Amol Palekar
                               ...                        
15504                  (1988)  Action 4.6 11 Mahendra Shah
15505      (1999) 129 min Action, Drama 4.5 655 Kuku Kohli
15506                          (2005)  Action   Kiran Thej
15507                                    (1988)  Action   
15508     (1998) 130 min Action, Drama 6.2 20 K.C. Bokadia
Length: 15509, dtype: object


In [None]:
# converting the text data to feature vectors
# The vectorizer is created with default settings here; it is fitted on
# combined_features in the following cell.

vectorizer = TfidfVectorizer()

In [None]:
# Fit the vectorizer on the combined text and turn each movie's text into a
# TF-IDF weighted row (one row per entry of combined_features).
feature_vectors = vectorizer.fit_transform(combined_features)

In [None]:
# Prints the stored entries as "(row, column)  weight" pairs
# (presumably a SciPy sparse matrix — confirm against the scikit-learn docs).
print(feature_vectors)

  (0, 4613)	0.9834167488028481
  (0, 2352)	0.18136013391601835
  (1, 1605)	0.5559571908682545
  (1, 2512)	0.5850105563878585
  (1, 3758)	0.1262354231876289
  (1, 96)	0.4546598205594512
  (1, 298)	0.33180630935609007
  (1, 2352)	0.12620574237975904
  (2, 3573)	0.4431575865584529
  (2, 5401)	0.6678764695846612
  (2, 3898)	0.2863817412045477
  (2, 1013)	0.3863189954178879
  (2, 301)	0.31363809968323203
  (2, 3758)	0.11818040514769998
  (2, 2352)	0.11815261825705868
  (3, 3276)	0.3241146529159171
  (3, 4145)	0.6312548897510891
  (3, 462)	0.43822036695441047
  (3, 4750)	0.1804091952526784
  (3, 2096)	0.19233243560040425
  (3, 98)	0.3690693025880557
  (3, 3758)	0.11170023502795935
  (3, 298)	0.29360097033735943
  (4, 4176)	0.5914630856273821
  (4, 1326)	0.5480783073215193
  :	:
  (15504, 263)	0.41878174660475614
  (15504, 5094)	0.4399698689984149
  (15504, 97)	0.42741554114850877
  (15504, 1166)	0.1961062144726442
  (15505, 775)	0.5503495900292703
  (15505, 3387)	0.5021689013916021
  (15505,

In [None]:
# getting the similarity scores using cosine similarity
# NOTE: the result is a square array with one row and one column per movie,
# so its size grows quadratically with the number of rows in the dataset.

similarity = cosine_similarity(feature_vectors)

In [None]:
# Show the pairwise similarity matrix (NumPy abbreviates large arrays with "...").
print(similarity)

[[1.         0.02288869 0.02142817 ... 0.         0.         0.02443537]
 [0.02288869 1.         0.02983009 ... 0.         0.         0.0340164 ]
 [0.02142817 0.02983009 1.         ... 0.         0.         0.03184583]
 ...
 [0.         0.         0.         ... 1.         0.07739258 0.03456845]
 [0.         0.         0.         ... 0.07739258 1.         0.08033094]
 [0.02443537 0.0340164  0.03184583 ... 0.03456845 0.08033094 1.        ]]


In [None]:
# Dimensions of the similarity matrix: one row and one column per movie.
print(similarity.shape)

(15509, 15509)


**Getting the movie name from the user**

In [None]:
# getting the movie name from the user

# Strip stray leading/trailing whitespace so it does not lower the
# fuzzy-match score computed against the dataset titles later on.
movie_name = input(' Enter your favourite movie name : ').strip()

 Enter your favourite movie name : HomeComing\


In [None]:
# creating a list with all the movie names given in the dataset

list_of_all_titles = movies_data['Name'].tolist()

# Preview only: the list holds one title per row of the dataset, so printing
# all of it would flood the notebook output.
print(list_of_all_titles[:10])



In [None]:
# Fuzzy-match the typed name against every title in the dataset.
# n and cutoff are difflib's documented defaults, spelled out for the reader:
# at most 3 candidates, each with a similarity ratio of at least 0.6,
# ordered best match first.
find_close_match = difflib.get_close_matches(
    movie_name,
    list_of_all_titles,
    n=3,
    cutoff=0.6,
)
print(find_close_match)

['Homecoming', '#Homecoming', "Mom's Coming"]


In [None]:
# get_close_matches returns an empty list when no title is similar enough;
# fail with a clear message instead of an opaque IndexError.
if not find_close_match:
    raise ValueError(f"No title in the dataset is close to '{movie_name}'.")

# The candidates are ordered best match first.
close_match = find_close_match[0]
print(close_match)

Homecoming


In [None]:
# Check column names in your DataFrame
print(movies_data.columns)

# Use the best match selected earlier (close_match) rather than a hard-coded
# title, so this cell follows whatever the user typed.
# Titles are stored in the 'Name' column.

# Finding the index of the movie with a close match title
index_of_the_movie = movies_data[movies_data['Name'] == close_match].index
if len(index_of_the_movie) > 0:
    # Several rows may share a title; keep the first one.
    index_of_the_movie = index_of_the_movie[0]
    print(f"Index of the movie with title '{close_match}': {index_of_the_movie}")
else:
    print(f"No movie with title '{close_match}' found in the dataset.")


Index(['Name', 'Year', 'Duration', 'Genre', 'Rating', 'Votes', 'Director',
       'Actor 1', 'Actor 2', 'Actor 3'],
      dtype='object')
Index of the movie with title 'Homecoming': 5757


In [None]:
# getting a list of similar movies

# Pair every row position with its similarity to the chosen movie.
similarity_score = list(enumerate(similarity[index_of_the_movie]))

# Preview only: the list has one (index, score) pair per movie, so printing
# all of it would flood the notebook output.
print(similarity_score[:10])

[(0, 0.02365574891045921), (1, 0.03293109122291108), (2, 0.12137012622090969), (3, 0.014573071154550754), (4, 0.0317190509536481), (5, 0.10835515400199322), (6, 0.024388887430504748), (7, 0.014444488805260303), (8, 0.012365544806448008), (9, 0.012446390137365076), (10, 0.027549286970449526), (11, 0.12513432732733745), (12, 0.01331828365987955), (13, 0.023052095591485952), (14, 0.0), (15, 0.02818644754096712), (16, 0.0), (17, 0.01803187569306243), (18, 0.026513679017588512), (19, 0.0), (20, 0.015481787599191705), (21, 0.028120090361495993), (22, 0.014074296933200383), (23, 0.0), (24, 0.0), (25, 0.012540840008019005), (26, 0.0), (27, 0.03457475771043775), (28, 0.028832079353038255), (29, 0.0), (30, 0.027401715262878344), (31, 0.014811597900573193), (32, 0.02964971132481431), (33, 0.014525361897728122), (34, 0.013308835803227824), (35, 0.019979083734088954), (36, 0.03145250035337748), (37, 0.016999529099169842), (38, 0.015401068370246095), (39, 0.11184752198882432), (40, 0.026655194650109

In [None]:
# Number of (index, score) pairs — one per row of the similarity matrix.
len(similarity_score)

15509

In [None]:
# sorting the movies based on their similarity score

# Highest score first; the chosen movie itself is expected at the top because
# it is compared with itself.
sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)

# Preview only: printing one pair per movie would flood the notebook output.
print(sorted_similar_movies[:10])

[(5757, 1.0000000000000002), (970, 0.441887033692281), (1434, 0.4392916152901399), (6051, 0.36145338294800455), (7055, 0.35955559559429817), (10620, 0.3548082385977797), (9363, 0.34992444365622144), (4899, 0.339720887811002), (5154, 0.33927615924339855), (1628, 0.33791276276180227), (14432, 0.33272151285261303), (12494, 0.33029179251879554), (5301, 0.32803327375762176), (5615, 0.32496402680073333), (4523, 0.3097823248214101), (4812, 0.30455769295910257), (3300, 0.30274402491442554), (15225, 0.2996762572461681), (822, 0.29907824038186465), (473, 0.2971498818633049), (6911, 0.2956956984540277), (9127, 0.29334329600711334), (10974, 0.29063570316992077), (10892, 0.28924288596907244), (12767, 0.28609022614909485), (11195, 0.28461632040276), (4564, 0.284445548844652), (2533, 0.28432794412068174), (4687, 0.2833080902005256), (2956, 0.2826000736139368), (3571, 0.2826000736139368), (8116, 0.2818730826039298), (9917, 0.281216671938552), (11743, 0.281216671938552), (12755, 0.28085962862892594), (

In [None]:
# print the name of similar movies based on the index

print('Movies suggested for you : \n')

# Only the 29 best-scoring entries are printed, so slice them off up front
# instead of looping over every movie, and look each title up directly by
# label rather than scanning the whole index with a boolean mask each time.
for i, movie in enumerate(sorted_similar_movies[:29], start=1):
    index = movie[0]
    title_from_index = movies_data.loc[index, 'Name']
    print(i, '.', title_from_index)

Movies suggested for you : 

1 . Homecoming
2 . An Unspoken Love Story
3 . Awesome Mausam
4 . India Dial 100
5 . Kabli Khan
6 . Parking Closed
7 . Miss Anara
8 . Gangs of Gamebaaz
9 . Golmaal Kids
10 . Badlapur Boys
11 . Tragic Fathers Day
12 . Sargana
13 . Gunehgaar
14 . Hawayein
15 . Ek Tha Hero
16 . Gadera
17 . Dabang Sarkar
18 . Yeh Faasley
19 . Albela
20 . Aasha
21 . Junoon
22 . Melody
23 . Pratibimbo
24 . Plot No. 5
25 . Shagird
26 . Pyaar Mein Twist
27 . Enjoy Youth Party
28 . Border Hindustan Ka
29 . Fever


**Movie Recommendation System**

In [None]:
# End-to-end recommendation: read a title, fuzzy-match it against the dataset,
# then list the titles whose feature text is most similar to the match.
# Relies on movies_data and similarity computed in the earlier cells.

# Strip stray whitespace so it does not lower the fuzzy-match score.
movie_name = input('Enter your favourite movie name: ').strip()

list_of_all_titles = movies_data['Name'].tolist()

# Returns an empty list when no title is similar enough to the input.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if find_close_match:
    # Candidates are ordered best match first.
    close_match = find_close_match[0]

    # close_match was taken from the 'Name' column, so at least one row
    # matches; when several rows share the title, the first one is used.
    index_of_the_movie = movies_data[movies_data['Name'] == close_match].index[0]

    # Pair every row position with its similarity to the chosen movie and
    # order the pairs from most to least similar.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))
    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

    print('Movies suggested for you:\n')

    # Only the 29 best-scoring entries are shown, so stop there instead of
    # iterating over every movie in the dataset.
    for i, movie in enumerate(sorted_similar_movies[:29], start=1):
        index = movie[0]
        title_from_index = movies_data.loc[index, 'Name']
        print(i, '.', title_from_index)
else:
    # Previously unreachable: indexing the empty match list raised IndexError
    # before this message could be printed.
    print(f"No movie with a close match to '{movie_name}' found in the dataset.")


Enter your favourite movie name: Aasha
Movies suggested for you:

1 . Aasha
2 . Chandi Pooja
3 . Mohini
4 . Naag Mani
5 . Shahzaadi
6 . Parbat Ke Us Paar
7 . Bedroom Story
8 . Gopal - Krishna
9 . Shabnam
10 . Chup Chup Ke
11 . Chandan
12 . Jodi Kya Banayi Wah Wah Ramji
13 . Ram Navami
14 . Grihalakshmi
15 . Daadi Maa
16 . Johnny-Walker
17 . Ladki
18 . Hari Darshan
19 . Zabardast
20 . Pehli Jhalak
21 . Payal Ki Jhankar
22 . Duniya Rang Rangili
23 . Kasam Teri Kasam
24 . Scholarship
25 . Saat Rang Ke Sapne
26 . Kushti
27 . Screwdriver
28 . Waman Avtar
29 . Johny I Love You
