<a href="https://colab.research.google.com/github/nikhil5589/price-predictor/blob/main/Telugu_Movies_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

IMPORTING THE LIBRARIES

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import difflib

**TELUGU MOVIES DATASET**

In [None]:
# Read the Telugu movies CSV from the path used in the Colab session and preview it.
dataset_path = '/content/TeluguMovies_dataset.csv'
movies = pd.read_csv(dataset_path)
movies

Unnamed: 0.1,Unnamed: 0,Movie,Year,Certificate,Genre,Overview,Runtime,Rating,No.of.Ratings
0,0,Bahubali: The Beginning,2015.0,UA,"Action, Drama","In ancient India, an adventurous and darin...",159,8.1,99114
1,1,Baahubali 2: The Conclusion,2017.0,UA,"Action, Drama","When Shiva, the son of Bahubali, learns ab...",167,8.2,71458
2,2,1 - Nenokkadine,2014.0,UA,"Action, Thriller",A rock star must overcome his psychologica...,170,8.1,42372
3,3,Dhoom:3,2013.0,UA,"Action, Thriller","When Sahir, a circus entertainer trained i...",172,5.4,42112
4,4,Ra.One,2011.0,U,"Action, Adventure, Sci-Fi",When the titular antagonist of an action g...,156,4.6,37211
...,...,...,...,...,...,...,...,...,...
1395,1395,Maro Monagadu,1985.0,,,,0,8.6,49
1396,1396,Jakkanna,2016.0,,"Comedy, Drama",The movie is about an attempt by Sunil the...,0,6.3,49
1397,1397,Muvva Gopaludu,1987.0,,"Drama, Romance",Muvva Gopaludu is a 1987 Indian Telugu fil...,137,7.8,49
1398,1398,Ninney Ishta Paddaanu,2003.0,U,,Hero Charan (Tarun) a middle class family ...,0,5.9,49


In [None]:
# NOTE(review): a positional mapper without `columns=` (or `axis=1`) is applied by
# DataFrame.rename to the row index labels, not the column names, so this call leaves
# the columns untouched. The later cells keep reading the 'Unnamed: 0' column.
movies.rename({'Unnamed: 0': 'Index'}, inplace=True)


**CHECKING THE SHAPE OF DATA**

In [None]:
# (number of rows, number of columns) of the loaded frame.
movies.shape

(1400, 9)

In [None]:
# Per-column dtype and non-null count; a quick way to spot columns with missing values.
movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1400 entries, 0 to 1399
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     1400 non-null   int64  
 1   Movie          1400 non-null   object 
 2   Year           1352 non-null   float64
 3   Certificate    951 non-null    object 
 4   Genre          1389 non-null   object 
 5   Overview       1221 non-null   object 
 6   Runtime        1400 non-null   int64  
 7   Rating         1400 non-null   float64
 8   No.of.Ratings  1400 non-null   int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 109.4+ KB


**CHECKING THE MISSING DATA**

In [None]:
# Percentage of missing values in each column.
missing_counts = movies.isnull().sum()
missing_counts / len(movies) * 100

Unnamed: 0        0.000000
Movie             0.000000
Year              3.428571
Certificate      32.071429
Genre             0.785714
Overview         12.785714
Runtime           0.000000
Rating            0.000000
No.of.Ratings     0.000000
dtype: float64

In [None]:
# List the column names available for feature selection.
movies.columns

Index(['Unnamed: 0', 'Movie', 'Year', 'Certificate', 'Genre', 'Overview',
       'Runtime', 'Rating', 'No.of.Ratings'],
      dtype='object')

**SELECTING THE IMPORTANT FEATURES**

In [None]:
# Text columns used to describe a movie's content. Kept as a flat list of column
# names so that iterating over it yields one column name at a time.
selected_features = ['Genre', 'Overview']

In [None]:
# Replace missing values in the selected text columns with empty strings so the
# columns can be concatenated as text afterwards.
for feature in selected_features:
    filled = movies[feature].fillna('')
    movies[feature] = filled

**COMBINING THE SELECTED FEATURES TOGETHER**

In [None]:
# Join genre and overview into one text per movie. The explicit space keeps the last
# genre word from fusing with the first overview word into a single token whenever
# an overview does not start with whitespace.
combined_features = movies['Genre'] + ' ' + movies['Overview']

In [None]:
# Preview the combined genre + overview text for each movie.
combined_features

0       Action, Drama                In ancient India,...
1       Action, Drama                When Shiva, the s...
2       Action, Thriller                A rock star mu...
3       Action, Thriller                When Sahir, a ...
4       Action, Adventure, Sci-Fi                When ...
                              ...                        
1395                                                     
1396    Comedy, Drama                The movie is abou...
1397    Drama, Romance                Muvva Gopaludu i...
1398        Hero Charan (Tarun) a middle class family ...
1399    Drama                Surendra marries Savitri,...
Length: 1400, dtype: object

In [None]:
# TF-IDF vectorizer created with the library's default settings.
vector = TfidfVectorizer()

**Converting the text data to Feature Vectors**

In [None]:
# Learn the vocabulary from the combined text and encode every movie as a TF-IDF row.
vectors =  vector.fit_transform(combined_features)

In [None]:
# Show the stored entries as "(row, column)  weight" pairs.
print(vectors)

  (0, 3437)	0.21415717710767618
  (0, 5296)	0.3151030583410978
  (0, 5022)	0.1757804527187766
  (0, 571)	0.19552911647629348
  (0, 1723)	0.28939293774260944
  (0, 3282)	0.2367827536134717
  (0, 1205)	0.30485129679550443
  (0, 2374)	0.2575480577572813
  (0, 531)	0.21956841838511332
  (0, 2847)	0.14365964127076553
  (0, 1174)	0.346947938326426
  (0, 277)	0.08010763987538654
  (0, 163)	0.3151030583410978
  (0, 266)	0.11830459504933362
  (0, 2286)	0.20289565072959406
  (0, 276)	0.30485129679550443
  (0, 2273)	0.17581621681899193
  (0, 1422)	0.07691035351646491
  (0, 117)	0.08236987903421875
  (1, 2572)	0.23384510958978966
  (1, 2826)	0.2864244878324167
  (1, 5067)	0.27104600554083136
  (1, 4834)	0.120460757478752
  (1, 1598)	0.21000323776419882
  (1, 3408)	0.17575926747936468
  :	:
  (1399, 4702)	0.4349514410898509
  (1399, 2886)	0.17285243226648891
  (1399, 3167)	0.20193037319305188
  (1399, 2214)	0.1582581563209773
  (1399, 3319)	0.16653359096482614
  (1399, 3020)	0.1474038928751451
  (1

In [None]:
# Cosine similarity between every pair of movie vectors (one row and one column per movie).
similarity = cosine_similarity(vectors)

**CHECKING THE SIMILARITY SCORE BY USING COSINE SIMILARITY**

In [None]:
# Inspect the similarity matrix; each movie compared with itself scores 1.
similarity

array([[1.        , 0.02324405, 0.00735101, ..., 0.02286047, 0.01181832,
        0.04621893],
       [0.02324405, 1.        , 0.05750428, ..., 0.03434865, 0.08728437,
        0.06353   ],
       [0.00735101, 0.05750428, 1.        , ..., 0.        , 0.04562549,
        0.01262951],
       ...,
       [0.02286047, 0.03434865, 0.        , ..., 1.        , 0.05559826,
        0.0149011 ],
       [0.01181832, 0.08728437, 0.04562549, ..., 0.05559826, 1.        ,
        0.0312492 ],
       [0.04621893, 0.06353   , 0.01262951, ..., 0.0149011 , 0.0312492 ,
        1.        ]])

In [None]:
# Ask for a title; it is fuzzy-matched against the dataset further down, so it does
# not have to be spelled exactly as in the data.
movie_name = input('Enter Your Favourite Telugu Movie Name : ')

Enter Your Favourite Telugu Movie Name : Bahubali: The Beginning


**BRINGING TITLES OF ALL MOVIES FROM THE DATA**

In [None]:
# All titles as a plain Python list, which is the form difflib compares against.
movie_titles = movies['Movie'].tolist()
movie_titles

['Bahubali: The Beginning',
 'Baahubali 2: The Conclusion',
 '1 - Nenokkadine',
 'Dhoom:3',
 'Ra.One',
 'Dhoom:2',
 'Eega',
 'Krrish 3',
 'Arjun Reddy',
 'Rangasthalam',
 'Magadheera',
 'War',
 'Bharat Ane Nenu',
 'Saaho',
 'Theri',
 'Dookudu',
 'Pokiri',
 'Sarkar',
 'Athadu',
 'The Ghazi Attack',
 'Kabali',
 'MSG: The Messenger of God',
 'Nanban',
 'Srimanthudu',
 'Veer - Vivegam',
 'Billa 2',
 'Manam',
 '7 Aum Arivu',
 'Bigil',
 'Business Man',
 'Geetha Govindam',
 'Mahanati',
 'Spyder',
 'Nannaku Prematho',
 'Dabangg 3',
 'MSG 2 the Messenger',
 'Manikarnika: The Queen of Jhansi',
 'Race Gurram',
 'Okkadu',
 'Bommarillu',
 'Atharintiki Daaredi',
 'Khaleja',
 'Yennai Arindhaal',
 'Thalaivaa',
 'Kaala',
 'Bairavaa',
 'Goodachari',
 'Puli',
 'Pulimurugan',
 'Veeram',
 'Vedam',
 'Yevadu',
 'Aravindha Sametha Veera Raghava',
 'Billa',
 'Jersey',
 'Sye Raa Narasimha Reddy',
 'Ala Vaikunthapurramuloo',
 'Janatha Garage',
 'Gabbar Singh',
 'Temper',
 'Game Over',
 'Singam 2',
 'Dhruva',
 'J

**FINDING THE CLOSEST MATCH**

In [None]:
# Titles that resemble the input, most similar first (difflib returns at most three
# by default, and an empty list when nothing is similar enough).
close_match = difflib.get_close_matches(movie_name,movie_titles)
close_match

['Bahubali: The Beginning', 'Baahubali 2: The Conclusion']

In [None]:
# difflib returns an empty list when no title is similar enough to the input; fail
# with a clear message instead of an IndexError from close_match[0].
if not close_match:
    raise ValueError(
        "No movie title similar to {!r} was found in the dataset.".format(movie_name)
    )

# The first entry is the best match.
closeset_match = close_match[0]
closeset_match

'Bahubali: The Beginning'

**FINDING THE INDEX OF THE MOVIE BY USING ITS TITLE**


In [None]:
# Look up the value of the 'Unnamed: 0' column for the matched title; it is used
# below as the row position in the similarity matrix.
title_mask = movies['Movie'] == closeset_match
index_of_movie = movies.loc[title_mask, 'Unnamed: 0'].values[0]
index_of_movie

0

In [None]:
# Pair every movie position with its similarity to the chosen movie.
chosen_row = similarity[index_of_movie]
similarity_score = [(position, score) for position, score in enumerate(chosen_row)]
similarity_score

[(0, 1.0000000000000002),
 (1, 0.02324404503161551),
 (2, 0.007351010592699853),
 (3, 0.036008175423133726),
 (4, 0.021803643389756537),
 (5, 0.009669259336848292),
 (6, 0.04302601549909576),
 (7, 0.048683441224766894),
 (8, 0.017409043936582855),
 (9, 0.01925926283186412),
 (10, 0.015075677378666368),
 (11, 0.03698085846392866),
 (12, 0.07086297098304603),
 (13, 0.16142690811294844),
 (14, 0.01017386990857186),
 (15, 0.019633101048071387),
 (16, 0.012599644407574755),
 (17, 0.05242950200645333),
 (18, 0.03448340389744975),
 (19, 0.0051378353010509414),
 (20, 0.009293265711139892),
 (21, 0.05461806003334637),
 (22, 0.036372770840818226),
 (23, 0.09162967565432886),
 (24, 0.020700896581761476),
 (25, 0.03156893981207746),
 (26, 0.02843151055985213),
 (27, 0.04040869026381354),
 (28, 0.018361311925946592),
 (29, 0.031128748896608487),
 (30, 0.06455581967799151),
 (31, 0.15281071585764722),
 (32, 0.057601764256709466),
 (33, 0.03505193137645035),
 (34, 0.027660777836201203),
 (35, 0.01792

In [None]:
# One (position, score) pair per movie in the dataset.
len(similarity_score)

1400

In [None]:
# Rank the (position, score) pairs from most to least similar.
sorted_similar_movies = list(similarity_score)
sorted_similar_movies.sort(key=lambda pair: pair[1], reverse=True)
sorted_similar_movies

[(0, 1.0000000000000002),
 (701, 0.20457490767270728),
 (916, 0.19931863434750122),
 (283, 0.19798542315646567),
 (741, 0.1947646623764026),
 (430, 0.1830853362218211),
 (13, 0.16142690811294844),
 (31, 0.15281071585764722),
 (595, 0.14732434063156302),
 (1200, 0.13974939345306991),
 (462, 0.13535015116466753),
 (549, 0.13272135819626052),
 (667, 0.13261486110246243),
 (961, 0.1252588831773866),
 (299, 0.1225852120040063),
 (383, 0.11658186492916289),
 (989, 0.11557097344145116),
 (87, 0.11526277514063392),
 (276, 0.11499396680730432),
 (1316, 0.11387636204060346),
 (638, 0.11269427425623467),
 (675, 0.11269427425623467),
 (685, 0.11269427425623467),
 (922, 0.11269427425623467),
 (1044, 0.11269427425623467),
 (1072, 0.11269427425623467),
 (1122, 0.11269427425623467),
 (1125, 0.11269427425623467),
 (1139, 0.11269427425623467),
 (1178, 0.11269427425623467),
 (1182, 0.11269427425623467),
 (1184, 0.11269427425623467),
 (1234, 0.11269427425623467),
 (1250, 0.11269427425623467),
 (1266, 0.11

In [None]:
# Entry 0 is the chosen movie itself (similarity 1.0), so entry 1 is the closest other movie.
sorted_similar_movies[1]

(701, 0.20457490767270728)

In [None]:

print("Telugu Movies Suggested for you : ")

# Only the nine best-ranked entries are printed, so take them up front instead of
# filtering the whole frame once for every ranked movie. The first entry is normally
# the chosen movie itself, since its similarity to itself is 1.0.
top_n = 9
for i, (index, _score) in enumerate(sorted_similar_movies[:top_n], start=1):
    # Map the ranked position back to a title through the 'Unnamed: 0' column.
    title_from_index = movies[movies['Unnamed: 0'] == index]['Movie'].values
    print(i , title_from_index)

Telugu Movies Suggested for you : 
1 ['Bahubali: The Beginning']
2 ['Assembly Rowdy']
3 ['Anthaka Mundu Aa Tarvatha']
4 ['Sree']
5 ['Rowdy Fellow']
6 ['Bobby']
7 ['Saaho']
8 ['Mahanati']
9 ['Chinnadana Nee Kosam']


**BUILDING THE RECOMMENDATION SYSTEM**

**ENTER YOUR FAVOURITE MOVIE BELOW TO GET A LIST OF SIMILAR MOVIES:**

In [None]:
# End-to-end recommendation: read a title, fuzzy-match it against the dataset,
# rank all movies by cosine similarity to the match and print the best nine.
movie_name = input('Enter Your Favourite Telugu Movie Name : ')
movie_titles = movies['Movie'].tolist()
close_match = difflib.get_close_matches(movie_name,movie_titles)

if not close_match:
    # difflib found no title similar enough; report it instead of failing with an
    # IndexError on close_match[0].
    print("No movie similar to '" + movie_name + "' was found. Please check the spelling and try again.")
else:
    closeset_match = close_match[0]
    index_of_movie = movies[movies['Movie'] == closeset_match]['Unnamed: 0'].values[0]
    similarity_score = list(enumerate(similarity[index_of_movie]))
    sorted_similar_movies = sorted(similarity_score, key = lambda x: x[1], reverse=True)

    print("Telugu Movies Suggested for you : ")

    # Only the nine best-ranked entries are printed, so slice them off first rather
    # than looking up a title for every ranked movie. The first entry is normally
    # the matched movie itself, since its similarity to itself is 1.0.
    top_n = 9
    for i, (index, _score) in enumerate(sorted_similar_movies[:top_n], start=1):
        title_from_index = movies[movies['Unnamed: 0'] == index]['Movie'].values
        print(i , title_from_index)

Enter Your Favourite Telugu Movie Name : Annamaya
Telugu Movies Suggested for you : 
1 ['Annamayya']
2 ['Sri Manjunatha']
3 ['Sri Madvirat Veera Brahmendra Swamy Charitra']
4 ['Jai Bolo Telangana']
5 ['Homam']
6 ['Tenali Ramakrishna']
7 ['Alluri Seetharama Raju']
8 ['Abhinetri']
9 ['Shanti Kranti']


**THE END**