In [None]:
import string
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
netflix_data = pd.read_csv('/content/netflix_data.csv')
netflix_data['show_id'] = netflix_data['show_id'].str.extract('(\d+)').astype(int)
netflix_data.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [None]:
netflix_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   int64 
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(2), object(10)
memory usage: 825.8+ KB


In [None]:
netflix_data.fillna('', inplace=True)

In [None]:
movie_counts = netflix_data['release_year'].value_counts().sort_index()
fig = go.Figure(data=go.Bar(x=movie_counts.index, y=movie_counts.values))
fig.update_layout(
    plot_bgcolor='rgb(17, 17, 17)',
    paper_bgcolor='rgb(17, 17, 17)',
    font_color='white',
    title='Number of Movies Released Each Year',
    xaxis=dict(title='Year'),
    yaxis=dict(title='Number of Movies')
)
fig.update_traces(marker_color='red')
fig.show()

In [None]:
movie_type_counts = netflix_data['type'].value_counts()

fig = go.Figure(data=go.Pie(labels=movie_type_counts.index, values=movie_type_counts.values))

fig.update_layout(
    plot_bgcolor='rgb(17, 17, 17)',
    paper_bgcolor='rgb(17, 17, 17)',
    font_color='white',
    title='Distribution of C. Types',
)
fig.update_traces(marker=dict(colors=['red']))
fig.show()

In [None]:
top_countries = netflix_data['country'].value_counts().head(10)

fig = px.treemap(names=top_countries.index, parents=["" for _ in top_countries.index], values=top_countries.values)

fig.update_layout(
    plot_bgcolor='rgb(17, 17, 17)',
    paper_bgcolor='rgb(17, 17, 17)',
    font_color='white',
    title='Top Countries with Highest Number of Movies',
)
fig.show()

In [None]:
country_movie_counts = netflix_data['country'].value_counts()

data = pd.DataFrame({'Country': country_movie_counts.index, 'Movie Count': country_movie_counts.values})

fig = px.choropleth(data_frame=data, locations='Country', locationmode='country names',
                    color='Movie Count', title='Number of Movies Released By Country',
                    color_continuous_scale='Reds', range_color=(0, data['Movie Count'].max()),
                    labels={'Movie Count': 'Number of Movies'})

fig.update_layout(
    plot_bgcolor='rgb(17, 17, 17)',
    paper_bgcolor='rgb(17, 17, 17)',
    font_color='white'
)
fig.show()

In [None]:
ratings       = list(netflix_data['rating'].value_counts().index)
rating_counts = list(netflix_data['rating'].value_counts().values)

fig = go.Figure(data=[go.Bar(
    x=ratings,
    y=rating_counts,
    marker_color='#E50914'
)])

fig.update_layout(
    title='Movie Ratings Distribution',
    xaxis_title='Rating',
    yaxis_title='Count',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0.7)',
    font=dict(
        color='white'
    )
)

fig.show()

In [None]:
ratings       = list(netflix_data['duration'].value_counts().index)
rating_counts = list(netflix_data['duration'].value_counts().values)

fig = go.Figure(data=[go.Bar(
    x=ratings,
    y=rating_counts,
    marker_color='#E50914'
)])

fig.update_layout(
    title='Movie Durations Distribution',
    xaxis_title='Rating',
    yaxis_title='Count',
    plot_bgcolor='rgba(0, 0, 0, 0)',
    paper_bgcolor='rgba(0, 0, 0, 0.7)',
    font=dict(
        color='white'
    )
)

fig.show()

In [None]:
df_features = netflix_data[['director','duration','listed_in', 'description']].fillna('')

In [None]:
df_features.shape

(8807, 4)

In [None]:
x=df_features['director'] + ' ' + df_features['duration'] + ' ' + df_features['listed_in'] + ' ' + df_features['description']

In [None]:
x

0       Kirsten Johnson 90 min Documentaries As her fa...
1        2 Seasons International TV Shows, TV Dramas, ...
2       Julien Leclercq 1 Season Crime TV Shows, Inter...
3        1 Season Docuseries, Reality TV Feuds, flirta...
4        2 Seasons International TV Shows, Romantic TV...
                              ...                        
8802    David Fincher 158 min Cult Movies, Dramas, Thr...
8803     2 Seasons Kids' TV, Korean TV Shows, TV Comed...
8804    Ruben Fleischer 88 min Comedies, Horror Movies...
8805    Peter Hewitt 88 min Children & Family Movies, ...
8806    Mozez Singh 111 min Dramas, International Movi...
Length: 8807, dtype: object

In [None]:
x.shape

(8807,)

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf=TfidfVectorizer()

In [None]:
x=tfidf.fit_transform(x)


In [None]:
x.shape

(8807, 24325)

In [None]:
print(x)

  (0, 10666)	0.25623477146794016
  (0, 7590)	0.1524956001247612
  (0, 3022)	0.1583019744553288
  (0, 21719)	0.12396625412976799
  (0, 9819)	0.1251879990908946
  (0, 21939)	0.04511500281304091
  (0, 23472)	0.17276801439992245
  (0, 4584)	0.23646712584451435
  (0, 1224)	0.04454139770987926
  (0, 10941)	0.23646712584451435
  (0, 10570)	0.05303872119073052
  (0, 5568)	0.13807802014817613
  (0, 20539)	0.21230330564677405
  (0, 7954)	0.1743382468036374
  (0, 12556)	0.09574242150486534
  (0, 9984)	0.12870901911890628
  (0, 15238)	0.04978188497148991
  (0, 7048)	0.17032903453405177
  (0, 21706)	0.08502811652109162
  (0, 14722)	0.23207095127019983
  (0, 7751)	0.12360062496381032
  (0, 9854)	0.07868093104873403
  (0, 1633)	0.08640676389443691
  (0, 6341)	0.09447174239212161
  (0, 13942)	0.03885107580734846
  :	:
  (8806, 16562)	0.22971154262436344
  (8806, 7778)	0.24290535571730618
  (8806, 22361)	0.19485254308967206
  (8806, 22464)	0.24875260099453858
  (8806, 19094)	0.2585410509484191
  (8806,

In [None]:
from sklearn.metrics.pairwise import cosine_similarity
Similarity_Score=cosine_similarity(x)

In [None]:
Similarity_Score

array([[1.        , 0.00974292, 0.02811419, ..., 0.01242058, 0.0183492 ,
        0.04108449],
       [0.00974292, 1.        , 0.06799605, ..., 0.00276727, 0.        ,
        0.00822026],
       [0.02811419, 0.06799605, 1.        , ..., 0.01092812, 0.02064139,
        0.06852248],
       ...,
       [0.01242058, 0.00276727, 0.01092812, ..., 1.        , 0.05988384,
        0.01128266],
       [0.0183492 , 0.        , 0.02064139, ..., 0.05988384, 1.        ,
        0.01993854],
       [0.04108449, 0.00822026, 0.06852248, ..., 0.01128266, 0.01993854,
        1.        ]])

In [None]:
Similarity_Score.shape

(8807, 8807)

In [None]:
Favourite_Movie_Name = input("Enter your favourite movie name: ")

Enter your favourite movie name: war


In [None]:
All_Movies_Title_List = netflix_data['title'].tolist()

In [None]:
import difflib

In [None]:
Movie_Recommendation = difflib.get_close_matches(Favourite_Movie_Name, All_Movies_Title_List)
print(Movie_Recommendation)

['Anwar', 'War']


In [None]:
Close_Match = Movie_Recommendation[0]
print(Close_Match)

Anwar


In [None]:
Index_of_Close_Match_Movie = netflix_data[netflix_data.title == Close_Match]['show_id'].values[0]
print(Index_of_Close_Match_Movie)

6177


In [None]:
Recommendation_score = list(enumerate(Similarity_Score[Index_of_Close_Match_Movie]))
print(Recommendation_score)


[(0, 0.0412263742301939), (1, 0.04990067023430827), (2, 0.06424552657781059), (3, 0.029771977777423166), (4, 0.07655634389304473), (5, 0.026238765317015703), (6, 0.016378332301297226), (7, 0.01597948973932981), (8, 0.037818665987208434), (9, 0.01435610624586956), (10, 0.015537028216397433), (11, 0.024546154278552425), (12, 0.019879268089206738), (13, 0.036601727226939135), (14, 0.020718136571435453), (15, 0.05247618279678709), (16, 0.010550861601257323), (17, 0.055455916324292975), (18, 0.007849272502200619), (19, 0.02967336712046908), (20, 0.024871930253705866), (21, 0.04205264049234918), (22, 0.039213990660892636), (23, 0.022682695567571503), (24, 0.0167530426914605), (25, 0.03184523119254895), (26, 0.010172810345367107), (27, 0.013067961051415437), (28, 0.030718978880546872), (29, 0.020117227693050414), (30, 0.020448434877871888), (31, 0.013945601076093647), (32, 0.057063844124176935), (33, 0.03186305165660155), (34, 0.04430270788427432), (35, 0.08323101822451086), (36, 0.0070837426

In [None]:
len(Recommendation_score)

8807

In [None]:
Sorted_Simailar_movies = sorted(Recommendation_score, key = lambda x:x[1],reverse = True)
print(Sorted_Simailar_movies)

[(6177, 0.9999999999999999), (7588, 0.5694006594054446), (4622, 0.2414682514818591), (3541, 0.18065422325640101), (7029, 0.14802439135388926), (3609, 0.1435707147857751), (2739, 0.13998498818420854), (6291, 0.13870089256443835), (3333, 0.12793674666707208), (4762, 0.12555639843066796), (707, 0.12457493643940692), (2070, 0.12440229229655711), (1350, 0.12175512034370979), (3613, 0.12104664032068609), (7521, 0.11945576507811467), (453, 0.1190631663808495), (85, 0.1172603107430428), (3998, 0.11699723296377917), (4930, 0.11633109123988727), (803, 0.11498309554711916), (7748, 0.11473126047693467), (5940, 0.11423229182701315), (76, 0.11368630143417531), (4063, 0.11359843437848385), (3340, 0.11263095753064681), (220, 0.1106504502192952), (7589, 0.10980190130804572), (5674, 0.10861261600462009), (1724, 0.10826602909408153), (338, 0.10814928419142306), (58, 0.10780989330031811), (1123, 0.10744112671143365), (2490, 0.10694184102408542), (5591, 0.10668515883564073), (1040, 0.1064802057098645), (16

In [None]:
print("Top 30 Movies suggested for you: \n")
i=1
for movie in Sorted_Simailar_movies:
    index = movie[0]
    title_from_index = netflix_data[netflix_data.index == index]['title'].values[0]
    if (i<31):
        print(i,'.',title_from_index)
        i+=1

Top 30 Movies suggested for you: 

1 . 忍者ハットリくん
2 . Ninja Hattori
3 . Reboot: The Guardian Code
4 . Naruto
5 . Huntik: Secrets and Seekers
6 . Let's Eat 2
7 . Pokémon the Series
8 . Beyblade: Metal Fusion
9 . CLANNAD
10 . Home: Adventures with Tip & Oh
11 . Let's Eat
12 . Heidi
13 . Power Rangers Ninja Steel
14 . Reply 1994
15 . Mr. Young
16 . BEASTARS
17 . Pokémon Master Journeys: The Series
18 . My Husband Won't Fit
19 . The Magic School Bus Rides Again
20 . Ouran High School Host Club
21 . Planet Earth: The Complete Collection
22 . Breaking Bad
23 . Yowamushi Pedal
24 . The Disastrous Life of Saiki K.
25 . Maid-Sama!
26 . Family Reunion
27 . Ninja Turtles: The Next Mutation
28 . Merlin
29 . Bakugan: Battle Planet
30 . Hunter X Hunter (2011)
