In [6]:
# Plotting libraries
from matplotlib.pyplot import plot as plt, figure  # For basic plots and figures
import matplotlib.ticker as mtick  # For formatting axes in matplotlib
import seaborn as sns  # For advanced statistical plotting

# Data handling
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations
from numpy import math  # math functions from NumPy (use standard math instead usually)

# Date and time
from datetime import datetime  # For handling date and time

# Plotly (interactive visualizations)
import plotly.graph_objects as go  # Low-level plotting
import plotly.express as px  # High-level, simple interface for Plotly
from plotly.subplots import make_subplots  # For subplot creation
from plotly.offline import init_notebook_mode, iplot  # For offline mode in Jupyter
import plotly.offline as po
import plotly.io as pio  # Plotly input/output operations

# Text and data preprocessing
from collections import Counter  # Count frequency of elements
from sklearn import preprocessing  # Preprocessing tools like label encoding, scaling
from sklearn.feature_extraction.text import TfidfVectorizer  # For transforming text to TF-IDF features

# Model selection (NOTE: you had a typo here!)
from sklearn.model_selection import train_test_split, KFold  # Corrected "model_selction" to "model_selection"

# Dimensionality reduction
from sklearn.decomposition import PCA  # Principal Component Analysis

# NLP
from nltk.corpus import stopwords  # You had a typo here
from nltk.stem import SnowballStemmer  # Corrected "SnowballSteammer"




`np.math` is a deprecated alias for the standard library `math` module (Deprecated Numpy 1.25). Replace usages of `np.math` with `math`



# step 1 Data Loading

In [7]:
stream_net = pd.read_csv("netflix_titles.csv")
stream_net.head()

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,,United States,"September 25, 2021",2020,PG-13,90 min,Documentaries,"As her father nears the end of his life, filmm..."
1,s2,TV Show,Blood & Water,,"Ama Qamata, Khosi Ngema, Gail Mabalane, Thaban...",South Africa,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, TV Dramas, TV Mysteries","After crossing paths at a party, a Cape Town t..."
2,s3,TV Show,Ganglands,Julien Leclercq,"Sami Bouajila, Tracy Gotoas, Samuel Jouy, Nabi...",,"September 24, 2021",2021,TV-MA,1 Season,"Crime TV Shows, International TV Shows, TV Act...",To protect his family from a powerful drug lor...
3,s4,TV Show,Jailbirds New Orleans,,,,"September 24, 2021",2021,TV-MA,1 Season,"Docuseries, Reality TV","Feuds, flirtations and toilet talk go down amo..."
4,s5,TV Show,Kota Factory,,"Mayur More, Jitendra Kumar, Ranjan Raj, Alam K...",India,"September 24, 2021",2021,TV-MA,2 Seasons,"International TV Shows, Romantic TV Shows, TV ...",In a city of coaching centers known to train I...


In [8]:
stream_net.describe()

Unnamed: 0,release_year
count,8807.0
mean,2014.180198
std,8.819312
min,1925.0
25%,2013.0
50%,2017.0
75%,2019.0
max,2021.0


In [10]:
stream_net.description

0       As her father nears the end of his life, filmm...
1       After crossing paths at a party, a Cape Town t...
2       To protect his family from a powerful drug lor...
3       Feuds, flirtations and toilet talk go down amo...
4       In a city of coaching centers known to train I...
                              ...                        
8802    A political cartoonist, a crime reporter and a...
8803    While living alone in a spooky town, a young g...
8804    Looking to survive in a world taken over by zo...
8805    Dragged from civilian life, a former superhero...
8806    A scrappy but poor boy worms his way into a ty...
Name: description, Length: 8807, dtype: object

In [11]:
stream_net.describe

<bound method NDFrame.describe of      show_id     type                  title         director  \
0         s1    Movie   Dick Johnson Is Dead  Kirsten Johnson   
1         s2  TV Show          Blood & Water              NaN   
2         s3  TV Show              Ganglands  Julien Leclercq   
3         s4  TV Show  Jailbirds New Orleans              NaN   
4         s5  TV Show           Kota Factory              NaN   
...      ...      ...                    ...              ...   
8802   s8803    Movie                 Zodiac    David Fincher   
8803   s8804  TV Show            Zombie Dumb              NaN   
8804   s8805    Movie             Zombieland  Ruben Fleischer   
8805   s8806    Movie                   Zoom     Peter Hewitt   
8806   s8807    Movie                 Zubaan      Mozez Singh   

                                                   cast        country  \
0                                                   NaN  United States   
1     Ama Qamata, Khosi Ngema, Gail M

In [12]:
stream_net.isnull().count()

show_id         8807
type            8807
title           8807
director        8807
cast            8807
country         8807
date_added      8807
release_year    8807
rating          8807
duration        8807
listed_in       8807
description     8807
dtype: int64

In [15]:
stream_net.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [16]:
stream_net.index

RangeIndex(start=0, stop=8807, step=1)

In [17]:
stream_net.shape

(8807, 12)

In [19]:
stream_net.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8807 entries, 0 to 8806
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   show_id       8807 non-null   object
 1   type          8807 non-null   object
 2   title         8807 non-null   object
 3   director      6173 non-null   object
 4   cast          7982 non-null   object
 5   country       7976 non-null   object
 6   date_added    8797 non-null   object
 7   release_year  8807 non-null   int64 
 8   rating        8803 non-null   object
 9   duration      8804 non-null   object
 10  listed_in     8807 non-null   object
 11  description   8807 non-null   object
dtypes: int64(1), object(11)
memory usage: 825.8+ KB


In [20]:
# Checking Duplic

stream_net.duplicated().sum()

0

In [None]:
#Missing Valuees Count
stream_net.isna().sum()

show_id            0
type               0
title              0
director        2634
cast             825
country          831
date_added        10
release_year       0
rating             4
duration           3
listed_in          0
description        0
dtype: int64

In [24]:
#total null value 

stream_net.isna().sum().sum()

4307

In [None]:
# Visializing the missing value