In [17]:
import pandas as pd

In [18]:
# Load the dataset
df = pd.read_csv('C:/Users/Admin/OneDrive/Documents/netflix_titles.csv')

In [19]:
# Step 1: Preview the data
print("Initial Data Preview:\n", df.head())


Initial Data Preview:
     show_id     type                                    title  \
0  81145628    Movie  Norm of the North: King Sized Adventure   
1  80117401    Movie               Jandino: Whatever it Takes   
2  70234439  TV Show                       Transformers Prime   
3  80058654  TV Show         Transformers: Robots in Disguise   
4  80125979    Movie                             #realityhigh   

                   director  \
0  Richard Finn, Tim Maltby   
1                       NaN   
2                       NaN   
3                       NaN   
4          Fernando Lebrija   

                                                cast  \
0  Alan Marriott, Andrew Toth, Brian Dobson, Cole...   
1                                   Jandino Asporaat   
2  Peter Cullen, Sumalee Montano, Frank Welker, J...   
3  Will Friedle, Darren Criss, Constance Zimmer, ...   
4  Nesta Cooper, Kate Walsh, John Michael Higgins...   

                                    country         date_added

In [20]:
# Step 2: Filter data (only TV Shows)
tv_shows = df[df['type'] == 'TV Show']
print("\nFiltered TV Shows:\n", tv_shows.head())


Filtered TV Shows:
      show_id     type                             title director  \
2   70234439  TV Show                Transformers Prime      NaN   
3   80058654  TV Show  Transformers: Robots in Disguise      NaN   
5   80163890  TV Show                           Apaches      NaN   
8   80117902  TV Show                      Fire Chasers      NaN   
26  80244601  TV Show                   Castle of Stars      NaN   

                                                 cast        country  \
2   Peter Cullen, Sumalee Montano, Frank Welker, J...  United States   
3   Will Friedle, Darren Criss, Constance Zimmer, ...  United States   
5   Alberto Ammann, Eloy Azorín, Verónica Echegui,...          Spain   
8                                                 NaN  United States   
26  Chaiyapol Pupart, Jintanutda Lummakanon, Worra...            NaN   

           date_added  release_year    rating  duration  \
2   September 8, 2018          2013  TV-Y7-FV  1 Season   
3   September 8, 20

In [21]:
# Step 3: Handle missing values (fill or drop)
tv_shows['country'].fillna('Unknown', inplace=True)
tv_shows['rating'].fillna('Not Rated', inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  tv_shows['country'].fillna('Unknown', inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tv_shows['country'].fillna('Unknown', inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].meth

In [22]:
# Step: Clean and Convert 'date_added'
tv_shows['date_added'] = tv_shows['date_added'].str.strip()
tv_shows['date_added'] = pd.to_datetime(tv_shows['date_added'], errors='coerce')
tv_shows['year_added'] = tv_shows['date_added'].dt.year


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tv_shows['date_added'] = tv_shows['date_added'].str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tv_shows['date_added'] = pd.to_datetime(tv_shows['date_added'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tv_shows['year_added'] = tv_shows['date_added'].dt.year


In [23]:
# Step 5: Split genres into lists
tv_shows['genre_list'] = tv_shows['listed_in'].apply(lambda x: [genre.strip() for genre in x.split(',')])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  tv_shows['genre_list'] = tv_shows['listed_in'].apply(lambda x: [genre.strip() for genre in x.split(',')])


In [24]:
# Step 6: Group by country and count
country_count = tv_shows['country'].value_counts().reset_index()
country_count.columns = ['country', 'count']
print("\nTop Countries Producing TV Shows:\n", country_count.head())


Top Countries Producing TV Shows:
           country  count
0   United States    550
1         Unknown    281
2  United Kingdom    178
3           Japan    129
4     South Korea    104


In [25]:
# Step 7: Merge with original data (example)
merged_data = pd.merge(tv_shows, country_count, on='country')

In [26]:
# Step 8: Final data preview
print("\nMerged Data Sample:\n", merged_data[['title', 'country', 'count']].head())


Merged Data Sample:
                               title        country  count
0                Transformers Prime  United States    550
1  Transformers: Robots in Disguise  United States    550
2                           Apaches          Spain     37
3                      Fire Chasers  United States    550
4                   Castle of Stars        Unknown    281
