In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv("netflix_titles.csv")

In [3]:
df.shape

(7787, 12)

In [4]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'date_added',
       'release_year', 'rating', 'duration', 'listed_in', 'description'],
      dtype='object')

In [5]:
df = df.drop(['date_added','release_year'],axis = 1)

In [6]:
df.columns

Index(['show_id', 'type', 'title', 'director', 'cast', 'country', 'rating',
       'duration', 'listed_in', 'description'],
      dtype='object')

#### Replacing all the NaN values with 'Unknown'

In [7]:
df = df.replace(np.nan,'Unknown', regex=True)

In [8]:
df['director'] = df['director'].str.lower()
df['cast'] = df['cast'].str.lower()
df['listed_in'] = df['listed_in'].str.lower()
df['description'] = df['description'].str.lower()
df['country'] = df['country'].str.lower()

In [9]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,rating,duration,listed_in,description
0,s1,TV Show,3%,unknown,"joão miguel, bianca comparato, michel gomes, r...",brazil,TV-MA,4 Seasons,"international tv shows, tv dramas, tv sci-fi &...",in a future where the elite inhabit an island ...
1,s2,Movie,7:19,jorge michel grau,"demián bichir, héctor bonilla, oscar serrano, ...",mexico,TV-MA,93 min,"dramas, international movies",after a devastating earthquake hits mexico cit...
2,s3,Movie,23:59,gilbert chan,"tedd chan, stella chung, henley hii, lawrence ...",singapore,R,78 min,"horror movies, international movies","when an army recruit is found dead, his fellow..."
3,s4,Movie,9,shane acker,"elijah wood, john c. reilly, jennifer connelly...",united states,PG-13,80 min,"action & adventure, independent movies, sci-fi...","in a postapocalyptic world, rag-doll robots hi..."
4,s5,Movie,21,robert luketic,"jim sturgess, kevin spacey, kate bosworth, aar...",united states,PG-13,123 min,dramas,a brilliant group of students become card-coun...


#### Removing punctuation from the attribute 'Description'

In [10]:
import string

In [11]:
df['description'] = df['description'].str.replace('[{}]'.format(string.punctuation), '')

  df['description'] = df['description'].str.replace('[{}]'.format(string.punctuation), '')


#### Removing stopwords from the attibute 'Description'

In [12]:
import nltk

In [13]:
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
stopwds = stopwords.words('english')
nltk.download('punkt')

[nltk_data] Downloading package punkt to C:\Users\Saurabh
[nltk_data]     Sonawane\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [14]:
def removestopwds(get):
    word_tokens = word_tokenize(get)
    result = [w for w in word_tokens if not w.lower() in stopwds]
    result = []
    for w in word_tokens:
        if w not in stopwds:
            result.append(w)
    
    return result          

In [15]:
df['description'] = df['description'].apply(lambda x: removestopwds(x))

#### Making tags by concatenating relevent attributes. These tags will later be converted to vectors.

In [16]:
df['listed_in'] = df.listed_in.apply(lambda x: x.split(','))
df['cast'] = df.cast.apply(lambda x: x.split(','))
df['director'] = df.director.apply(lambda x: x.split(','))
df['country'] = df.country.apply(lambda x: x.split(','))

In [17]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,rating,duration,listed_in,description
0,s1,TV Show,3%,[unknown],"[joão miguel, bianca comparato, michel gomes...",[brazil],TV-MA,4 Seasons,"[international tv shows, tv dramas, tv sci-f...","[future, elite, inhabit, island, paradise, far..."
1,s2,Movie,7:19,[jorge michel grau],"[demián bichir, héctor bonilla, oscar serran...",[mexico],TV-MA,93 min,"[dramas, international movies]","[devastating, earthquake, hits, mexico, city, ..."
2,s3,Movie,23:59,[gilbert chan],"[tedd chan, stella chung, henley hii, lawre...",[singapore],R,78 min,"[horror movies, international movies]","[army, recruit, found, dead, fellow, soldiers,..."
3,s4,Movie,9,[shane acker],"[elijah wood, john c. reilly, jennifer conne...",[united states],PG-13,80 min,"[action & adventure, independent movies, sci...","[postapocalyptic, world, ragdoll, robots, hide..."
4,s5,Movie,21,[robert luketic],"[jim sturgess, kevin spacey, kate bosworth, ...",[united states],PG-13,123 min,[dramas],"[brilliant, group, students, become, cardcount..."


In [18]:
df['director'] = df['director'].apply(lambda x:[i.replace(" ","") for i in x])
df['cast'] = df['cast'].apply(lambda x:[i.replace(" ","") for i in x])
df['listed_in'] = df['listed_in'].apply(lambda x:[i.replace(" ","") for i in x])
df['country'] = df['country'].apply(lambda x:[i.replace(" ","") for i in x])

In [19]:
df.head()

Unnamed: 0,show_id,type,title,director,cast,country,rating,duration,listed_in,description
0,s1,TV Show,3%,[unknown],"[joãomiguel, biancacomparato, michelgomes, rod...",[brazil],TV-MA,4 Seasons,"[internationaltvshows, tvdramas, tvsci-fi&fant...","[future, elite, inhabit, island, paradise, far..."
1,s2,Movie,7:19,[jorgemichelgrau],"[demiánbichir, héctorbonilla, oscarserrano, az...",[mexico],TV-MA,93 min,"[dramas, internationalmovies]","[devastating, earthquake, hits, mexico, city, ..."
2,s3,Movie,23:59,[gilbertchan],"[teddchan, stellachung, henleyhii, lawrencekoh...",[singapore],R,78 min,"[horrormovies, internationalmovies]","[army, recruit, found, dead, fellow, soldiers,..."
3,s4,Movie,9,[shaneacker],"[elijahwood, johnc.reilly, jenniferconnelly, c...",[unitedstates],PG-13,80 min,"[action&adventure, independentmovies, sci-fi&f...","[postapocalyptic, world, ragdoll, robots, hide..."
4,s5,Movie,21,[robertluketic],"[jimsturgess, kevinspacey, katebosworth, aaron...",[unitedstates],PG-13,123 min,[dramas],"[brilliant, group, students, become, cardcount..."


In [20]:
df['tags'] = df['description'] + df['listed_in'] + df['cast'] + df['director'] + df['country']

In [21]:
new_db = df[['show_id','title','tags']]

In [22]:
new_db.head()

Unnamed: 0,show_id,title,tags
0,s1,3%,"[future, elite, inhabit, island, paradise, far..."
1,s2,7:19,"[devastating, earthquake, hits, mexico, city, ..."
2,s3,23:59,"[army, recruit, found, dead, fellow, soldiers,..."
3,s4,9,"[postapocalyptic, world, ragdoll, robots, hide..."
4,s5,21,"[brilliant, group, students, become, cardcount..."


#### Stemming the tags attribute

In [23]:
from nltk.stem.porter import PorterStemmer
pstem = PorterStemmer()

In [24]:
def stemmwds(txt):
    temp = []
    for i in txt.split():
        temp.append(pstem.stem(i))
    return " ".join(temp)

In [25]:
new_db['tags'] = new_db['tags'].apply(lambda x:" ".join(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['tags'] = new_db['tags'].apply(lambda x:" ".join(x))


In [26]:
new_db['tags'] = new_db['tags'].apply(stemmwds)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['tags'] = new_db['tags'].apply(stemmwds)


In [27]:
new_db.head()

Unnamed: 0,show_id,title,tags
0,s1,3%,futur elit inhabit island paradis far crowd sl...
1,s2,7:19,devast earthquak hit mexico citi trap survivor...
2,s3,23:59,armi recruit found dead fellow soldier forc co...
3,s4,9,postapocalypt world ragdol robot hide fear dan...
4,s5,21,brilliant group student becom cardcount expert...


#### Getting imdb ids for each title

In [29]:
#def get_title_imdb_id(name):
    #NaN = float("NaN")
    #ia = imdb.IMDb()
    #search = ia.search_movie(name)
    #if search == []:
         #return NaN
    #else:
        #id = search[0].movieID
        #return id

In [30]:
#new_db['imdb_id'] = new_db['title'].apply(lambda x: get_title_imdb_id(x))

#### Above process takes considerable amount of time,since dataset size is large.

In [32]:
new_db

Unnamed: 0,show_id,title,tags,imdb_id
0,s1,3%,futur elit inhabit island paradis far crowd sl...,4922804.0
1,s2,7:19,devast earthquak hit mexico citi trap survivor...,5979312.0
2,s3,23:59,armi recruit found dead fellow soldier forc co...,2100376.0
3,s4,9,postapocalypt world ragdol robot hide fear dan...,472033.0
4,s5,21,brilliant group student becom cardcount expert...,478087.0
...,...,...,...,...
7782,s7783,Zozo,lebanon civil war depriv zozo famili he left g...,448267.0
7783,s7784,Zubaan,scrappi poor boy worm way tycoon dysfunct fami...,3776484.0
7784,s7785,Zulu Man in Japan,documentari south african rapper nasti c hit s...,15075334.0
7785,s7786,Zumbo's Just Desserts,dessert wizard adriano zumbo look next “ willi...,7239256.0


In [33]:
new_db['imdb_id'].isnull().sum()

168

In [34]:
new_db.shape

(7787, 4)

In [35]:
new_db = new_db.dropna()

In [36]:
new_db.shape

(7619, 4)

In [37]:
new_db['imdb_id'] = new_db['imdb_id'].astype(np.int64)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['imdb_id'] = new_db['imdb_id'].astype(np.int64)


In [38]:
new_db.head()

Unnamed: 0,show_id,title,tags,imdb_id
0,s1,3%,futur elit inhabit island paradis far crowd sl...,4922804
1,s2,7:19,devast earthquak hit mexico citi trap survivor...,5979312
2,s3,23:59,armi recruit found dead fellow soldier forc co...,2100376
3,s4,9,postapocalypt world ragdol robot hide fear dan...,472033
4,s5,21,brilliant group student becom cardcount expert...,478087


#### Next step is to take top 3 actors from the "cast" attribute and 1 main director from the "Director" attribute

In [39]:
def splitnow(temp):
    lst = temp.split(", ")
    return lst

In [40]:
tempdf = pd.read_csv('netflix_titles.csv')

In [41]:
tempdf = tempdf.replace(np.nan,'Unknown', regex=True)

In [42]:
tempdf['cast_split'] = tempdf['cast'].apply(lambda x: splitnow(x))

In [43]:
tempdf['cast_split']

0       [João Miguel, Bianca Comparato, Michel Gomes, ...
1       [Demián Bichir, Héctor Bonilla, Oscar Serrano,...
2       [Tedd Chan, Stella Chung, Henley Hii, Lawrence...
3       [Elijah Wood, John C. Reilly, Jennifer Connell...
4       [Jim Sturgess, Kevin Spacey, Kate Bosworth, Aa...
                              ...                        
7782    [Imad Creidi, Antoinette Turk, Elias Gergi, Ca...
7783    [Vicky Kaushal, Sarah-Jane Dias, Raaghav Chana...
7784                                            [Nasty C]
7785                         [Adriano Zumbo, Rachel Khoo]
7786                                            [Unknown]
Name: cast_split, Length: 7787, dtype: object

In [44]:
def topthree(lst):
    sample = []
    temp = len(lst)  
    if temp >= 3:
        sample.extend(lst[0].split(","))
        sample.extend(lst[1].split(","))
        sample.extend(lst[2].split(","))
    else:
        sample.extend(lst)

    return sample

In [45]:
tempdf["top_cast"] = tempdf["cast_split"].apply(lambda x: topthree(x))

In [46]:
tempdf['top_cast']

0           [João Miguel, Bianca Comparato, Michel Gomes]
1          [Demián Bichir, Héctor Bonilla, Oscar Serrano]
2                   [Tedd Chan, Stella Chung, Henley Hii]
3        [Elijah Wood, John C. Reilly, Jennifer Connelly]
4             [Jim Sturgess, Kevin Spacey, Kate Bosworth]
                              ...                        
7782          [Imad Creidi, Antoinette Turk, Elias Gergi]
7783    [Vicky Kaushal, Sarah-Jane Dias, Raaghav Chanana]
7784                                            [Nasty C]
7785                         [Adriano Zumbo, Rachel Khoo]
7786                                            [Unknown]
Name: top_cast, Length: 7787, dtype: object

In [47]:
tempdf[["actor1","actor2","actor3"]] = pd.DataFrame(tempdf.top_cast.to_list(),index = tempdf.index)

In [48]:
tempdf[["actor1","actor2","actor3"]]

Unnamed: 0,actor1,actor2,actor3
0,João Miguel,Bianca Comparato,Michel Gomes
1,Demián Bichir,Héctor Bonilla,Oscar Serrano
2,Tedd Chan,Stella Chung,Henley Hii
3,Elijah Wood,John C. Reilly,Jennifer Connelly
4,Jim Sturgess,Kevin Spacey,Kate Bosworth
...,...,...,...
7782,Imad Creidi,Antoinette Turk,Elias Gergi
7783,Vicky Kaushal,Sarah-Jane Dias,Raaghav Chanana
7784,Nasty C,,
7785,Adriano Zumbo,Rachel Khoo,


In [49]:
tempdf["actor1"].isnull().sum()

0

In [50]:
tempdf["actor2"].isnull().sum()

1546

In [51]:
tempdf["actor3"].isnull().sum()

1731

In [52]:
tempdf['actor2'] = tempdf['actor2'].replace(np.nan,'Unknown', regex=True)

In [53]:
tempdf['actor3'] = tempdf['actor3'].replace(np.nan,'Unknown', regex=True)

In [54]:
tempdf['dir_split'] = tempdf['director'].apply(lambda x: splitnow(x))

In [55]:
def get_dir(lst):
    sample = lst[0]
    return sample

In [56]:
tempdf['Main_director'] = tempdf['dir_split'].apply(lambda x: get_dir(x))

In [57]:
new_db['actor_one'] = new_db['show_id'].map(tempdf.set_index('show_id')['actor1'])
new_db['actor_two'] = new_db['show_id'].map(tempdf.set_index('show_id')['actor2'])
new_db['actor_three'] = new_db['show_id'].map(tempdf.set_index('show_id')['actor3'])
new_db['Director'] = new_db['show_id'].map(tempdf.set_index('show_id')['Main_director'])
new_db['Duration'] = new_db['show_id'].map(tempdf.set_index('show_id')['duration'])
new_db['Description'] = new_db['show_id'].map(tempdf.set_index('show_id')['description'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['actor_one'] = new_db['show_id'].map(tempdf.set_index('show_id')['actor1'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['actor_two'] = new_db['show_id'].map(tempdf.set_index('show_id')['actor2'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['actor_three'] = new_db['show_id

In [58]:
new_db

Unnamed: 0,show_id,title,tags,imdb_id,actor_one,actor_two,actor_three,Director,Duration,Description
0,s1,3%,futur elit inhabit island paradis far crowd sl...,4922804,João Miguel,Bianca Comparato,Michel Gomes,Unknown,4 Seasons,In a future where the elite inhabit an island ...
1,s2,7:19,devast earthquak hit mexico citi trap survivor...,5979312,Demián Bichir,Héctor Bonilla,Oscar Serrano,Jorge Michel Grau,93 min,After a devastating earthquake hits Mexico Cit...
2,s3,23:59,armi recruit found dead fellow soldier forc co...,2100376,Tedd Chan,Stella Chung,Henley Hii,Gilbert Chan,78 min,"When an army recruit is found dead, his fellow..."
3,s4,9,postapocalypt world ragdol robot hide fear dan...,472033,Elijah Wood,John C. Reilly,Jennifer Connelly,Shane Acker,80 min,"In a postapocalyptic world, rag-doll robots hi..."
4,s5,21,brilliant group student becom cardcount expert...,478087,Jim Sturgess,Kevin Spacey,Kate Bosworth,Robert Luketic,123 min,A brilliant group of students become card-coun...
...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Zozo,lebanon civil war depriv zozo famili he left g...,448267,Imad Creidi,Antoinette Turk,Elias Gergi,Josef Fares,99 min,When Lebanon's Civil War deprives Zozo of his ...
7783,s7784,Zubaan,scrappi poor boy worm way tycoon dysfunct fami...,3776484,Vicky Kaushal,Sarah-Jane Dias,Raaghav Chanana,Mozez Singh,111 min,A scrappy but poor boy worms his way into a ty...
7784,s7785,Zulu Man in Japan,documentari south african rapper nasti c hit s...,15075334,Nasty C,Unknown,Unknown,Unknown,44 min,"In this documentary, South African rapper Nast..."
7785,s7786,Zumbo's Just Desserts,dessert wizard adriano zumbo look next “ willi...,7239256,Adriano Zumbo,Rachel Khoo,Unknown,Unknown,1 Season,Dessert wizard Adriano Zumbo looks for the nex...


In [59]:
new_db['rating'] = new_db['show_id'].map(tempdf.set_index('show_id')['rating'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['rating'] = new_db['show_id'].map(tempdf.set_index('show_id')['rating'])


In [60]:
new_db

Unnamed: 0,show_id,title,tags,imdb_id,actor_one,actor_two,actor_three,Director,Duration,Description,rating
0,s1,3%,futur elit inhabit island paradis far crowd sl...,4922804,João Miguel,Bianca Comparato,Michel Gomes,Unknown,4 Seasons,In a future where the elite inhabit an island ...,TV-MA
1,s2,7:19,devast earthquak hit mexico citi trap survivor...,5979312,Demián Bichir,Héctor Bonilla,Oscar Serrano,Jorge Michel Grau,93 min,After a devastating earthquake hits Mexico Cit...,TV-MA
2,s3,23:59,armi recruit found dead fellow soldier forc co...,2100376,Tedd Chan,Stella Chung,Henley Hii,Gilbert Chan,78 min,"When an army recruit is found dead, his fellow...",R
3,s4,9,postapocalypt world ragdol robot hide fear dan...,472033,Elijah Wood,John C. Reilly,Jennifer Connelly,Shane Acker,80 min,"In a postapocalyptic world, rag-doll robots hi...",PG-13
4,s5,21,brilliant group student becom cardcount expert...,478087,Jim Sturgess,Kevin Spacey,Kate Bosworth,Robert Luketic,123 min,A brilliant group of students become card-coun...,PG-13
...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Zozo,lebanon civil war depriv zozo famili he left g...,448267,Imad Creidi,Antoinette Turk,Elias Gergi,Josef Fares,99 min,When Lebanon's Civil War deprives Zozo of his ...,TV-MA
7783,s7784,Zubaan,scrappi poor boy worm way tycoon dysfunct fami...,3776484,Vicky Kaushal,Sarah-Jane Dias,Raaghav Chanana,Mozez Singh,111 min,A scrappy but poor boy worms his way into a ty...,TV-14
7784,s7785,Zulu Man in Japan,documentari south african rapper nasti c hit s...,15075334,Nasty C,Unknown,Unknown,Unknown,44 min,"In this documentary, South African rapper Nast...",TV-MA
7785,s7786,Zumbo's Just Desserts,dessert wizard adriano zumbo look next “ willi...,7239256,Adriano Zumbo,Rachel Khoo,Unknown,Unknown,1 Season,Dessert wizard Adriano Zumbo looks for the nex...,TV-PG


#### Creating a duration amount attribute which holds the :"duration" attribute as an integer value

In [61]:
def getdurationamount(temp):
    lst = [int(s) for s in temp.split() if s.isdigit()]
    return lst

In [62]:
new_db["duration_as_int"] = new_db['Duration'].apply(lambda x: getdurationamount(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db["duration_as_int"] = new_db['Duration'].apply(lambda x: getdurationamount(x))


In [63]:
new_db['duration_as_int'] = new_db['duration_as_int'].apply(lambda x: int(''.join(map(str,x))))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['duration_as_int'] = new_db['duration_as_int'].apply(lambda x: int(''.join(map(str,x))))


In [64]:
new_db['type'] = new_db['show_id'].map(tempdf.set_index('show_id')['type'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_db['type'] = new_db['show_id'].map(tempdf.set_index('show_id')['type'])


### Below data is the final data to be fed into the execution code. 

In [65]:
new_db

Unnamed: 0,show_id,title,tags,imdb_id,actor_one,actor_two,actor_three,Director,Duration,Description,rating,duration_as_int,type
0,s1,3%,futur elit inhabit island paradis far crowd sl...,4922804,João Miguel,Bianca Comparato,Michel Gomes,Unknown,4 Seasons,In a future where the elite inhabit an island ...,TV-MA,4,TV Show
1,s2,7:19,devast earthquak hit mexico citi trap survivor...,5979312,Demián Bichir,Héctor Bonilla,Oscar Serrano,Jorge Michel Grau,93 min,After a devastating earthquake hits Mexico Cit...,TV-MA,93,Movie
2,s3,23:59,armi recruit found dead fellow soldier forc co...,2100376,Tedd Chan,Stella Chung,Henley Hii,Gilbert Chan,78 min,"When an army recruit is found dead, his fellow...",R,78,Movie
3,s4,9,postapocalypt world ragdol robot hide fear dan...,472033,Elijah Wood,John C. Reilly,Jennifer Connelly,Shane Acker,80 min,"In a postapocalyptic world, rag-doll robots hi...",PG-13,80,Movie
4,s5,21,brilliant group student becom cardcount expert...,478087,Jim Sturgess,Kevin Spacey,Kate Bosworth,Robert Luketic,123 min,A brilliant group of students become card-coun...,PG-13,123,Movie
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7782,s7783,Zozo,lebanon civil war depriv zozo famili he left g...,448267,Imad Creidi,Antoinette Turk,Elias Gergi,Josef Fares,99 min,When Lebanon's Civil War deprives Zozo of his ...,TV-MA,99,Movie
7783,s7784,Zubaan,scrappi poor boy worm way tycoon dysfunct fami...,3776484,Vicky Kaushal,Sarah-Jane Dias,Raaghav Chanana,Mozez Singh,111 min,A scrappy but poor boy worms his way into a ty...,TV-14,111,Movie
7784,s7785,Zulu Man in Japan,documentari south african rapper nasti c hit s...,15075334,Nasty C,Unknown,Unknown,Unknown,44 min,"In this documentary, South African rapper Nast...",TV-MA,44,Movie
7785,s7786,Zumbo's Just Desserts,dessert wizard adriano zumbo look next “ willi...,7239256,Adriano Zumbo,Rachel Khoo,Unknown,Unknown,1 Season,Dessert wizard Adriano Zumbo looks for the nex...,TV-PG,1,TV Show


### Exporting the data in .pkl file

In [68]:
#import pickle

In [69]:
#pickle.dump(new_db.to_dict(),open('testdb.pkl','wb'))