In [1]:
# import the imdbpy package that was downloaded via pipenv
# import pandas
# methods referenced from https://imdbpy.readthedocs.io/en/latest/usage/quickstart.html

import imdb
from imdb import IMDb
import pandas as pd

# Shared IMDb access object; getSeries() uses it for every search and lookup.
ia = IMDb()

In [2]:
# read the CSV file downloaded from Netflix
# header=0 takes the column names from the first row of the file.
# Later cells read the "Title" and "Date" columns of this frame.
# NOTE(review): "FileName.CSV" looks like a placeholder path - confirm before running.

personNetflix = pd.read_csv("FileName.CSV", header=0)

In [3]:
# use pandas to modify the Netflix CSV file
# NOTE: this cell consumes the "Title" and "Date" columns, so it has to run on a
# freshly loaded frame (re-run the read_csv cell before re-running this one).


# pandas data frame display options

pd.options.display.max_rows = 1300

# Netflix data needs to be split into Series, Season, and Episode.
# reindex() guarantees that columns 0, 1 and 2 all exist: with expand=True
# pandas only creates as many columns as the longest split, so a history
# without any "Series: Season: Episode" style title would otherwise raise a
# KeyError when the missing column is read below.

titleSplit = (
    personNetflix["Title"]
    .str.split(":", n=2, expand=True)
    .reindex(columns=range(3))
)

# create separate columns from the split data, then drop the column that was split

personNetflix = personNetflix.assign(
    Series=titleSplit[0],
    Season=titleSplit[1],
    Episode=titleSplit[2],
).drop(columns=["Title"])

# split date on "/" into month/day/year (the parts are used in that order)

dateSplit = (
    personNetflix["Date"]
    .str.split("/", n=2, expand=True)
    .reindex(columns=range(3))
)

# create separate columns from the split data, then drop the column that was split

personNetflix = personNetflix.assign(
    Month=dateSplit[0],
    Day=dateSplit[1],
    Year=dateSplit[2],
).drop(columns=["Date"])

personNetflix # to display as a dataframe

Unnamed: 0,Series,Season,Episode,Month,Day,Year
0,Emily in Paris,Season 1,Cancel Couture,12,11,20
1,Emily in Paris,Season 1,An American Auction in Paris,12,11,20
2,Emily in Paris,Season 1,Family Affair,12,11,20
3,Emily in Paris,Season 1,French Ending,12,11,20
4,Emily in Paris,Season 1,Ringarde,12,11,20
5,Emily in Paris,Season 1,Faux Amis,12,10,20
6,Emily in Paris,Season 1,A Kiss Is Just A Kiss,12,10,20
7,Emily in Paris,Season 1,Sexy or Sexist,12,10,20
8,Emily in Paris,Season 1,Masculin Féminin,12,10,20
9,Emily in Paris,Season 1,Emily in Paris,12,10,20


In [4]:
# unique series titles only, in order of first appearance
# (Series.unique() returns an array rather than a Python list)

uniqueTitlesNetflix = personNetflix.Series.unique()

In [5]:
# get IMDb series objects from Netflix title names

def getSeries(list):
    """Look up the IMDb entry for each Netflix title.

    Every title is searched with ``ia.search_movie``; the first search hit is
    then fetched with ``ia.get_movie``.

    Parameters
    ----------
    list : iterable
        Title names to search for. The parameter name shadows the built-in
        ``list`` inside this function; it is kept so existing callers keep
        working.

    Returns
    -------
    list
        The fetched IMDb objects in input order. Titles with no search hit,
        or whose lookup result is empty, are reported with a message and
        skipped, so the result can be shorter than the input.
    """
    seriesList = []
    for title in list:
        movies = ia.search_movie(title)
        # Without this guard an empty search result raises IndexError on
        # movies[0]; handle it like any other title without series info.
        if not movies:
            print("No series info available.")
            continue
        series = ia.get_movie(movies[0].movieID)
        if series:
            seriesList.append(series)
        else:
            print("No series info available.")
    return seriesList

In [7]:
# get runtime information
# if runtime not available, will add 0 minutes

def getDuration(list):
    """Collect the "runtime" entry of every IMDb object in ``list``.

    An object whose "runtime" lookup is missing or empty contributes the
    placeholder ``['0']`` (0 minutes), so the result has one entry per input
    object, in input order.
    """
    # .get("runtime") is used on purpose: going through .data kept raising errors
    return [title.get("runtime") or ['0'] for title in list]

In [8]:
# flatten the list of lists: .get("runtime") returns each title's runtime wrapped in its own list, e.g. ['24']

def unNestDuration(list):
    """Flatten ``list`` by exactly one level.

    Every element of every inner sequence is emitted in order, so an inner
    sequence holding several entries contributes several items to the result.
    """
    return [runtime for runtimes in list for runtime in runtimes]

In [9]:
# get title kind information (series, movie, miniseries)

def getKind(list):
    """Return the title kind (series, movie, miniseries, ...) of each IMDb object.

    Parameters
    ----------
    list : iterable
        Objects exposing a ``data`` mapping (assumed dict-like, as it is
        indexed by the key ``'kind'``).

    Returns
    -------
    list
        One entry per input object: the value stored under ``'kind'``, or
        ``"Don't know"`` when that value is missing or empty.
    """
    kindList = []
    for title in list:
        # .get() rather than ['kind']: a missing key would raise KeyError and
        # the "Don't know" fallback could never apply to that case.
        kind = title.data.get('kind')
        kindList.append(kind if kind else "Don't know")
    return kindList

In [10]:
# create a class for Netflix data

class NetflixData:
    """Plain holder for the values gathered from IMDb.

    Both constructor arguments are stored unchanged:
    duration -- kept on ``self.duration``
    kind     -- kept on ``self.kind``
    """

    def __init__(self, duration, kind):
        self.duration, self.kind = duration, kind

In [11]:
# main function to gather all information from IMDB and return information as class instance

def IMDbData(NetflixList):
    """Gather runtime and kind information from IMDb for the given titles.

    Runs the previously defined helpers in sequence (getSeries, getDuration,
    unNestDuration, getKind) and returns the results bundled in a
    NetflixData instance (``.duration`` and ``.kind``).
    """
    # one IMDb lookup per title; both the runtime and the kind are read from it
    imdbTitles = getSeries(NetflixList)

    # runtimes come back as one list per title, so flatten them afterwards
    runtimes = unNestDuration(getDuration(imdbTitles))
    kinds = getKind(imdbTitles)

    return NetflixData(runtimes, kinds)

In [12]:
# data frame function to create new table of IMDB information

def dataFrame(dataSet,titleList):
    """Build the IMDb table: one row per title with its kind and runtime.

    ``titleList`` fills the "Series" column, ``dataSet.kind`` the "Kind"
    column and ``dataSet.duration`` the "Runtime" column.
    """
    columns = {
        "Series": titleList,
        "Kind": dataSet.kind,
        "Runtime": dataSet.duration,
    }
    return pd.DataFrame(columns)

In [13]:
# create dataframe of IMDB data

def IMDbDF(imdbClassInst, uniqueTitlesNetflix):
    """Create the data frame of IMDb data for the given titles.

    Thin wrapper that forwards both arguments to ``dataFrame`` and returns
    its result.
    """
    # The helper defined in this notebook is dataFrame (lower-case d); calling
    # DataFrame here raised NameError because no such name is defined.
    personIMDB = dataFrame(imdbClassInst, uniqueTitlesNetflix)
    return personIMDB

In [14]:
# define left and right tables for data merge
# merge Netflix data with IMDb data

def combinedDataFrame(personNetflix, personIMDB):
    """Left-join the Netflix table with the IMDb table on the "Series" column.

    The Netflix frame is the left side, so all of its rows are kept; rows
    without a matching IMDb entry get missing values in the IMDb columns.
    """
    return pd.merge(
        left=personNetflix,
        right=personIMDB,
        how="left",
        on=['Series'],
    )

In [15]:
# combines all functions

def finalDataFrameExport(userInput):
    """Run the whole pipeline and return the merged Netflix + IMDb frame.

    Steps: gather the IMDb data for ``userInput`` (IMDbData), turn it into a
    table (dataFrame), then merge it into the notebook-level ``personNetflix``
    frame (combinedDataFrame).

    Note that ``personNetflix`` is read from the notebook namespace, not
    passed in, so it must already be defined when this function is called.
    """
    # named imdbData rather than imdb so the imported imdb module is not shadowed
    imdbData = IMDbData(userInput)
    personIMDB2 = dataFrame(imdbData, userInput)
    return combinedDataFrame(personNetflix, personIMDB2)

In [22]:
# export dataframe to .csv

def csvExport(path=r"ExportFile.csv"):
    """Write the notebook-level ``leftMerged`` frame to a CSV file.

    Parameters
    ----------
    path : str, optional
        Destination file. Defaults to "ExportFile.csv", so calling
        ``csvExport()`` with no argument behaves as before.

    The frame is written with a header row and without the index column.
    ``leftMerged`` is read from the notebook namespace and must be assigned
    before this function is called.
    """
    leftMerged.to_csv(path, index=False, header=True)

In [23]:
# call the function
# builds the merged Netflix + IMDb frame for every unique title and keeps it in
# leftMerged, the name csvExport() reads when it writes the file

leftMerged=finalDataFrameExport(uniqueTitlesNetflix)
leftMerged

Unnamed: 0,Series,Season,Episode,Month,Day,Year,Kind,Runtime
0,Emily in Paris,Season 1,Cancel Couture,12,11,20,tv series,30
1,Emily in Paris,Season 1,An American Auction in Paris,12,11,20,tv series,30
2,Emily in Paris,Season 1,Family Affair,12,11,20,tv series,30
3,Emily in Paris,Season 1,French Ending,12,11,20,tv series,30
4,Emily in Paris,Season 1,Ringarde,12,11,20,tv series,30
5,Emily in Paris,Season 1,Faux Amis,12,10,20,tv series,30
6,Emily in Paris,Season 1,A Kiss Is Just A Kiss,12,10,20,tv series,30
7,Emily in Paris,Season 1,Sexy or Sexist,12,10,20,tv series,30
8,Emily in Paris,Season 1,Masculin Féminin,12,10,20,tv series,30
9,Emily in Paris,Season 1,Emily in Paris,12,10,20,tv series,30


In [21]:
# export to CSV
# csvExport() reads leftMerged, so the cell that assigns it must have run first

csvExport()