In [11]:
import numpy as np
import pandas as pd
from imdb import Cinemagoer
from tqdm import tqdm
import threading
import pyspark
tqdm.pandas()
from pyspark.sql import SQLContext, SparkSession
import sys
import warnings
warnings.simplefilter(action="ignore")

import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/project/spark-3.2.1-bin-hadoop3.2"

# get a spark context
import pyspark.pandas as ps

# Creating a spark session
spark = SparkSession \
    .builder \
    .appName("PySparkPostgresApp") \
    .config("spark.jars", "/project/postgresql-42.3.2.jar") \
    .getOrCreate()

spark


# defining spark credentials
postgres_uri = "jdbc:postgresql://database-1.cdz0wq8d9fyh.eu-west-2.rds.amazonaws.com:5432/imdb_database"
user = "postgres"
password = "qwerty123"

def dbToSpark(dbtable):
    '''
    This function fetches data from a database and saves it as a pyspark dataframe
    '''
    try:
        df = spark.read \
            .format("jdbc") \
            .option("url", postgres_uri) \
            .option("dbtable", dbtable) \
            .option("user", user) \
            .option("password", password) \
            .option("driver", "org.postgresql.Driver") \
            .load()
        return df

    except Exception as ex:
        return ex


def sparkToDB(df_spark,dbtable):
    '''
    The function moves data from a pyspark df to a database table
    '''
    try:
        df_spark.write \
            .mode("append") \
            .format("jdbc") \
            .option("url", postgres_uri) \
            .option("dbtable", dbtable) \
            .option("user", user) \
            .option("password", password) \
            .option("driver", "org.postgresql.Driver") \
            .save()
    except Exception as ex:
        return ex
    
def fetchForeignKey(df,dbtable,mappingColumnOutset,mappingColumnTarget,mappedValue,newcol):
    '''
    Getting the foreign key id for a value from the database table
    '''
    df_curr = dbToSpark(dbtable)
    df_curr_pd = df_curr.toPandas()
    df[newcol] = df[mappingColumnOutset].map(df_curr_pd.set_index([mappingColumnTarget])[mappedValue])
    df.drop(columns=[mappingColumnOutset], inplace=True)
    return df

In [4]:
def writeToDB(movies,moviePersons,movieGenres):
    '''
    Storing imdb data to the the database tables
    '''
    moviesDF = pd.DataFrame(movies)
    moviePersonsDF = pd.concat(moviePersons)
    movieGenresDF = pd.concat(movieGenres)
    
    moviesDF_spark = spark.createDataFrame(moviesDF)
    sparkToDB(moviesDF_spark,"imdb_schema.movie_primary")
    
    genresPresent_df_spark = dbToSpark("imdb_schema.genres")
    genresPresent_df = genresPresent_df_spark.toPandas()
    newGenres = list(set(movieGenresDF['genre_name'])-set(genresPresent_df['genre_name']))
    if len(newGenres)>0:
        newGenresDF = pd.DataFrame()
        newGenresDF['genre_name'] = newGenres
        newGenresDF_spark = spark.createDataFrame(newGenresDF)
        sparkToDB(newGenresDF_spark,"imdb_schema.genres")
    
    personPresent_df_spark = dbToSpark("imdb_schema.person_data")
    personPresent_df = personPresent_df_spark.toPandas()
    newPersons = list(set(moviePersonsDF['person_name'])-set(personPresent_df['person_name']))
    if len(newPersons)>0:
        newPersonsDF = pd.DataFrame()
        newPersonsDF['person_name'] = newPersons
        newPersonsDF_spark = spark.createDataFrame(newPersonsDF)
        sparkToDB(newPersonsDF_spark,"imdb_schema.person_data")
    
    rolesPresent_df_spark = dbToSpark("imdb_schema.role_data")
    rolesPresent_df = rolesPresent_df_spark.toPandas()
    newRoles = list(set(moviePersonsDF['role_name'])-set(rolesPresent_df['role_name']))
    if len(newRoles)>0:
        newRolesDF = pd.DataFrame()
        newRolesDF['role_name'] = newRoles
        newRolesDF_spark = spark.createDataFrame(newRolesDF)
        sparkToDB(newRolesDF_spark,"imdb_schema.role_data")
    
    movieGenresDF = fetchForeignKey(movieGenresDF,"imdb_schema.genres","genre_name",
                                    "genre_name","genre_id","genre_id")
    movieGenresDF_spark = spark.createDataFrame(movieGenresDF)
    sparkToDB(movieGenresDF_spark,"imdb_schema.movie_genres")
    
    moviePersonsDF =fetchForeignKey(moviePersonsDF,"imdb_schema.person_data","person_name",
                                    "person_name","person_id","person_id")
    moviePersonsDF =fetchForeignKey(moviePersonsDF,"imdb_schema.role_data","role_name",
                                "role_name","role_id","role_id")
    moviePersonsDF_spark = spark.createDataFrame(moviePersonsDF)
    sparkToDB(moviePersonsDF_spark,"imdb_schema.movie_person")



In [14]:

movies = []
moviePersons = []
movieGenres = []
rolesList = ['cast','director','writer']

In [15]:
ia = Cinemagoer()
def fetchData(x):
    '''
    Calling the IMDb api to fetch the relevant data points
    '''
    
    try:
        movieDict = {}
        moviePersonDF = pd.DataFrame(columns = ['movie_id','person_name','role_name'])
        genresDF = pd.DataFrame()
        movieID = "0000000" + str(x)
        movieID = movieID[-7:]
        movie = ia.get_movie(movieID)
        movieDict['movie_id'] = movieID
        movieDetails = {'plot_outline':'plot outline','movie_name':'title',
                        'movie_runtime':'runtimes','rating':'rating'}
        for key,value in movieDetails.items():
            try:
                movieDict[key] = movie.get(value)
            except:
                movieDict[key] = ''
            if type(movieDict[key]) == list:
                movieDict[key] = movieDict[key][0]
        if (movieDict['plot_outline']=='') or (movieDict['plot_outline']==None):
            return ''
        #print(movieDict)
        #movies[x-1]=movieDict
        infoAvailable = set(movie.infoset2keys['main'])
        rolesAvaliable = list(set(rolesList).intersection(infoAvailable))
        for role in rolesAvaliable:
            roleDF = pd.DataFrame()
            persons = movie.get(role)
            persons = [str(person) for person in persons]
            try:
                if role == 'cast':
                    persons = persons[:5]
            except:
                pass
            roleDF['movie_id'] = [movieID]*len(persons)
            roleDF['role_name'] = [role]*len(persons)
            roleDF['person_name'] = persons
            moviePersonDF = moviePersonDF.append(roleDF)
        genres = movie.get('genres')
        genresDF['movie_id'] = [movieID]*len(genres)
        genresDF['genre_name'] = genres
        movies.append(movieDict)
        moviePersons.append(moviePersonDF)
        movieGenres.append(genresDF)
        return movieDict,moviePersonDF,genresDF
    except:
        return ''


In [44]:
startList = list(range(1,100000,100))
startList[:5]
endList =  list(range(100,100100,100))
endList[:5]

[80100, 80200, 80300, 80400, 80500]

In [45]:
'''
Utilizing multithreading for quicker processing
'''

pool_size = 16
from multiprocessing.pool import ThreadPool
from timeit import default_timer as timer
import time
pool = ThreadPool(processes=pool_size)
start = timer()
for start,end in tqdm(zip(startList,endList)):
    try:
        for x in range(start,end+1):
            #fetchData(x)
            results = pool.map_async(fetchData, (x,))
        results.get()
        writeToDB(movies,moviePersons,movieGenres)
        movies = []
        moviePersons = []
        movieGenres = []
        time.sleep(3)
    except HTTPError:
        time.sleep(60)
        
pool.close()
pool.join()
end = timer()
    
print("Total time for code to execute:", end - start)
#futures = [executor.submit(try_my_operation, item) for item in items]
#concurrent.futures.wait(futures)

0it [00:00, ?it/s]2022-04-15 16:36:37,941 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080001/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/p

2022-04-15 16:36:37,949 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080009/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080002/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/pytho

2022-04-15 16:36:37,952 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080011/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080010/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/pytho

2022-04-15 16:36:37,956 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080006" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:37,958 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080007" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:37,966 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080011" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:37,968 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080005" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,035 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080017/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,054 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080018" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080019" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,075 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080022/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080022" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080021" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,093 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080026/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080025" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080027" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,128 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080029" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080030" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,168 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080032/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,177 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080003" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080033" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080035/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/pytho

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080016" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080035" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,200 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080037/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080040/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/pytho

2022-04-15 16:36:38,219 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080041/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080039" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080041" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,253 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080043/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,266 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080044" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080045" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,289 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080047/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,298 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080048" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,304 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080049" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080050" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,319 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080053/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080053" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080054" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,333 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080056/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,345 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080056" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,366 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080062/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,387 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080063" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080064" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,406 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080066/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,409 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080067" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,418 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080068" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,427 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080070/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,435 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080071" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080072" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,458 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080075/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,480 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080076" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080077" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,505 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080078/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,511 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080080" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,518 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080081" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080082" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,536 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080084" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,542 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080086" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

2022-04-15 16:36:38,548 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080087/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,574 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080059" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080089" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,589 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080061/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,596 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080090" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080091" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,613 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080095/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,618 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080093" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080094" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

2022-04-15 16:36:38,634 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080099/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

2022-04-15 16:36:38,640 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080098/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/re

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080098" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

0it [00:00, ?it/s]
2022-04-15 16:36:38,671 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/_exceptions.py:32: IMDbDataAccessError exception raised; args: ({'errcode': None, 'errmsg': 'None', 'url': 'https://www.imdb.com/title/tt0080073/reference', 'proxy': '', 'exception type': 'IOError', 'original exception': <HTTPError 503: 'Service Unavailable'>},); kwds: {}
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/

NameError: name 'HTTPError' is not defined

2022-04-15 16:36:38,680 CRITICAL [imdbpy] /opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/__init__.py:833: caught an exception retrieving or parsing "main" info set for mopID "0080073" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_def

CRITICAL:imdbpy:caught an exception retrieving or parsing "main" info set for mopID "0080088" (accessSystem: http)
Traceback (most recent call last):
  File "/opt/anaconda/envs/Python3/lib/python3.8/site-packages/imdb/parser/http/__init__.py", line 221, in retrieve_unicode
    response = uopener.open(url)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 531, in open
    response = meth(req, response)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 640, in http_response
    response = self.parent.error(
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 569, in error
    return self._call_chain(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 502, in _call_chain
    result = func(*args)
  File "/opt/anaconda/envs/Python3/lib/python3.8/urllib/request.py", line 649, in http_error_default
    raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 503: Ser

In [12]:


movieGenresDF_spark = dbToSpark("imdb_schema.movie_genres")
movieGenresDF = movieGenresDF_spark.toPandas()

genresDF_spark= dbToSpark("imdb_schema.genres")
genresDF = genresDF_spark.toPandas()

roles_data_df_spark = dbToSpark("imdb_schema.role_data")
roles_data_df = roles_data_df_spark.toPandas()

moviePersonsDF_spark = dbToSpark("imdb_schema.movie_person")
moviePersonsDF = moviePersonsDF_spark.toPandas()

person_data_df_spark = dbToSpark("imdb_schema.person_data")
person_data_df = person_data_df_spark.toPandas()

# moviesDF['genre_id'] = moviesDF['movie_id'].map(movieGenresDF.set_index(['movie_id'])['genre_id'])
# moviesDF['genre_name'] = moviesDF['genre_id'].map(genresDF.set_index(['genre_id'])['genre_name'])



In [15]:
sparkToDB(movieGenresDF_spark,"imdb_schema.movie_genres")

In [9]:
movieGenresDF = movieGenresDF_spark.toPandas()
genresDF = genresDF_spark.toPandas()
#roles_data_df = roles_data_df_spark.toPandas()
#moviePersonsDF = moviePersonsDF_spark.toPandas()

In [10]:
genresDF.shape

(28, 2)

In [59]:
person_data_df_spark = dbToSpark("imdb_schema.person_data")
person_data_df = person_data_df_spark.toPandas()

In [2]:
# We faced some issues in fetching some NAN values from the movies-primary db using pyspark
# As such we using sql query for the same.

from postgres_wrapper import PGWrapper
config = {
    "database": "imdb_database",
    "user": "postgres",
    "password": "qwerty123",
    "host": "database-1.cdz0wq8d9fyh.eu-west-2.rds.amazonaws.com",
    "port": 5432,
}

db_con = PGWrapper(config)
db_con.connect()
results = db_con.query_as_list_of_dicts("SELECT * FROM imdb_schema.movie_primary")

In [3]:
moviesDF=pd.DataFrame(results)

In [16]:
def fetchGenres(movieID):
    movieGenres = movieGenresDF[movieGenresDF['movie_id']==movieID]
    movieGenres['genre_name']= movieGenres['genre_id'].map(genresDF.set_index(['genre_id'])['genre_name'])
    return movieGenres['genre_name'].tolist()

In [17]:
moviesDF['genres'] = moviesDF['movie_id'].progress_apply(lambda x: fetchGenres(x))

In [18]:
moviesDF1 = moviesDF[moviesDF['movie_runtime'].notna()]
len(moviesDF1)

28790

In [22]:
for genre in tqdm(genresDF['genre_name'].tolist()):
    moviesDF1[genre] = moviesDF1.apply(lambda row: 1 if genre in row['genres'] else 0, axis = 1)

100%|██████████| 28/28 [00:05<00:00,  4.91it/s]


In [28]:
!pip install flair==0.10
    
import flair
from flair.data import Sentence
from flair.embeddings import WordEmbeddings, ELMoEmbeddings, TransformerWordEmbeddings, TransformerDocumentEmbeddings

[1m
         .:::.     .::.       
        ....yy:    .yy.       
        :.  .yy.    y.        
             :y:   .:         
             .yy  .:          
              yy..:           
              :y:.            
              .y.             
             .:.              
        ....:.                
        :::.                  
[0;33m
• Project files and data should be stored in /project. This is shared among everyone
  in the project.
• Personal files and configuration should be stored in /home/faculty.
• Files outside /project and /home/faculty will be lost when this server is terminated.
• Create custom environments to setup your servers reproducibly.
[0m
Collecting flair==0.10
  Using cached flair-0.10-py3-none-any.whl (322 kB)
Collecting huggingface-hub
  Downloading huggingface_hub-0.5.1-py3-none-any.whl (77 kB)
[K     |████████████████████████████████| 77 kB 589 kB/s  eta 0:00:01
[?25hCollecting janome
  Using cached Janome-0.4.2-py2.py3-none-any.whl (19.7 MB

In [96]:
def vectorizeSentence(sentence):
    glove_embedding.embed(sentence)
    return np.array([np.mean([sentence[m].embedding[x] for m in range(len(sentence))]) for x in range(len(sentence[0].embedding))])

moviesDF1['name_plot'] = moviesDF1.apply(lambda row: row['movie_name']+' - '+row['plot_outline'],axis = 1)
moviesDF1['name_plot_vectorized'] = moviesDF1['name_plot'].progress_apply(lambda x: vectorizeSentence(Sentence(x)))
#moviesDF1['name_vectorized'] = moviesDF1['movie_name'].progress_apply(lambda x: vectorizeSentence(Sentence(x)))

100%|██████████| 28790/28790 [51:38<00:00,  9.29it/s]  


In [97]:
moviesDF1.head(100)

Unnamed: 0,movie_id,movie_name,movie_runtime,plot_outline,rating,genres,Romance,News,Animation,Comedy,...,Talk-Show,Reality-TV,Adult,plot_vectorized,name_vectorized,normalized_rating,plot_sentence,runtime_normalized,name_plot,name_plot_vectorized
0,0000027,Cordeliers' Square in Lyon,1,A stationary camera looks across the boulevard...,5.6,"[Documentary, Short]",0,0,0,0,...,0,0,0,"[-0.07946225, 0.068295226, 0.27278626, -0.1863...","[0.27884075, -0.026810005, -0.066925496, 0.129...",0.12,"[(Token: 1 A, Token: 2 stationary, Token: 3 ca...",0.000000,Cordeliers' Square in Lyon - A stationary came...,"[-0.07696736, 0.069864586, 0.26245904, -0.1816..."
2,0000021,The Photographical Congress Arrives in Lyon,1,Members of the French Photographic Society arr...,5.7,"[Documentary, Short]",0,0,0,0,...,0,0,0,"[-0.05347773, 0.1110159, 0.20198119, -0.203555...","[-0.047319163, -0.15532734, 0.54127467, -0.036...",0.14,"[(Token: 1 Members, Token: 2 of, Token: 3 the,...",0.000000,The Photographical Congress Arrives in Lyon - ...,"[-0.069014676, 0.09636637, 0.23400398, -0.2003..."
3,0000020,The Derby 1895,1,"A stationary camera, looking diagonally across...",4.8,"[Documentary, Short, Sport]",0,0,0,0,...,0,0,0,"[-0.03976103, 0.062304758, 0.38596523, -0.2819...","[0.26332533, -0.09233666, -0.077725984, -0.338...",-0.04,"[(Token: 1 A, Token: 2 stationary, Token: 3 ca...",0.000000,"The Derby 1895 - A stationary camera, looking ...","[-0.046019807, 0.064021714, 0.36128956, -0.299..."
4,0000023,The Sea,1,The sea is before us. Some rocks are visible t...,5.7,"[Documentary, Short]",0,0,0,0,...,0,0,0,"[-0.06712052, 0.24126318, 0.36458135, -0.26170...","[-0.311577, -0.103966504, 0.68901503, -0.12772...",0.14,"[(Token: 1 The, Token: 2 sea, Token: 3 is, Tok...",0.000000,The Sea - The sea is before us. Some rocks are...,"[-0.0809841, 0.23860729, 0.37162447, -0.26531,..."
5,0000031,Jumping the Blanket,1,"Outdoors, with a nondescript building in the b...",5.5,"[Documentary, Short]",0,0,0,0,...,0,0,0,"[-0.06316189, 0.089918524, 0.38536254, -0.2373...","[0.17459865, -0.019626671, 0.42243338, -0.2551...",0.10,"[(Token: 1 Outdoors, Token: 2 ,, Token: 3 with...",0.000000,"Jumping the Blanket - Outdoors, with a nondesc...","[-0.06651333, 0.09125983, 0.38741693, -0.24278..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,0000335,Soldiers of the Cross,40,The plot outlined the story of the early Chris...,6.1,"[Short, Biography, Drama]",0,0,0,0,...,0,0,0,"[-0.05257072, 0.13807157, 0.26515916, -0.16061...","[-0.0701985, -0.031142503, 0.070279986, 0.0551...",0.22,"[(Token: 1 The, Token: 2 plot, Token: 3 outlin...",0.029235,Soldiers of the Cross - The plot outlined the ...,"[-0.06530712, 0.13602589, 0.26039174, -0.16003..."
104,0000340,Uncle Josh in a Spooky Hotel,1,Another adventure involving the recurring char...,4.9,"[Comedy, Fantasy, Short]",0,0,0,1,...,0,0,0,"[0.028813528, 0.049082734, 0.40425545, -0.2127...","[0.20363717, -0.1104841, 0.28002, -0.29484534,...",-0.02,"[(Token: 1 Another, Token: 2 adventure, Token:...",0.000000,Uncle Josh in a Spooky Hotel - Another adventu...,"[0.025189344, 0.042988524, 0.39530563, -0.2319..."
105,0000341,Uncle Josh's Nightmare,2,"Poor Uncle Josh is trying to get to sleep, but...",4.9,"[Comedy, Short, Fantasy]",0,0,0,1,...,0,0,0,"[-0.10433494, 0.08680299, 0.21397837, -0.26974...","[0.485565, -0.047040164, 0.65238, -0.884985, -...",-0.02,"[(Token: 1 Poor, Token: 2 Uncle, Token: 3 Josh...",0.000750,Uncle Josh's Nightmare - Poor Uncle Josh is tr...,"[-0.07412906, 0.0865076, 0.2666673, -0.3486767..."
106,0000337,Spanish Bullfight,1,"With a crowded arena in the background, a stat...",5,"[Short, Sport]",0,0,0,0,...,0,0,0,"[-0.028000712, 0.03543403, 0.26265943, -0.2142...","[0.50072, -0.1327454, -0.21441849, 0.220693, -...",0.00,"[(Token: 1 With, Token: 2 a, Token: 3 crowded,...",0.000000,Spanish Bullfight - With a crowded arena in th...,"[-0.030703211, 0.03922082, 0.25235966, -0.2124..."


In [98]:
X = moviesDF1[['name_plot_vectorized']]

In [99]:
y = moviesDF1[genresDF['genre_name'].tolist()]

In [113]:
y

Unnamed: 0,Romance,News,Animation,Comedy,Sport,Documentary,Horror,Short,Western,Biography,...,History,Music,Mystery,Thriller,Musical,Film-Noir,Game-Show,Talk-Show,Reality-TV,Adult
0,0,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30182,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30183,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30184,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
30185,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [101]:
from sklearn.model_selection import train_test_split

X_pretrain, X_test, y_pretrain, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


We had initially trained lightGBM models using the above defined GloVe embeddings. However we faced challenges in deploying such big embeddings into lambda. As such, we switched to logistic regression using a countVectorizer

In [134]:
moviesDF2 = moviesDF[moviesDF['movie_runtime'].notna()]
len(moviesDF2)

for genre in tqdm(genresDF['genre_name'].tolist()):
    moviesDF2[genre] = moviesDF2.apply(lambda row: 1 if genre in row['genres'] else 0, axis = 1)

100%|██████████| 28/28 [00:05<00:00,  4.88it/s]


In [136]:
moviesDF2['name_plot'] = moviesDF2.apply(lambda row: row['movie_name']+' - '+row['plot_outline'],axis = 1)

In [137]:
X = moviesDF2[['name_plot']]

y = moviesDF2[genresDF['genre_name'].tolist()]

In [138]:
X_pretrain, X_test, y_pretrain, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

In [139]:
trainingText = X_pretrain.name_plot.str.cat(sep = " ")
from sklearn.feature_extraction.text import CountVectorizer
countVec = CountVectorizer(max_features = 1000, ngram_range=(1,2), stop_words = "english")
countVec = countVec.fit([trainingText])

In [140]:
X_pretrain_vectorized = countVec.transform(X_pretrain.name_plot).A

In [143]:
X_pretrain_vectorized.shape

(25911, 1000)

In [159]:
from sklearn.linear_model import LogisticRegression

In [147]:
pickle.dump(countVec, open(r"/project/imdb_genrePredictor/models_BOW/countVec.pkl", "wb"))

In [176]:
import lightgbm as lgb
import pickle
from sklearn.linear_model import LogisticRegression
accuracy_dict_BOW,cm_dict_BOW = {},{}
thresholds = []
for label in tqdm(genresDF['genre_name'].tolist()):
    accuracy_dict = {}
    X_train, X_val, y_train, y_val = train_test_split(X_pretrain_vectorized, y_pretrain[label], test_size = 0.15, 
                                                      random_state = 42, stratify=y_pretrain[label])
    

    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)
    
    for threshold in [0.15,0.25,0.35]:
        y_pred_val = logisticRegr.predict_proba(X_val)
        y_pred = [1 if y[1]>threshold else 0 for y in y_pred]
        accuracy_dict[threshold] = accuracy_score(y_val, y_pred)*100
    
    threshold = max(accuracy_dict, key=accuracy_dict.get)
    thresholds.append(threshold)
    
    X_test_vectorized = countVec.transform(X_test.name_plot).A
    y_pred = logisticRegr.predict_proba(X_test_vectorized)
    y_pred = [1 if y[1]>threshold else 0 for y in y_pred]
    accuracy_dict_BOW[label] = accuracy_score(y_test[label], y_pred)*100
    cm_dict_BOW[label] = confusion_matrix(y_test[label], y_pred)
    
    pickle.dump(logisticRegr, open(r"/project/imdb_genrePredictor/models_BOW/PICKLE_"+str(label)+".pkl", "wb"))
    
threshold = max(thresholds,key=thresholds.count)

100%|██████████| 28/28 [00:36<00:00,  1.32s/it]


In [175]:
accuracy_dict_BOW

{'Romance': 75.16498784300104,
 'News': 99.96526571726294,
 'Animation': 95.79715178881555,
 'Comedy': 68.11392844737756,
 'Sport': 98.54116012504342,
 'Documentary': 97.2907259465092,
 'Horror': 93.53942341090656,
 'Short': 91.3511635984717,
 'Western': 94.16464050017368,
 'Biography': 97.18652309829801,
 'Crime': 86.59256686349427,
 'Fantasy': 95.72768322334144,
 'Drama': 59.56929489406044,
 'Family': 91.24696075026051,
 'Sci-Fi': 96.56130600903091,
 'Action': 88.01667245571379,
 'Adventure': 87.70406391108023,
 'War': 94.65092045849252,
 'History': 95.34560611323376,
 'Music': 95.62348037513026,
 'Mystery': 93.15734630079889,
 'Thriller': 89.02396665508857,
 'Musical': 93.9562348037513,
 'Film-Noir': 96.59604029176798,
 'Game-Show': 99.82632858631469,
 'Talk-Show': 99.96526571726294,
 'Reality-TV': 100.0,
 'Adult': 99.23584577978465}

In [173]:
cm_dict_BOW

{'Romance': array([[1979,  410],
        [ 305,  185]]),
 'News': array([[2878,    0],
        [   1,    0]]),
 'Animation': array([[2652,   57],
        [  64,  106]]),
 'Comedy': array([[1279,  738],
        [ 180,  682]]),
 'Sport': array([[2818,   20],
        [  22,   19]]),
 'Documentary': array([[2774,   38],
        [  40,   27]]),
 'Horror': array([[2645,   94],
        [  92,   48]]),
 'Short': array([[2463,  134],
        [ 115,  167]]),
 'Western': array([[2463,  108],
        [  60,  248]]),
 'Biography': array([[2787,   25],
        [  56,   11]]),
 'Crime': array([[2323,  226],
        [ 160,  170]]),
 'Fantasy': array([[2744,   69],
        [  54,   12]]),
 'Drama': array([[ 604, 1053],
        [ 111, 1111]]),
 'Family': array([[2508,  128],
        [ 124,  119]]),
 'Sci-Fi': array([[2738,   46],
        [  53,   42]]),
 'Action': array([[2468,  154],
        [ 191,   66]]),
 'Adventure': array([[2446,  179],
        [ 175,   79]]),
 'War': array([[2646,   92],
        

In [177]:
moviesDF.iloc[0]['plot_outline']

"A stationary camera looks across the boulevard at a diagonal toward one corner of Lyon's Cordeliers' Square. It's a long shot, with a great deal of depth of focus. We can see the sky and fronts of four buildings, each four or five stories tall. It's a busy thoroughfare, with pedestrians walking in front of the buildings and crossing the boulevard between horse-drawn vehicles. A double-decker bus passes in front of us, pulled by two horses. Various tradesmen pass on wagons. One van passes."

In [7]:
moviesDF

Unnamed: 0,movie_id,movie_name,movie_runtime,plot_outline,rating
0,0000027,Cordeliers' Square in Lyon,1,A stationary camera looks across the boulevard...,5.6
1,0000025,The Oxford and Cambridge University Boat Race,,Although the content of this film is primitive...,4.3
2,0000021,The Photographical Congress Arrives in Lyon,1,Members of the French Photographic Society arr...,5.7
3,0000020,The Derby 1895,1,"A stationary camera, looking diagonally across...",4.8
4,0000023,The Sea,1,The sea is before us. Some rocks are visible t...,5.7
...,...,...,...,...,...
30182,0079709,Penitentiary,99,A hitchhiker named Martel Gordone gets in a fi...,5.6
30183,0079707,Portable Country,103,"On board of a bus, Andrés Barazarte travels th...",7.1
30184,0079714,Phantasm,89,"Mike, a young teenage boy who has just lost hi...",6.6
30185,0079712,Perro callejero II,96,"After serving time in prison, ""Perro"", the tit...",7.3
