In [1]:
# Import dependencies
import pandas as pd
import numpy as np
import json
import re
from sqlalchemy import create_engine
from pprint import pprint

import warnings
warnings.filterwarnings("ignore")

In [2]:
# !pip install pymysql

# 1. Extract

In [3]:
# Reading the movies_ratings csv file
df_ratings = pd.read_csv(r'Resources/movies_ratings.csv')
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179
2,1,1061,3.0,1260759182
3,1,1129,2.0,1260759185
4,1,1172,4.0,1260759205


In [4]:
# Reading the movies_metadata csv file
df_movies = pd.read_csv(r'Resources\movies_metadata.csv')
df_movies.sample(2)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
40589,False,,0,"[{'id': 18, 'name': 'Drama'}, {'id': 10402, 'n...",,360389,tt0493076,en,Nina,The story of the late jazz musician and classi...,...,2016-04-22,0.0,90.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,Singer. Activist. Survivor. Legend.,Nina,False,5.7,22.0
28958,False,,0,"[{'id': 16, 'name': 'Animation'}, {'id': 12, '...",http://www.ronalthebarbarian.com/,79433,tt1629374,da,Ronal Barbaren,Ronal is a young barbarian with low self-estee...,...,2011-09-27,0.0,85.0,"[{'iso_639_1': 'da', 'name': 'Dansk'}]",Released,"Babes, balls and muscles in 3D for the whole f...",Ronal the Barbarian,False,6.2,95.0


In [5]:
# Reading the wikipedia-movies json file
with open(r'Resources/wikipedia-movies.json',mode='r') as file:
    movies_wiki_raw = json.load(file)
len(movies_wiki_raw)

7311

# 2. Tranform

In [6]:
df_ratings.shape

(100004, 4)

In [7]:
df_ratings.isnull().sum()

userId       0
movieId      0
rating       0
timestamp    0
dtype: int64

In [8]:
df_ratings['timestamp'] = pd.to_datetime(df_ratings['timestamp'], unit = 's')

In [9]:
df_ratings.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,2009-12-14 02:52:24
1,1,1029,3.0,2009-12-14 02:52:59
2,1,1061,3.0,2009-12-14 02:53:02
3,1,1129,2.0,2009-12-14 02:53:05
4,1,1172,4.0,2009-12-14 02:53:25


In [10]:
df_movies.sample(2)

Unnamed: 0,adult,belongs_to_collection,budget,genres,homepage,id,imdb_id,original_language,original_title,overview,...,release_date,revenue,runtime,spoken_languages,status,tagline,title,video,vote_average,vote_count
42451,False,,0,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",http://kingof.be,390357,tt4818804,en,King of the Belgians,The King of the Belgians is on a state visit i...,...,2016-11-30,0.0,94.0,"[{'iso_639_1': 'bg', 'name': 'български език'}...",Released,,King of the Belgians,False,5.0,12.0
34756,False,,750000,"[{'id': 28, 'name': 'Action'}, {'id': 16, 'nam...",,363890,tt1512222,en,Night of the Living Dead: Darkest Dawn,A group of survivors trapped in a New York apa...,...,2015-10-05,0.0,80.0,"[{'iso_639_1': 'en', 'name': 'English'}]",Released,,Night of the Living Dead: Darkest Dawn,False,5.2,6.0


In [11]:
df_movies.columns

Index(['adult', 'belongs_to_collection', 'budget', 'genres', 'homepage', 'id',
       'imdb_id', 'original_language', 'original_title', 'overview',
       'popularity', 'poster_path', 'production_companies',
       'production_countries', 'release_date', 'revenue', 'runtime',
       'spoken_languages', 'status', 'tagline', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [12]:
df_movies.shape

(45466, 24)

In [13]:
# Checking null values percentage
percent = df_movies.isnull().sum()*100/len(df_movies)
total = df_movies.isnull().sum()
movies_null = pd.DataFrame({'Total':total, 'Percent':percent})
movies_null

Unnamed: 0,Total,Percent
adult,0,0.0
belongs_to_collection,40972,90.115691
budget,0,0.0
genres,0,0.0
homepage,37684,82.883913
id,0,0.0
imdb_id,17,0.037391
original_language,11,0.024194
original_title,0,0.0
overview,954,2.098271


In [14]:
# Keeping columns with null values less than 10% only
drop_list = movies_null[movies_null['Percent']>10].index.tolist()
df_movies.drop(drop_list, axis = 1, inplace = True)
df_movies.columns

Index(['adult', 'budget', 'genres', 'id', 'imdb_id', 'original_language',
       'original_title', 'overview', 'popularity', 'poster_path',
       'production_companies', 'production_countries', 'release_date',
       'revenue', 'runtime', 'spoken_languages', 'status', 'title', 'video',
       'vote_average', 'vote_count'],
      dtype='object')

In [15]:
df_movies.dtypes

adult                    object
budget                   object
genres                   object
id                       object
imdb_id                  object
original_language        object
original_title           object
overview                 object
popularity               object
poster_path              object
production_companies     object
production_countries     object
release_date             object
revenue                 float64
runtime                 float64
spoken_languages         object
status                   object
title                    object
video                    object
vote_average            float64
vote_count              float64
dtype: object

In [16]:
df_movies.adult.value_counts()

False                                                                                                                             45454
True                                                                                                                                  9
 - Written by Ørnås                                                                                                                   1
 Rune Balot goes to a casino connected to the October corporation to try to wrap up her case once and for all.                        1
 Avalanche Sharks tells the story of a bikini contest that turns into a horrifying affair when it is hit by a shark avalanche.        1
Name: adult, dtype: int64

In [17]:
# Keeping rows where adult column is false and dropping adult column
df_movies = df_movies[df_movies.adult == 'False'].drop('adult', axis = 'columns')

In [18]:
df_movies.video.value_counts()

False    45358
True        93
Name: video, dtype: int64

In [19]:
# Keeping rows with video is false
df_movies.video = df_movies.video == 'True'
df_movies.video.value_counts()

False    45454
Name: video, dtype: int64

In [20]:
# Converting columns to numeric types
df_movies.budget = df_movies.budget.astype(int)
df_movies.id = pd.to_numeric(df_movies.id, errors = 'raise')
df_movies.popularity = pd.to_numeric(df_movies.popularity, errors = 'raise')

In [21]:
# Converting column to datetime datatype
df_movies['release_date'] = pd.to_datetime(df_movies['release_date'])

In [22]:
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 45454 entries, 0 to 45465
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   budget                45454 non-null  int32         
 1   genres                45454 non-null  object        
 2   id                    45454 non-null  int64         
 3   imdb_id               45437 non-null  object        
 4   original_language     45443 non-null  object        
 5   original_title        45454 non-null  object        
 6   overview              44500 non-null  object        
 7   popularity            45451 non-null  float64       
 8   poster_path           45068 non-null  object        
 9   production_companies  45451 non-null  object        
 10  production_countries  45451 non-null  object        
 11  release_date          45368 non-null  datetime64[ns]
 12  revenue               45451 non-null  float64       
 13  runtime         

In [23]:
movies_wiki_raw[0]

{'url': 'https://en.wikipedia.org/wiki/The_Adventures_of_Ford_Fairlane',
 'year': 1990,
 'imdb_link': 'https://www.imdb.com/title/tt0098987/',
 'title': 'The Adventures of Ford Fairlane',
 'Directed by': 'Renny Harlin',
 'Produced by': ['Steve Perry', 'Joel Silver'],
 'Screenplay by': ['David Arnott', 'James Cappe', 'Daniel Waters'],
 'Story by': ['David Arnott', 'James Cappe'],
 'Based on': ['Characters', 'by Rex Weiner'],
 'Starring': ['Andrew Dice Clay',
  'Wayne Newton',
  'Priscilla Presley',
  'Lauren Holly',
  'Morris Day',
  'Robert Englund',
  "Ed O'Neill"],
 'Narrated by': 'Andrew "Dice" Clay',
 'Music by': ['Cliff Eidelman', 'Yello'],
 'Cinematography': 'Oliver Wood',
 'Edited by': 'Michael Tronick',
 'Productioncompany ': 'Silver Pictures',
 'Distributed by': '20th Century Fox',
 'Release date': ['July 11, 1990', '(', '1990-07-11', ')'],
 'Running time': '102 minutes',
 'Country': 'United States',
 'Language': 'English',
 'Budget': '$20 million',
 'Box office': '$21.4 milli

In [24]:

def clean_movie(movie):
    """This function will take the each movie will combine alternate titles and change column names."""
    movie = dict(movie)
    alt_titles = {}
#     Combine alternate titles into one list alt_titles    
    for key in ['Also known as','Arabic','Cantonese','Chinese','French',
                    'Hangul','Hebrew','Hepburn','Japanese','Literally',
                    'Mandarin','McCune-Reischauer','Original title','Polish',
                    'Revised Romanization','Romanized','Russian',
                    'Simplified','Traditional','Yiddish']:
        if key in movie:
            alt_titles[key] = movie[key]
            movie.pop(key)
    if len(alt_titles) > 0:
        movie['alt_titles'] = alt_titles

    def change_column_name(old_name, new_name):
        """This function will replace the values of old name in new name and pop the old name from list"""
        if old_name in movie:
            movie[new_name] = movie.pop(old_name)

    change_column_name('Adaptation by', 'Writer(s)')
    change_column_name('Country of origin', 'Country')
    change_column_name('Directed by', 'Director')
    change_column_name('Distributed by', 'Distributor')
    change_column_name('Edited by', 'Editor(s)')
    change_column_name('Length', 'Running time')
    change_column_name('Original release', 'Release date')
    change_column_name('Music by', 'Composer(s)')
    change_column_name('Produced by', 'Producer(s)')
    change_column_name('Producer', 'Producer(s)')
    change_column_name('Productioncompanies ', 'Production company(s)')
    change_column_name('Productioncompany ', 'Production company(s)')
    change_column_name('Released', 'Release Date')
    change_column_name('Release Date', 'Release date')
    change_column_name('Screen story by', 'Writer(s)')
    change_column_name('Screenplay by', 'Writer(s)')
    change_column_name('Story by', 'Writer(s)')
    change_column_name('Theme music composer', 'Composer(s)')
    change_column_name('Written by', 'Writer(s)')

    return movie

In [25]:

def clean_movies_json(movies_wiki_json):
    # Using list comprehension to filter out Tv shows from Movies
    movies_wiki = [movie for movie in movies_wiki_raw
                   if ('Director' in movie or 'Directed by') in movie 
                   and 'imdb_link' in movie   
                   and 'No. of episodes' not in movie]
    
    # Using a list comprehension to clean each movie with function clean_movie
    clean_movies = [clean_movie(movie) for movie in movies_wiki]
    
    movies_wiki_df = pd.DataFrame(clean_movies)
    
    # Using regexp to extract id from link and drop duplicate rows from id
    movies_wiki_df['imdb_id'] = movies_wiki_df['imdb_link'].str.extract(r'(tt\d{7})')
    movies_wiki_df.drop_duplicates('imdb_id', inplace = True)
    
    # Using list comprehension to keep only columns with null values less than 10%
    wiki_columns_keeping = [column for column in movies_wiki_df.columns 
                            if movies_wiki_df[column].isnull().sum() < len(movies_wiki_df) * 0.9]
    movies_wiki_df = movies_wiki_df[wiki_columns_keeping]
    
    box_office = movies_wiki_df['Box office'].dropna()
    
    # Convert box_office to string values using lambda function
    box_office = box_office.apply(lambda x: ' '.join(x) if type(x) ==  list else x)
    
    # Using regular expressions to match two types of patterns in box office data
    type_one = r'\$\d+\.?\d*\s*[mb]illion'
    type_two = r'\$\d{1,3}(?:,\d{3})+'
    
    def parse_dollars(s):
        """ This function will parse the box_office column using regular expressions."""
        if type(s) != str:
            return np.nan
        # Matches only the input of type $###.# million
        if re.match(r'\$\s*\d+\.?\d*\s*milli?on', s, flags=re.IGNORECASE):
            s = re.sub('\$|\s|[a-zA-Z]', '', s)  # Removes the $ sign and ' million'
            value = float(s) * 10 ** 6  # Convert string to float and multiply by a million
            return value
        # Matches only the input of type $###.# billion
        elif re.match(r'\$\s*\d+\.?\d*\s*billi?on', s, flags=re.IGNORECASE):
            s = re.sub('\$|\s|[a-zA-Z]', '', s)  # Removes the $ sign and ' billion'
            value = float(s) * 10 ** 9  # Convert string to float and multiply by a billion
            return value
        # Matches only the input of type $###.# billion
        elif re.match(r'\$\s*\d{1,3}(?:[,\.]\d{3})+(?!\s[mb]illion)', s, flags=re.IGNORECASE):
            s = re.sub('\$|,', '', s)  # Removes $ dollar sign and commas
            value = float(s)  # Convert string to float
            return value
        else:
            return np.nan
    # Cleaning the box_office column using the parse_dollar function
    movies_wiki_df['box_office'] = box_office.str.extract(f'({type_one}|{type_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    movies_wiki_df.drop("Box office", axis=1, inplace = True)
    
    # Cleaning the release_date column using regular expressions
    release_date = movies_wiki_df['Release date'].dropna()
    release_date = release_date.apply(lambda x: ' '.join(x) if type(x) == list else x)
    date_type_one = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s[123]\d,\s\d{4}'
    date_type_two = r'\d{4}.[01]\d.[123]\d'
    date_type_three = r'(?:January|February|March|April|May|June|July|August|September|October|November|December)\s\d{4}'
    date_type_four = r'\d{4}'
    movies_wiki_df['release_date'] = pd.to_datetime(release_date.str.extract(f'({date_type_one}|{date_type_two}|{date_type_three}|{date_type_four})')[0], infer_datetime_format=True)
    movies_wiki_df.drop('Release date', axis=1, inplace= True)
    
    # Cleaning the budget column using regular expressions and parse_dollars funtion with patterns used in box_office cleaning
    budget = movies_wiki_df['Budget'].dropna()
    budget = budget.apply(lambda x: ' '.join(x) if type(x) == list else x)
    budget = budget.str.replace(r'\$.*[-—–](?![a-z])', '$', regex=True)
    movies_wiki_df['budget'] = budget.str.extract(f'({type_one}|{type_two})', flags=re.IGNORECASE)[0].apply(parse_dollars)
    movies_wiki_df.drop('Budget', axis=1, inplace=True)
    
    # Cleaning running time column using regular expresssions
    running_time = movies_wiki_df['Running time'].dropna()
    running_time = running_time.apply(lambda x: ' '.join(x) if type(x) == list else x)
    running_time_extract = running_time.str.extract(r'(\d+)\s*ho?u?r?s?\s*(\d*)|(\d+)|s*m')
    running_time_extract = running_time_extract.apply(lambda x: pd.to_numeric(x, errors='coerce')).fillna(0)
    movies_wiki_df['running_time'] = running_time_extract.apply(lambda x: x[0]*60 + x[1] if x[2] == 0 else x[2], axis=1)
    movies_wiki_df.drop('Running time', axis=1, inplace = True)
    
    return movies_wiki_df

In [26]:
# Using the above function to clean the movies json file 
movies_wiki_df = clean_movies_json(movies_wiki_raw)

In [27]:
movies_wiki_df.columns

Index(['url', 'year', 'imdb_link', 'title', 'Based on', 'Starring',
       'Cinematography', 'Country', 'Language', 'Director', 'Distributor',
       'Editor(s)', 'Composer(s)', 'Producer(s)', 'Production company(s)',
       'Writer(s)', 'imdb_id', 'box_office', 'release_date', 'budget',
       'running_time'],
      dtype='object')

In [28]:
# Merging the dataframes on imdb_id
df_movies = pd.merge(df_movies, movies_wiki_df, on='imdb_id', suffixes=['_wiki','_movies.met'])
df_movies.head()

Unnamed: 0,budget_wiki,genres,id,imdb_id,original_language,original_title,overview,popularity,poster_path,production_companies,...,Distributor,Editor(s),Composer(s),Producer(s),Production company(s),Writer(s),box_office,release_date_movies.met,budget_movies.met,running_time
0,30000000,"[{'id': 16, 'name': 'Animation'}, {'id': 35, '...",862,tt0114709,en,Toy Story,"Led by Woody, Andy's toys live happily in his ...",21.946943,/rhIRbceoE9lR4veEXuwCC2wARtG.jpg,"[{'name': 'Pixar Animation Studios', 'id': 3}]",...,Buena Vista Pictures Distribution,"[Robert Gordon, Lee Unkrich]",Randy Newman,"[Ralph Guggenheim, Bonnie Arnold]","[Walt Disney Pictures, Pixar Animation Studios]","[John Lasseter, Pete Docter, Andrew Stanton, J...",373600000.0,1995-11-19,30000000.0,81.0
1,65000000,"[{'id': 12, 'name': 'Adventure'}, {'id': 14, '...",8844,tt0113497,en,Jumanji,When siblings Judy and Peter discover an encha...,17.015539,/vzmL6fP7aPKNKPRTFnZmiUfciyV.jpg,"[{'name': 'TriStar Pictures', 'id': 559}, {'na...",...,TriStar Pictures,Robert Dalva,James Horner,"[Scott Kroopf, William Teitler]","[Interscope Communications, Teitler Film]","[Greg Taylor, Jim Strain, Chris Van Allsburg]",262800000.0,1995-12-15,65000000.0,104.0
2,0,"[{'id': 10749, 'name': 'Romance'}, {'id': 35, ...",15602,tt0113228,en,Grumpier Old Men,A family wedding reignites the ancient feud be...,11.7129,/6ksm1sjKMFLbO7UY2i6G1ju9SML.jpg,"[{'name': 'Warner Bros.', 'id': 6194}, {'name'...",...,Warner Bros. Pictures,"[Billy Weber, Seth Flaum, Maryann Brandon]",Alan Silvestri,"[John Davis, Richard C. Berman]",,Mark Steven Johnson,71500000.0,1995-12-22,25000000.0,101.0
3,16000000,"[{'id': 35, 'name': 'Comedy'}, {'id': 18, 'nam...",31357,tt0114885,en,Waiting to Exhale,"Cheated on, mistreated and stepped on, the wom...",3.859495,/16XOMpEaLWkrcPqSQqhTmeJuqQl.jpg,[{'name': 'Twentieth Century Fox Film Corporat...,...,20th Century Fox,Richard Chew,"Kenneth ""Babyface"" Edmonds","[Terry McMillan, Ronald Bass, Deborah Schindle...",,"[Terry McMillan, Ronald Bass]",82000000.0,1995-12-22,16000000.0,124.0
4,0,"[{'id': 35, 'name': 'Comedy'}]",11862,tt0113041,en,Father of the Bride Part II,Just when George Banks has recovered from his ...,8.387519,/e64sOI48hQXyru7naBFyssKFxVd.jpg,"[{'name': 'Sandollar Productions', 'id': 5842}...",...,Buena Vista Pictures,"[Adam Bernardi, Stephen A. Rotter]",Alan Silvestri,Nancy Meyers,Touchstone Pictures,"[Nancy Meyers, Charles Shyer]",76600000.0,1995-01-01,30000000.0,106.0


In [29]:
df_movies = df_movies.drop(df_movies[(df_movies['release_date_wiki'] > '1996-01-01') & (df_movies['release_date_movies.met'] < '1965-01-01')].index)

In [30]:
df_movies['Language'].apply(lambda x: tuple(x) if type(x) == list else x).value_counts(dropna=False)

English                              5480
NaN                                   134
(English, Spanish)                     68
(English, French)                      35
(English, Japanese)                    25
                                     ... 
(Bengali, English)                      1
English, Russian                        1
(English, German, Hebrew)               1
(English, Arabic, Italian, Latin)       1
(English, Mandarin, Russian)            1
Name: Language, Length: 198, dtype: int64

In [31]:
df_movies.drop(['title_wiki','release_date_wiki','Language','Production company(s)'], inplace=True, axis = 1)

In [32]:
def merge_common_data(df, movies_met_col, wiki_col):
    """This function merges the data from two common columns
    and drops the redundant column."""
    df[movies_met_col] = df.apply(lambda x: x[wiki_col] if x[movies_met_col] == 0 else x[movies_met_col], axis=1)
    df.drop(wiki_col, inplace=True, axis = 1)

In [33]:
merge_common_data(df_movies, 'runtime', 'running_time')
merge_common_data(df_movies, 'budget_movies.met', 'budget_wiki')
merge_common_data(df_movies, 'revenue', 'box_office')

In [34]:
df_movies.status.value_counts()

Released           6024
Rumored              12
Post Production      10
In Production         2
Name: status, dtype: int64

In [35]:
df_movies = df_movies.loc[:, ['id', 'imdb_id', 'original_language', 'original_title',
       'overview', 'popularity', 'production_companies',
       'production_countries', 'revenue', 'runtime', 'spoken_languages',
        'vote_average', 'vote_count', 'url', 'year',
       'imdb_link', 'title_movies.met', 'Based on', 'Starring',
       'Cinematography', 'Country', 'Director', 'Distributor', 'Editor(s)',
       'Composer(s)', 'Producer(s)', 'Writer(s)', 'release_date_movies.met',
       'budget_movies.met']]

In [36]:
df_movies.rename({'title_movies.met':'title',
                  'id':'movie_id',
                  'url':'wikipedia_url',
                  'budget_movies.met':'budget',
                  'release_date_movies.met':'release_date'},
                   axis=1, inplace=True)

In [37]:
# Converting lists into strings
for col in df_movies.columns:
    df_movies[col] = df_movies[col].apply(lambda x: ','.join(x) if type(x) ==  list else x)

In [38]:
# Using regex to extract relevant information and convert it to string
for col in ['production_companies','production_countries','spoken_languages']:
    df_movies[col] = df_movies[col].str.findall(r'name\'\:\s\'([a-zA-z() ]+)')
    df_movies[col] = df_movies[col].apply(lambda x: ','.join(x) if type(x) ==  list else x)

In [39]:
rating_counts = (df_ratings.groupby(['movieId','rating'], as_index=False).count() 
                .rename({'userId':'count'}, axis=1) 
                .pivot(index='movieId',columns='rating', values='count'))
rating_counts.columns = ['rating_' + str(col) for col in rating_counts.columns] 

In [40]:
df_movies = pd.merge(df_movies, rating_counts, left_on='movie_id', right_index=True, how='left')
df_movies[rating_counts.columns] = df_movies[rating_counts.columns].fillna(0)

In [43]:
df_movies.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 6052 entries, 0 to 6051
Data columns (total 39 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   movie_id              6052 non-null   int64         
 1   imdb_id               6052 non-null   object        
 2   original_language     6052 non-null   object        
 3   original_title        6052 non-null   object        
 4   overview              6047 non-null   object        
 5   popularity            6052 non-null   float64       
 6   production_companies  6052 non-null   object        
 7   production_countries  6052 non-null   object        
 8   revenue               5160 non-null   float64       
 9   runtime               6051 non-null   float64       
 10  spoken_languages      6052 non-null   object        
 11  vote_average          6052 non-null   float64       
 12  vote_count            6052 non-null   float64       
 13  wikipedia_url     

# 3. Load

In [54]:
# Create a connection to mysql database
engine = create_engine('mysql+pymysql://root:password@127.0.0.1:3306/movies_db')

In [60]:
# Insert the movies dataframe into sql database
df_movies.to_sql('table = movies_data', con = engine, if_exists='replace', index=False)

In [70]:
# Query to count the number of columns in table
df = pd.read_sql("SELECT count(column_name) as column_count from information_schema.columns where table_name = 'movies_data';" ,engine)
df

Unnamed: 0,column_count
0,39


In [73]:
# Query to count rows in the table
pd.read_sql("SELECT COUNT(*) AS row_count FROM movies_data;", engine)

Unnamed: 0,row_count
0,6052
