In [49]:
#necessary libraries and modules to import

import requests
import json
import sqlite3
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob

In [50]:
# Collect every gzipped CSV in ./zippedData using glob.
# glob's ordering depends on the file system, so sort the paths to keep the
# load order (and the key order of any dict built from it) stable across machines.
csv_files = sorted(glob("./zippedData/*.csv.gz"))
csv_files

['./zippedData\\bom.movie_gross.csv.gz',
 './zippedData\\imdb.name.basics.csv.gz',
 './zippedData\\imdb.title.akas.csv.gz',
 './zippedData\\imdb.title.basics.csv.gz',
 './zippedData\\imdb.title.crew.csv.gz',
 './zippedData\\imdb.title.principals.csv.gz',
 './zippedData\\rt.movie_info.csv.gz',
 './zippedData\\rt.reviews.csv.gz',
 './zippedData\\tmdb.movies.csv.gz',
 './zippedData\\tn.movie_budgets.csv.gz']

In [51]:
# Build a {dataset_name: DataFrame} lookup from the compressed CSV paths.
# The dataset name is the file's basename with ".csv" removed and every remaining
# dot turned into an underscore, e.g. "bom.movie_gross.csv.gz" -> "bom_movie_gross_gz".
# Each file is read as UTF-8 with its first column used as the index.
csv_files_dict = {
    os.path.basename(csv_path).replace(".csv", "").replace(".", "_"): pd.read_csv(
        csv_path, index_col=0, encoding='utf-8'
    )
    for csv_path in csv_files
}

In [52]:
# List the dataset names now available in csv_files_dict.
print(csv_files_dict.keys())

dict_keys(['bom_movie_gross_gz', 'imdb_name_basics_gz', 'imdb_title_akas_gz', 'imdb_title_basics_gz', 'imdb_title_crew_gz', 'imdb_title_principals_gz', 'rt_movie_info_gz', 'rt_reviews_gz', 'tmdb_movies_gz', 'tn_movie_budgets_gz'])


In [53]:
# Bind each dataset to its own DataFrame variable.

# CSV-backed tables: the i-th name on the left receives the frame stored under
# the i-th key on the right, so the two sequences must stay in the same order.
(
    bom_movie_gross_df,
    imdb_name_basics_df,
    imdb_title_akas_df,
    imdb_title_basics_df,
    imdb_title_crew_df,
    imdb_title_principals_df,
    rotten_movie_info_df,
    rotten_movie_reviews_df,
    tmdb_movies_df,
    tn_movie_budgets_df,
) = (
    csv_files_dict[dataset_key]
    for dataset_key in (
        'bom_movie_gross_gz',
        'imdb_name_basics_gz',
        'imdb_title_akas_gz',
        'imdb_title_basics_gz',
        'imdb_title_crew_gz',
        'imdb_title_principals_gz',
        'rt_movie_info_gz',
        'rt_reviews_gz',
        'tmdb_movies_gz',
        'tn_movie_budgets_gz',
    )
)

# JSON-backed TMDB lookup tables, read from the current working directory.
# NOTE(review): 'tmdb_languages.json' is written by a json.dump cell further down
# this notebook, so on a fresh checkout that cell has to run before this one.
tmdb_genre_ids_df = pd.read_json('tmdb_genre_ids.json')
tmdb_languages_df = pd.read_json('tmdb_languages.json')

In [55]:
# Display the TMDB movies table.
tmdb_movies_df

Unnamed: 0,genre_ids,id,original_language,original_title,popularity,release_date,title,vote_average,vote_count
0,"[12, 14, 10751]",12444,en,Harry Potter and the Deathly Hallows: Part 1,33.533,2010-11-19,Harry Potter and the Deathly Hallows: Part 1,7.7,10788
1,"[14, 12, 16, 10751]",10191,en,How to Train Your Dragon,28.734,2010-03-26,How to Train Your Dragon,7.7,7610
2,"[12, 28, 878]",10138,en,Iron Man 2,28.515,2010-05-07,Iron Man 2,6.8,12368
3,"[16, 35, 10751]",862,en,Toy Story,28.005,1995-11-22,Toy Story,7.9,10174
4,"[28, 878, 12]",27205,en,Inception,27.920,2010-07-16,Inception,8.3,22186
...,...,...,...,...,...,...,...,...,...
26512,"[27, 18]",488143,en,Laboratory Conditions,0.600,2018-10-13,Laboratory Conditions,0.0,1
26513,"[18, 53]",485975,en,_EXHIBIT_84xxx_,0.600,2018-05-01,_EXHIBIT_84xxx_,0.0,1
26514,"[14, 28, 12]",381231,en,The Last One,0.600,2018-10-01,The Last One,0.0,1
26515,"[10751, 12, 28]",366854,en,Trailer Made,0.600,2018-06-22,Trailer Made,0.0,1


In [56]:
# Display the TMDB genre lookup; each row of its single 'genres' column holds an {'id', 'name'} dict.
tmdb_genre_ids_df

Unnamed: 0,genres
0,"{'id': 28, 'name': 'Action'}"
1,"{'id': 12, 'name': 'Adventure'}"
2,"{'id': 16, 'name': 'Animation'}"
3,"{'id': 35, 'name': 'Comedy'}"
4,"{'id': 80, 'name': 'Crime'}"
5,"{'id': 99, 'name': 'Documentary'}"
6,"{'id': 18, 'name': 'Drama'}"
7,"{'id': 10751, 'name': 'Family'}"
8,"{'id': 14, 'name': 'Fantasy'}"
9,"{'id': 36, 'name': 'History'}"


In [54]:
# Display the TMDB language lookup (columns: iso_639_1, english_name, name).
tmdb_languages_df

Unnamed: 0,iso_639_1,english_name,name
0,xx,No Language,No Language
1,aa,Afar,
2,af,Afrikaans,Afrikaans
3,ak,Akan,
4,an,Aragonese,
...,...,...,...
182,bn,Bengali,বাংলা
183,bs,Bosnian,Bosanski
184,ch,Chamorro,Finu' Chamorro
185,be,Belarusian,беларуская мова


In [12]:
# Show the current working directory, which the relative file paths in this notebook resolve against.
print(os.getcwd())

C:\Users\jtang\Flatiron-Git\mod1\mod1_project\movie_industry_insights


In [58]:
#setup for API key from config.py

from config import joe_key_tmdb

In [60]:
# Fetch TMDB's list of supported languages and decode the JSON body.
# - The API key goes through `params` so requests builds and URL-encodes the
#   query string instead of it being formatted into the URL by hand.
# - `timeout` stops the cell from hanging indefinitely on a stalled connection.
# - raise_for_status() surfaces HTTP errors (e.g. a rejected key) here, rather
#   than letting an error payload be saved and parsed by later cells.
tmdb_languages_response = requests.get(
    'https://api.themoviedb.org/3/configuration/languages',
    params={'api_key': joe_key_tmdb},
    timeout=30,
)
tmdb_languages_response.raise_for_status()
tmdb_languages_json = tmdb_languages_response.json()

In [61]:
# Display the decoded API response (a list of dicts with 'iso_639_1', 'english_name' and 'name').
tmdb_languages_json

[{'iso_639_1': 'xx', 'english_name': 'No Language', 'name': 'No Language'},
 {'iso_639_1': 'aa', 'english_name': 'Afar', 'name': ''},
 {'iso_639_1': 'af', 'english_name': 'Afrikaans', 'name': 'Afrikaans'},
 {'iso_639_1': 'ak', 'english_name': 'Akan', 'name': ''},
 {'iso_639_1': 'an', 'english_name': 'Aragonese', 'name': ''},
 {'iso_639_1': 'as', 'english_name': 'Assamese', 'name': ''},
 {'iso_639_1': 'av', 'english_name': 'Avaric', 'name': ''},
 {'iso_639_1': 'ae', 'english_name': 'Avestan', 'name': ''},
 {'iso_639_1': 'ay', 'english_name': 'Aymara', 'name': ''},
 {'iso_639_1': 'az', 'english_name': 'Azerbaijani', 'name': 'Azərbaycan'},
 {'iso_639_1': 'ba', 'english_name': 'Bashkir', 'name': ''},
 {'iso_639_1': 'bm', 'english_name': 'Bambara', 'name': 'Bamanankan'},
 {'iso_639_1': 'bi', 'english_name': 'Bislama', 'name': ''},
 {'iso_639_1': 'bo', 'english_name': 'Tibetan', 'name': ''},
 {'iso_639_1': 'br', 'english_name': 'Breton', 'name': ''},
 {'iso_639_1': 'ca', 'english_name': 'Cat

# Persist the decoded API response as 'tmdb_languages.json' in the working directory.
with open('tmdb_languages.json', 'w') as languages_file:
    languages_file.write(json.dumps(tmdb_languages_json))

# Inspect the structure of the API payload.
print(type(tmdb_languages_json))
# The languages endpoint returns a list of per-language dicts, and a list has no
# .keys(); show the field names of the first record instead. A dict payload is
# still handled directly, and an empty list prints nothing.
if isinstance(tmdb_languages_json, dict):
    print(tmdb_languages_json.keys())
elif tmdb_languages_json:
    print(tmdb_languages_json[0].keys())

# Reload the language lookup from the JSON file saved by this notebook.
# Uses the same relative filename that the json.dump cell writes, instead of a
# machine-specific absolute path with a misspelled filename
# ('tmdb_languagesqqq.json') that is never created anywhere in the notebook.
tmdb_languages_df = pd.read_json('tmdb_languages.json')