In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import os
import csv
from sqlalchemy import create_engine
import cpi
import mysql_conn



In [2]:
# Load the raw movie export into a DataFrame and list the columns it provides.
movie_load = 'movie_data.csv'
movie_df = pd.read_csv(filepath_or_buffer=movie_load, encoding='utf8')
movie_df.columns

Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
       'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
       'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
       'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
       'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
       'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
       'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
       'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
      dtype='object')

In [3]:
# Map the raw column names onto the title-cased names used in the rest of the notebook.
# Columns not listed here keep their original names.
column_renames = {
    'director_name': 'Director',
    'gross': 'Gross_Income',
    'movie_title': 'Movie_Title',
    'content_rating': 'Content_Rating',
    'budget': 'Budget',
    'title_year': 'Year',
    'imdb_score': 'IMDB_Score',
}
movie_df = movie_df.rename(columns=column_renames)

In [4]:
# Convert the money and year columns from floats to integers.
# Rows with missing values are removed first because an integer column cannot hold NaN.
# NOTE(review): dropna() with no subset discards a row when *any* column is missing,
# including columns that are not used later -- consider dropna(subset=...) if that
# removes too many films.
# 'int64' is requested explicitly: plain `int` is only 32 bits wide on Windows with
# NumPy < 2, which cannot hold values above ~2.1 billion.
integer_columns = ('Gross_Income', 'Budget', 'Year')
movie_df = (
    movie_df
    .dropna()
    .reset_index(drop=True)
    .astype({column: 'int64' for column in integer_columns})
)

In [5]:
# Inflation-adjust gross income and budget with the `cpi` package, treating each
# film's release year as the year the dollar amount was recorded in.
# Results land in new '<column>_ADJ' columns; the originals are left untouched.
for raw_column in ('Gross_Income', 'Budget'):
    movie_df[f'{raw_column}_ADJ'] = movie_df.apply(
        lambda row, col=raw_column: cpi.inflate(row[col], row['Year']),
        axis=1,
    )

In [6]:
# Keep only the fields needed for the analysis, in presentation order, then preview them.
analysis_columns = [
    'Movie_Title', 'Year', 'Content_Rating', 'Budget_ADJ',
    'Gross_Income_ADJ', 'Director', 'IMDB_Score',
]
movie_df = movie_df[analysis_columns]
movie_df.head()

Unnamed: 0,Movie_Title,Year,Content_Rating,Budget_ADJ,Gross_Income_ADJ,Director,IMDB_Score
0,10 Cloverfield Lane,2016,PG-13,15319550.0,73428880.0,Dan Trachtenberg,7.3
1,10 Things I Hate About You,1999,PG-13,23540940.0,56168830.0,Gil Junger,7.2
2,102 Dalmatians,2000,G,120994200.0,95288700.0,Kevin Lima,4.8
3,10th & Wolf,2006,R,9726984.0,65026.1,Robert Moresco,6.4
4,12 Rounds,2009,PG-13,25136180.0,13976780.0,Renny Harlin,5.6


In [7]:
# Save the non-formatted analysis table to CSV (to_csv writes the index as the first column by default).
movie_df.to_csv("Analysis.csv")
# Display the column names as a quick check of what was exported.
movie_df.columns

Index(['Movie_Title', 'Year', 'Content_Rating', 'Budget_ADJ',
       'Gross_Income_ADJ', 'Director', 'IMDB_Score'],
      dtype='object')

In [8]:
# Build a presentation version of the table: dollar amounts as '$1,234.56' strings
# and the year as a plain integer string.
# Work on a copy -- a bare `new_movie_df = movie_df` only creates a second name for
# the same frame, so the formatting would overwrite movie_df's numeric columns with
# text (breaking later numeric use and making this cell fail when re-run).
new_movie_df = movie_df.copy()
format_mapping = {'Gross_Income_ADJ': '${:,.2f}', 'Budget_ADJ': '${:,.2f}', 'Year': '{:.0f}'}
for key, value in format_mapping.items():
    # value.format is the bound str.format of the template, applied to each cell.
    new_movie_df[key] = movie_df[key].apply(value.format)

In [9]:
# Write the display-formatted table (currency and year as strings) to its own CSV file.
new_movie_df.to_csv("Clean_Data.csv")

In [10]:
#OSCAR DATAFRAME

In [11]:
# Load the Oscar awards CSV into a DataFrame and list the columns it provides.
oscar_load = 'oscar.csv'
oscar_df = pd.read_csv(filepath_or_buffer=oscar_load, encoding='utf8')
oscar_df.columns

Index(['Year', 'Ceremony', 'Award', 'Winner', 'Name', 'Film', 'Award_Count'], dtype='object')

In [12]:
# Narrow the Oscar table to the ceremony number, film title and award count, then preview it.
oscar_columns = ['Ceremony', 'Film', 'Award_Count']
oscar_df = oscar_df[oscar_columns]
oscar_df.head()

Unnamed: 0,Ceremony,Film,Award_Count
0,1,The Noose,0
1,1,The Last Command,1
2,1,A Ship Comes In,0
3,1,7th Heaven,3
4,1,Sadie Thompson,0


In [13]:
# Save the trimmed Oscar table (Ceremony, Film, Award_Count) to CSV.
oscar_df.to_csv("Oscar_Mod.csv")

In [14]:
#SQL CONNECTION

In [16]:
# Create the SQLAlchemy engine for the local `movie_search_db` MySQL database.
# The password is read from the local mysql_conn module and percent-encoded so that
# characters such as '@', ':', '/' or '%' cannot be misread as URL delimiters.
# The bare `mysql://` scheme selects the MySQLdb driver (the `mysqlclient` package);
# it must be installed in the kernel's environment, otherwise create_engine raises
# "ModuleNotFoundError: No module named 'MySQLdb'".
from urllib.parse import quote

connection_string = (
   f"root:{quote(str(mysql_conn.password), safe='')}@localhost/movie_search_db")
engine = create_engine(f'mysql://{connection_string}')


ModuleNotFoundError: No module named 'MySQLdb'

In [None]:
# List the table names in the connected database.
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0; the
# Inspector API returns the same list of names and works on old and new versions.
from sqlalchemy import inspect

inspect(engine).get_table_names()

In [17]:
# Show the path of the Python interpreter this kernel is running.
# NOTE(review): presumably used to find the environment where the MySQL driver
# has to be installed -- confirm, and drop this cell once that is resolved.
import sys
sys.executable

'C:\\Users\\pablo\\Anaconda3\\python.exe'

In [None]:
# Upload the movie table (movie_df, built in the cells above) to the `movies` MySQL table.
# if_exists='append' adds to an existing table, so re-running this cell inserts the
# same rows again; index=True stores the DataFrame index as an extra column.
movie_df.to_sql(
   name='movies', con=engine,
   if_exists='append', index=True)

In [None]:
# Upload the Oscar table to the `oscar` MySQL table.
# if_exists='append' adds to an existing table, so re-running this cell inserts the
# same rows again; index=True stores the DataFrame index as an extra column.
oscar_df.to_sql(
   name='oscar', con=engine,
   if_exists='append', index=True)