## Import Heroku PostgreSQL database into Pandas.
### proj2-team02: TeamMovies
### Project #2
### September 2020

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np

plt.rcParams['figure.figsize'] = (14.0, 8.0)

# Reflect Tables into SQLAlchemy ORM

In [2]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect, MetaData, Table, select

# Initialize the Base object using the automap_base in order to refelect the database.
from sqlalchemy.ext.automap import automap_base
Base = automap_base()
metadata = MetaData()

### Testing the Heroku Postgres

In [4]:
# Create an engine that can talk to the database
DATABASE_URL ='postgres://iojqcykrasthlf:46c64333c5e836c1eb50be341266b79b3fc712a205d240de628698d27bf1eeea@ec2-3-226-231-4.compute-1.amazonaws.com:5432/df9m7ufmbt05jk'
engine = sqlalchemy.create_engine(DATABASE_URL)

## Explore Database

In [5]:
# Use the Inspector to explore the Herohu database and print the table names
inspector = inspect(engine)
inspector.get_table_names()

['country_origin',
 'movie_country_junction',
 'production_company',
 'movie',
 'genre',
 'movie_genre_junction',
 'language',
 'movie_language_junction',
 'job_title',
 'movie_person_title_junction',
 'person']

In [6]:
# Print the names of tables in the Heroku PostgreSQL database:
print(engine.table_names())

['country_origin', 'movie_country_junction', 'production_company', 'movie', 'genre', 'movie_genre_junction', 'language', 'movie_language_junction', 'job_title', 'movie_person_title_junction', 'person']


In [7]:
# Use Inspector to print the column names and types for the movie table.
columns = inspector.get_columns('movie')
for col in columns:
    print(col['name'], col["type"])

movie_id VARCHAR(100)
movie_title TEXT
year_published INTEGER
movie_duration INTEGER
description TEXT
votes_avg DOUBLE PRECISION
votes_count INTEGER
budget VARCHAR(50)
usa_gross_income VARCHAR(50)
worlwide_gross_income VARCHAR(50)
company_id VARCHAR(100)


In [8]:
# Use Inspector to print the column names and types for the movie_genre_junction table.
columns = inspector.get_columns('movie_genre_junction')
for col in columns:
    print(col['name'], col["type"])

movie_id VARCHAR(100)
genre_id VARCHAR(100)
unique_id VARCHAR(100)


In [9]:
# Use Inspector to print the column names and types for the genre table.
columns = inspector.get_columns('genre')
for col in columns:
    print(col['name'], col["type"])

genre_id VARCHAR(100)
genre_name VARCHAR(255)


## Refection of Heroku database

In [10]:
# Use the prepare method on the Base object create a refection of the entire database.
Base.prepare(engine, reflect=True)

In [11]:
#The relection created ORM objects of tables in the database, and accessible under the class property of the automap Base.
Base.classes.keys()

['country_origin',
 'movie_country_junction',
 'production_company',
 'movie',
 'genre',
 'movie_genre_junction',
 'language',
 'movie_language_junction',
 'job_title',
 'movie_person_title_junction',
 'person']

In [12]:
# Let's create reference to the ORM objects.
Country_origin = Base.classes.country_origin
Movie_country_junction = Base.classes.movie_country_junction
Production_company = Base.classes.production_company
Movie = Base.classes.movie
Genre = Base.classes.genre
Movie_genre_junction = Base.classes.movie_genre_junction
Language = Base.classes.language
Movie_language_junction = Base.classes.movie_language_junction
Job_title = Base.classes.job_title
Movie_person_title_junction = Base.classes.movie_person_title_junction
Person = Base.classes.person

In [13]:
# Create our session link for Python to interact with the database.
session = Session(engine)

In [14]:
# Let's create a function that will take a ORM query statement and create a Pandas DataFrame!
def query_to_dataframe(query_stmt, my_database = engine):
    query_df = pd.read_sql_query(query_stmt, my_database)
    return query_df

### Let's reflect the movie_genre_junction table separately

In [15]:
# Create the Movie_genre_junction DataFrame
mov_gen_query_stmt = session.query(Movie_genre_junction).statement
mov_gen_df = pd.read_sql_query(mov_gen_query_stmt, session.bind)
mov_gen_df  #.head()

Unnamed: 0,movie_id,genre_id,unique_id
0,tt0000574,0,tt0000574_0
1,tt0000574,1,tt0000574_1
2,tt0000574,2,tt0000574_2
3,tt0001892,2,tt0001892_2
4,tt0002101,2,tt0002101_2
...,...,...,...
166216,tt9905412,2,tt9905412_2
166217,tt9905462,2,tt9905462_2
166218,tt9911774,2,tt9911774_2
166219,tt9914286,2,tt9914286_2


In [18]:
# Create the Genre DataFrame
genre_query_stmt = session.query(Movie).statement
genre_df = pd.read_sql_query(genre_query_stmt, session.bind)
genre_df.head(3)

Unnamed: 0,movie_id,movie_title,year_published,movie_duration,description,votes_avg,votes_count,budget,usa_gross_income,worlwide_gross_income,company_id
0,tt0000574,The Story of the Kelly Gang,1906,70,True story of notorious Australian outlaw Ned ...,6.1,537,$ 2250,,,0
1,tt0001892,Den sorte drøm,1911,53,Two men of high rank are both wooing the beaut...,5.9,171,,,,1
2,tt0002101,Cleopatra,1912,100,The fabled queen of Egypt's affair with Roman ...,5.2,420,$ 45000,,,2


In [19]:
 current_movie_info = session.query(Movie.movie_id,Movie.movie_title, Movie.year_published,\
                               Movie.movie_duration, Movie.budget, Movie.usa_gross_income,\
                               Movie.worlwide_gross_income,\
                               Country_origin.country_name, Country_origin.lat, Country_origin.long, Movie.votes_avg,\
                               Genre.genre_name,\
                               Production_company.company_name).filter(\
                               Movie.movie_id == Movie_country_junction.movie_id).filter(\
                               Movie.company_id == Production_company.company_id).filter(\
                               Movie.movie_id == Movie_genre_junction.movie_id).filter(\
                               Movie_genre_junction.genre_id == Genre.genre_id).filter(\
                               Movie_country_junction.country_id == Country_origin.country_id).statement
current_movie_info_df = pd.read_sql_query(current_movie_info, session.bind)
current_movie_info_df #.head()

Unnamed: 0,movie_id,movie_title,year_published,movie_duration,budget,usa_gross_income,worlwide_gross_income,country_name,lat,long,votes_avg,genre_name,company_name
0,tt0000574,The Story of the Kelly Gang,1906,70,$ 2250,,,Australia,-25.274398,133.775136,6.1,Biography,J. and N. Tait
1,tt0000574,The Story of the Kelly Gang,1906,70,$ 2250,,,Australia,-25.274398,133.775136,6.1,Crime,J. and N. Tait
2,tt0000574,The Story of the Kelly Gang,1906,70,$ 2250,,,Australia,-25.274398,133.775136,6.1,Drama,J. and N. Tait
3,tt0001892,Den sorte drøm,1911,53,,,,Germany,51.165691,10.451526,5.9,Drama,Fotorama
4,tt0001892,Den sorte drøm,1911,53,,,,Denmark,56.263920,9.501785,5.9,Drama,Fotorama
...,...,...,...,...,...,...,...,...,...,...,...,...,...
212720,tt9905412,Ottam,2019,120,INR 4000000,,$ 4791,India,20.593684,78.962880,7.8,Drama,Thomas Thiruvalla Films
212721,tt9905462,Pengalila,2019,111,INR 10000000,,,India,20.593684,78.962880,8.4,Drama,Benzy Productions
212722,tt9911774,Padmavyuhathile Abhimanyu,2019,130,,,,India,20.593684,78.962880,8.4,Drama,RMCC Productions
212723,tt9914286,Sokagin Çocuklari,2019,98,,,$ 2833,Turkey,38.963745,35.243322,7.2,Drama,Gizem Ajans


In [None]:
# Use a PostgreSQL query to get content of the movie table
movie_genre_junction_query = '''
SELECT * FROM public.movie_genre_junction
'''
# Create DataFrame from PostgreSQL query result.
movie_genre_junction_df = query_to_dataframe(movie_genre_junction_query)
# Display the DataFrame of the query results.
movie_genre_junction_df.head()

In [None]:
connection.close()