# Import pandas and extract data from csv files

In [None]:
import os

import pandas as pd
from sqlalchemy import create_engine, inspect

In [None]:
# Read the Disney Plus titles CSV into a dataframe and preview its first five rows.
csv_file = "Resources/disney_plus_titles.csv"
disney_df = pd.read_csv(filepath_or_buffer=csv_file)
disney_df.head(n=5)

In [None]:
# Read the Netflix titles CSV into a dataframe and preview its first five rows.
csv_file = "Resources/netflix_titles.csv"
netflix_df = pd.read_csv(filepath_or_buffer=csv_file)
netflix_df.head(n=5)

## Filter out anything that is not a movie

In [None]:
# Keep only the rows whose 'type' column equals 'Movie', then show the
# per-column non-null counts of what is left.
disney_movies_df = disney_df[disney_df['type'].eq('Movie')]
disney_movies_df.count()

In [None]:
# Keep only the rows whose 'type' column equals 'Movie', then show the
# per-column non-null counts of what is left.
netflix_movies_df = netflix_df[netflix_df['type'].eq('Movie')]
netflix_movies_df.count()

# Prep streaming data table

In [None]:
# Lookup table of streaming services: one [streaming_id, name] row each.
streaming_data = [
    [1, 'Netflix'],
    [2, 'Disney Plus'],
]
streaming_df = pd.DataFrame.from_records(
    streaming_data,
    columns=['streaming_id', 'streaming_service_name'],
)

In [None]:
streaming_df

# Clean dataframes

In [None]:
# Stack the Disney Plus and Netflix movie rows into one dataframe
# (row-wise concatenation), then show its per-column non-null counts.
combined_movies_df = pd.concat((disney_movies_df, netflix_movies_df))
combined_movies_df.count()

In [None]:
# Keep one row per title: the first occurrence stays, later repeats are dropped.
combined_movies_df = combined_movies_df.drop_duplicates(subset='title', keep='first')
combined_movies_df

In [None]:
# Index the rows by show_id and rename the 'cast' column to 'cast_name'.
combined_movies_df = (
    combined_movies_df
    .set_index("show_id")
    .rename(columns={"cast": "cast_name"})
)

In [None]:
combined_movies_df

In [None]:
# Give every movie a sequential, 1-based movie_id following the current row order.
combined_movies_df = combined_movies_df.assign(
    movie_id=range(1, len(combined_movies_df) + 1)
)

In [None]:
combined_movies_df

In [None]:
# Narrow the dataframe to the listed columns, with movie_id first.
combined_movies_df = combined_movies_df.loc[
    :,
    [
        "movie_id",
        "title",
        "director",
        "cast_name",
        "country",
        "date_added",
        "release_year",
        "rating",
        "duration",
        "listed_in",
        "description",
    ],
]

In [None]:
combined_movies_df

# Build the association table 

In [None]:
# Build the Disney Plus rows of the movie/streaming association table:
# one [movie_id, streaming_id] pair per Disney Plus movie whose title is
# found in combined_movies_df.

# Index the titles once so each lookup is O(1) instead of rescanning the
# whole combined dataframe for every Disney Plus row. setdefault keeps the
# first movie_id seen for a title (same as stopping at the first match), and
# missing (NaN) titles are left out because NaN never compares equal to
# anything. .tolist() yields plain Python ints for the ids.
movie_id_by_title = {}
for movie_title, movie_id in zip(
    combined_movies_df['title'], combined_movies_df['movie_id'].tolist()
):
    if pd.notna(movie_title):
        movie_id_by_title.setdefault(movie_title, movie_id)

# 2 is the streaming_id assigned to Disney Plus in streaming_data.
joined = [
    [movie_id_by_title[title], 2]
    for title in disney_movies_df['title']
    if title in movie_id_by_title
]

print(joined)

In [None]:
# Turn the Disney Plus [movie_id, streaming_id] pairs into a two-column dataframe.
disney_temp_df = pd.DataFrame(data=joined, columns=['movie_id', 'streaming_id'])
disney_temp_df

In [None]:
# Build the Netflix rows of the movie/streaming association table:
# one [movie_id, streaming_id] pair per Netflix movie whose title is
# found in combined_movies_df.

# Index the titles once so each lookup is O(1) instead of rescanning the
# whole combined dataframe for every Netflix row. setdefault keeps the
# first movie_id seen for a title (same as stopping at the first match), and
# missing (NaN) titles are left out because NaN never compares equal to
# anything. .tolist() yields plain Python ints for the ids.
movie_id_by_title = {}
for movie_title, movie_id in zip(
    combined_movies_df['title'], combined_movies_df['movie_id'].tolist()
):
    if pd.notna(movie_title):
        movie_id_by_title.setdefault(movie_title, movie_id)

# 1 is the streaming_id assigned to Netflix in streaming_data.
joined = [
    [movie_id_by_title[title], 1]
    for title in netflix_movies_df['title']
    if title in movie_id_by_title
]

print(joined)

In [None]:
# Turn the Netflix [movie_id, streaming_id] pairs into a two-column dataframe.
temp_df = pd.DataFrame(data=joined, columns=['movie_id', 'streaming_id'])
temp_df

In [None]:
# Combine the Disney Plus and Netflix association rows into one table.
# ignore_index gives the result a clean 0..n-1 index instead of repeating
# each input's labels. drop_duplicates removes identical
# (movie_id, streaming_id) pairs, which appear when one service lists the
# same title more than once (the per-service frames are not de-duplicated
# by title, but combined_movies_df is).
# NOTE(review): such repeated pairs would violate a composite primary key
# on the association table -- confirm against the movie_streaming schema.
movie_streaming_df = (
    pd.concat([disney_temp_df, temp_df], ignore_index=True)
    .drop_duplicates()
    .reset_index(drop=True)
)

In [None]:
movie_streaming_df

# Connect to local database

In [None]:
## BEFORE RUNNING THIS MAKE SURE YOU HAVE A LOCAL POSTGRES DB CREATED CALLED movies_db
## To use different credentials or a different host, set the
## MOVIES_DB_CONNECTION environment variable to
## "user:password@host:port/database" instead of editing this cell.

# Falls back to the local default connection details when the variable is unset.
rds_connection_string = os.environ.get(
    "MOVIES_DB_CONNECTION", "postgres:bootcamp@localhost:5432/movies_db"
)
engine = create_engine(f'postgresql://{rds_connection_string}')

In [None]:
# List the tables that already exist in the database. Engine.table_names()
# is deprecated in SQLAlchemy 1.4 and removed in 2.0; the Inspector API is
# the supported replacement.
inspect(engine).get_table_names()

In [None]:
# Append the streaming-service rows to the 'streaming' table; the dataframe
# index is not written as a column.
streaming_df.to_sql(
    name='streaming',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:

# Append the movie rows to the 'movies' table; the dataframe index (show_id)
# is not written as a column.
combined_movies_df.to_sql(
    name='movies',
    con=engine,
    if_exists='append',
    index=False,
)

In [None]:
# THIS DOESN'T WORK YET
# NOTE(review): presumably the insert violates a key constraint on the
# movie_streaming table (e.g. repeated (movie_id, streaming_id) pairs in
# movie_streaming_df) -- confirm against the table schema and the database
# error message.
movie_streaming_df.to_sql(
    name='movie_streaming',
    con=engine,
    if_exists='append',
    index=False,
)