In [None]:
# Import dependencies: pandas (read in the CSV), create_engine/inspect from sqlalchemy (set up and inspect our database),
# and standard-library helpers for supplying the database password safely
import codecs
import os
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine, inspect

In [None]:
# Load the raw episode records from the CSV file into a DataFrame,
# then preview the first rows as a quick sanity check.
episodes_file = "episodes.csv"
episodes_df = pd.read_csv(filepath_or_buffer=episodes_file)
episodes_df.head(n=5)

In [None]:
# Create a filtered dataframe from specific columns.
# Here we are only really interested in the eventual primary key (podcast_uuid)
# and the sortable, descriptive columns; extraneous fields are filtered out.
episodes_cols = ["title", "description", "podcast_uuid"]

# Build the keyed frame as a single chain, without in-place mutation:
#   1. select the columns of interest,
#   2. rename podcast_uuid -> id (the other headers already have their final names),
#   3. keep only the first row seen for each id,
#   4. use id as the index.
# NOTE(review): de-duplicating on id keeps a single row per podcast_uuid --
# confirm that one episode per podcast is what is intended here.
episodes_transformed = (
    episodes_df[episodes_cols]
    .rename(columns={"podcast_uuid": "id"})
    .drop_duplicates(subset="id")
    .set_index("id")
)

# Display the head of our new dataframe
episodes_transformed.head()

In [None]:
# Discard every row that has a missing value in any column, then preview the result.
episodes_final = episodes_transformed.dropna(axis="index", how="any")
episodes_final.head()

In [None]:
# Order the rows by title in ascending order, then preview the result.
episodes_final_sorted = episodes_final.sort_values("title")
episodes_final_sorted.head()

In [None]:
# Check how a title search behaves with respect to letter case: select the rows
# whose title contains 'Stranger Things' (str.contains is case-sensitive by default).
title_matches = episodes_final_sorted['title'].str.contains('Stranger Things')
episode_find = episodes_final_sorted.loc[title_matches]
episode_find.head()

In [None]:
# Set up the connection to the local MySQL server (podcasts_db database) as root.
# The password is not stored in the notebook: it is read from the MYSQL_PASSWORD
# environment variable, falling back to an interactive prompt when that is unset.
db_password = os.environ.get("MYSQL_PASSWORD") or getpass("MySQL password for root: ")

# URL-escape the password so characters such as '@', ':' or '/' cannot be
# mistaken for URL delimiters when the connection URL is parsed.
connection_string = f"root:{quote_plus(db_password)}@localhost/podcasts_db"
engine = create_engine(f'mysql://{connection_string}')

In [None]:
# Display the table names in the database to confirm the connection works.
# Engine.table_names() is deprecated as of SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API is the supported way to list table names.
inspect(engine).get_table_names()

In [None]:
# Write the sorted frame to the 'episodes' table through the engine.
# Rows are appended if the table already exists, and the DataFrame index
# is written out as a column alongside the data.
episodes_final_sorted.to_sql(
    name='episodes',
    con=engine,
    index=True,
    if_exists='append',
)