## This notebook contains Python code for analyzing data relevant to travelers

In [None]:
import pandas as pd
import numpy as np

import pymysql as mysql
import getpass

import matplotlib.pyplot as plt
import seaborn as sns

We need to connect to the MySQL server, and specifically to the `ads507airlines` database. We then verify the connection by running `SHOW TABLES` to list every table in that database.

In [None]:
# Open a connection to the local MySQL server's `ads507airlines` database.
# The password is prompted for interactively so it is never stored in the notebook.
# `password` / `database` are PyMySQL's primary keyword names (`passwd` / `db`
# are only MySQLdb-compatibility aliases for them).
conn = mysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password=getpass.getpass('Enter password: '),
    database='ads507airlines',
)

In [None]:
# Sanity-check the connection: ask the server for the list of tables and keep
# the answer as a DataFrame so it can be inspected and iterated below.
tableNames = pd.read_sql(sql="SHOW TABLES", con=conn)

tableNames

In [None]:
# Print every table name on its own line.
# The column label matches the database the connection was opened against.
table_name_column = tableNames['Tables_in_ads507airlines']
for table_name in table_name_column:
    print(table_name)

### Example for the traveler reviewing historical busy seasons for multiple destinations

In [None]:
# Stack the five per-year seasonal tables (2018-2022) into one result set,
# tagging every row with the year of the table it was read from.
seasonal_selects = [
    f"SELECT *, {yr} as year FROM seasonal{yr}" for yr in range(2018, 2023)
]
query_string = "\n" + "\nUNION\n".join(seasonal_selects) + "\n;"

seasonal_df = pd.read_sql(sql=query_string, con=conn)

seasonal_df

In [None]:
# Bar chart of `Scheduled_Flights_Count` for rows whose origin is IAH:
# one group of bars per year, one bar colour per season.
iah_rows = seasonal_df.loc[seasonal_df['origin'] == 'IAH']

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(data=iah_rows, x='year', y='Scheduled_Flights_Count', hue='season', ax=ax)
ax.legend(loc='upper center')
ax.set_title('Seasonal Flights from IAH Airport')
ax.set_ylabel('Total Flights from IAH');

In [None]:
# Bar chart of `Scheduled_Flights_Count` for rows whose origin is JFK:
# one group of bars per year, one bar colour per season.
jfk_rows = seasonal_df.loc[seasonal_df['origin'] == 'JFK']

fig, ax = plt.subplots(figsize=(12, 4))
sns.barplot(data=jfk_rows, x='year', y='Scheduled_Flights_Count', hue='season', ax=ax)
ax.legend(loc='upper center')
ax.set_title('Seasonal Flights from JFK Airport')
ax.set_ylabel('Total Flights from JFK');

In [None]:
# Bar chart of `Scheduled_Flights_Count` for rows whose origin is SAN:
# one group of bars per year, one bar colour per season.
san_rows = seasonal_df.loc[seasonal_df['origin'] == 'SAN']

fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(data=san_rows, x='year', y='Scheduled_Flights_Count', hue='season', ax=ax)
ax.legend(loc='upper center')
ax.set_title('Seasonal Flights from SAN Airport')
ax.set_ylabel('Total Flights from SAN');

In [None]:
# Bar chart of `Scheduled_Flights_Count` for rows whose origin is SEA:
# one group of bars per year, one bar colour per season.
sea_rows = seasonal_df.loc[seasonal_df['origin'] == 'SEA']

fig, ax = plt.subplots(figsize=(12, 5))
sns.barplot(data=sea_rows, x='year', y='Scheduled_Flights_Count', hue='season', ax=ax)
ax.legend(loc='upper center')
ax.set_title('Seasonal Flights from SEA Airport')
ax.set_ylabel('Total Flights from SEA');

### Example for the traveler who wants to understand average flight delays by airline

In [None]:
# Combine the five per-year `yearly_delay` tables (2018-2022) into one result
# set, adding a `year` column that records which table each row came from.
yearly_delay_selects = []
for yr in (2018, 2019, 2020, 2021, 2022):
    yearly_delay_selects.append(f"SELECT *, {yr} as year from yearly_delay{yr}")
query_string = "\n" + "\nUNION\n".join(yearly_delay_selects) + ";"

airlines_df = pd.read_sql(sql=query_string, con=conn)

airlines_df

In [None]:
# Mean of `total_delay` over all loaded rows for each airline `name`,
# shown as a two-column table with reader-friendly headers.
mean_delay_by_name = airlines_df.groupby('name')['total_delay'].mean()
mean_delay_by_name.reset_index().rename(
    columns={'name': 'Airline', 'total_delay': 'Average Delay'}
)

### Example for the traveler who wants to see average flight delays by destination

In [None]:
# Combine the five per-year `destinations_` tables (2018-2022) into one result
# set, adding a `year` column that records which table each row came from.
destination_selects = (
    f"SELECT *, {yr} as year from destinations_{yr}" for yr in range(2018, 2023)
)
query_string = "\n" + "\nUNION\n".join(destination_selects) + ";"

destinations_df = pd.read_sql(sql=query_string, con=conn)
destinations_df

In [None]:
# Mean of `Total_Delay` over all loaded rows for each
# (`List_of_Destinations`, `origin`) pair.
destinations_by_delay_df = (
    destinations_df
    .groupby(['List_of_Destinations', 'origin'])['Total_Delay']
    .mean()
    .reset_index()
    # Only `Total_Delay` is relabelled. The grouped frame has no `name` column,
    # so a 'name' -> 'Destination' mapping would be silently ignored by
    # `rename`; the grouping columns keep their original names, which the
    # sort below relies on.
    .rename(columns={'Total_Delay': 'Average Delay'})
)

destinations_by_delay_df.sort_values(by=['origin', 'List_of_Destinations'])

In [None]:
# Combine the five per-year `holidays_` tables (2018-2022) into one result set,
# adding a `year` column that records which table each row came from.
holiday_select_template = "SELECT *, {year} as year FROM holidays_{year}"
holiday_selects = [
    holiday_select_template.format(year=yr) for yr in range(2018, 2023)
]
query_string = "\n" + "\nUNION\n".join(holiday_selects) + "\n;"

holidays_df = pd.read_sql(sql=query_string, con=conn)

holidays_df