In [None]:
!pip install sorted_months_weekdays
!pip install sort_dataframeby_monthorweek

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import folium
from folium.plugins import MarkerCluster # for world map

from sorted_months_weekdays import *
from sort_dataframeby_monthorweek import * #for sorting based on month

import warnings
# Silence every warning category for the rest of the session so library notices
# do not clutter the cell outputs.
# NOTE(review): this also hides deprecation warnings from pandas/matplotlib/seaborn.
warnings.filterwarnings('ignore')

In [None]:
# Path of the airport-traffic CSV, kept in one place so it is easy to repoint
DATA_PATH = "../input/covid19s-impact-on-airport-traffic/covid_impact_on_airport_traffic.csv"

# Load the raw data and preview the first rows
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Derive the weekday name of every row from its Date value
df['Day'] = df['Date'].pipe(pd.to_datetime).dt.day_name()

In [None]:
# Derive the month name of every row from its Date value
df['Month'] = df['Date'].pipe(pd.to_datetime).dt.month_name()

In [None]:
# Centroid is parsed by dropping its first 6 characters and its last character
# and splitting the rest on a space -- presumably WKT-like text of the form
# "POINT(<longitude> <latitude>)"; TODO confirm against the raw file.
# Parse it once for both columns (vectorised, no row-wise apply) and store the
# coordinates as floats rather than strings so they behave as numbers in
# describe(), plots and the map.
centroid_parts = df['Centroid'].str[6:-1].str.split(' ', expand=True)
df['long'] = centroid_parts[0].astype(float)
df['lat'] = centroid_parts[1].astype(float)

In [None]:
# Discard the columns that are not used further on
# (Centroid has already been split into the long/lat columns)
df = df.drop(columns=['AggregationMethod', 'Version', 'Centroid'])

In [None]:
# Preview the first rows again now that Day/Month/long/lat were added and the unused columns dropped
df.head()

In [None]:
# (number of rows, number of columns) of the prepared frame
df.shape

In [None]:
# Number of missing values in each column
df.isna().sum()

In [None]:
# Summary statistics as reported by describe() with its default settings
df.describe()

In [None]:
# Column dtypes, non-null counts and memory usage
df.info()

In [None]:
# Remove the " (the)" suffix from country names,
# e.g. "United States of America (the)" becomes "United States of America"
df['Country'] = (
    df['Country']
    .str.replace(" (the)", "", regex=False)
    .astype(object)
)

In [None]:
#Encode the input Variables
def Encode(df):
    """Return a copy of ``df`` with its feature columns integer-encoded.

    Every column except ``PercentOfBaseline`` and ``Date`` is replaced by the
    integer codes from ``Series.factorize()``: distinct values are numbered
    0, 1, 2, ... in order of first appearance. Only the codes (element ``[0]``
    of the factorize result) are kept; the array of unique values that maps
    the codes back to the inputs is discarded.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. It is left untouched; the encoding is applied to a copy.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same columns as ``df``.
    """
    # Work on a copy so the caller's frame is never modified as a side effect
    encoded = df.copy()
    passthrough = ['PercentOfBaseline', 'Date']
    for column in encoded.columns[~encoded.columns.isin(passthrough)]:
        encoded[column] = encoded[column].factorize()[0]

    return encoded

# Encode a copy so that df itself keeps its readable values for the plots further down
df_en = Encode(df.copy())
df_en.head()

In [None]:
#correlation between variables
# Restrict to numeric columns first: Encode() leaves 'Date' un-encoded
# (presumably still text), and DataFrame.corr() raises on non-numeric columns
# in pandas >= 2.0, whereas older versions silently dropped them.
corrmat = df_en.select_dtypes(include='number').corr()
top_corr_features = corrmat.index
plt.figure(figsize=(20,10))
# Reuse the matrix computed above instead of calling corr() a second time
g = sns.heatmap(corrmat, annot=True, cmap='RdYlGn')

# Exploratory Data Analysis

In [None]:
fig = plt.figure(figsize=(20,15))

# Stacked histogram of PercentOfBaseline with one hue per country and KDE curves
ax = sns.histplot(
    data=df,
    x='PercentOfBaseline',
    hue='Country',
    multiple='stack',
    kde=True,
    shrink=.8,
)

# Enlarge the legend entries and the legend title
legend = ax.get_legend()
plt.setp(legend.get_texts(), fontsize='22')
plt.setp(legend.get_title(), fontsize='32')

# Axis labels, title and tick sizes, set directly on the axes histplot returned
ax.set_ylabel("Count", size=20)
ax.set_xlabel("Percentage of baseline", size=20)
ax.set_title('Country wise percentage of baseline', size=20)
ax.tick_params(labelsize=15)
plt.show()

### Number of airports per country

In [None]:
# Count the distinct airports of every country that occurs in the data rather
# than looking up a hard-coded list of names: a name that is missing from (or
# spelled differently in) the data would silently be counted as 0 airports,
# and a country absent from the list would be left out altogether.
# Countries come out in alphabetical order (groupby sorts its keys).
airports_per_country = df.groupby('Country')['AirportName'].nunique()
country_name = airports_per_country.index.tolist()
count_list = airports_per_country.tolist()
count_list

In [None]:
# Bar chart of the airport count of each country
fig = plt.figure(figsize=(20,7))
bar_ax = fig.gca()
bar_ax.bar(country_name, count_list)
bar_ax.set_ylabel("No.of Airports ", size=20)
bar_ax.set_xlabel("Country", size=20)
bar_ax.set_title("Country's airport count", size=20)
bar_ax.tick_params(labelsize=15)
plt.show()

### Country-wise distribution of the dataset

In [None]:
def make_autopct(values):
    """Build an ``autopct`` formatter for a pie chart.

    Parameters
    ----------
    values : sequence
        The wedge sizes passed to the pie call. Accepted for interface
        compatibility; the label shows the percentage only, so the sizes
        are not read.

    Returns
    -------
    callable
        ``my_autopct(pct)``, which formats ``pct`` with two decimals and a
        trailing percent sign, e.g. ``25.0`` -> ``'25.00%'``.
    """
    def my_autopct(pct):
        # Only the percentage is displayed, so no absolute count is derived
        # from `values` here.
        return '{p:.2f}%'.format(p=pct)
    return my_autopct

In [None]:
# Number of rows recorded for each country
df['Country'].value_counts()

In [None]:
# Rows per country, computed once and reused for both the labels and the wedge sizes
country_counts = df['Country'].value_counts()
labels = country_counts.index
values = country_counts.values

colors = ['skyblue','pink', 'yellow', 'green']
fig1, ax1 = plt.subplots()
ax1.pie(
    values,
    labels=labels,
    autopct=make_autopct(values),
    colors=colors,
    startangle=90,
    radius=3,
    shadow=True,
)
plt.show()

In [None]:
# Base map using the "cartodbpositron" tile set
world_map= folium.Map(tiles="cartodbpositron")
# Cluster layer attached to the map; the markers are added to this layer, not to the map directly
marker_cluster = MarkerCluster().add_to(world_map)

In [None]:
# Add one circle marker per row of df to the cluster layer.
# Iterating the two coordinate columns together avoids materialising a full
# row Series with df.iloc[i] on every pass, and the explicit float() accepts
# coordinates stored either as text or as numbers.
radius = 5
for lat, long in zip(df['lat'], df['long']):
    folium.CircleMarker(location=[float(lat), float(long)], radius=radius, fill=True).add_to(marker_cluster)

# Display the finished map as the cell output
world_map

### PercentOfBaseline of each airport

In [None]:
# One line chart of PercentOfBaseline over Date for every airport
for i in df['AirportName'].unique():
    # .loc selects the rows and the two columns in a single step (no chained indexing)
    df_baseline = df.loc[df['AirportName'] == i, ['PercentOfBaseline', 'Date']].sort_values('Date')

    fig = plt.figure(figsize=(60,20))

    sns.lineplot(data=df_baseline, x="Date", y="PercentOfBaseline")

    plt.xticks(rotation=90)
    plt.ylabel("PercentOfBaseline", size=50)
    plt.xlabel("Date", size=50)
    plt.yticks(fontsize=40)

    plt.title(i, size=60)
    plt.grid()

    # Render inside the loop so these very large figures do not all stay open
    # until the end of the cell (the per-country monthly loop does the same)
    plt.show()


### Country's Avg PercentOfBaseline per month

In [None]:
for i in df['Country'].unique():

    # Rows of the current country, ordered by month name
    country_rows = df.loc[df['Country'] == i, ['PercentOfBaseline', 'Month']]
    df_month = country_rows.sort_values('Month')

    # Mean PercentOfBaseline of each month
    df_month_avg = df_month.groupby('Month', as_index=False)['PercentOfBaseline'].mean()

    # Reorder the rows by month with the sort_dataframeby_monthorweek helper
    df_month_avg_sort = Sort_Dataframeby_Month(df=df_month_avg, monthcolumnname='Month')

    # Bar chart of the monthly averages, labelled through the returned axes
    month_ax = df_month_avg_sort.plot.bar(x='Month', y='PercentOfBaseline', figsize=(20,10))
    month_ax.set_ylabel("Average percent of Baseline", size=25)
    month_ax.set_xlabel("Month", size=25)
    month_ax.set_title(i, size=30)
    month_ax.tick_params(labelsize=20)
    plt.show()

### Airport's Avg PercentOfBaseline per month

In [None]:

# One bar chart of the average PercentOfBaseline per month for every airport
for i in df['AirportName'].unique():

    # .loc selects the rows and the two columns in a single step (no chained indexing).
    # No sort is needed before grouping: groupby orders its keys itself and the
    # helper below reorders the months afterwards.
    df_month = df.loc[df['AirportName'] == i, ['PercentOfBaseline', 'Month']]
    df_month_avg = df_month.groupby('Month', as_index=False)['PercentOfBaseline'].mean()
    df_month_avg_sort = Sort_Dataframeby_Month(df=df_month_avg, monthcolumnname='Month')

    df_month_avg_sort.plot.bar(x='Month', y='PercentOfBaseline', figsize=(20,10))

    plt.ylabel("Average percent of Baseline", size=25)
    plt.xlabel("Month", size=25)
    plt.title(i, size=30)
    plt.tick_params(labelsize=20)

    # Render inside the loop, as the per-country loop does, so figures do not
    # accumulate in memory until the end of the cell
    plt.show()

