# Data review

In [None]:
# Import libraries
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import calendar

In [None]:
# Load the airport-traffic CSV into the notebook-wide frame `df` and preview it.
# The location is kept in one named constant so it is easy to spot and change.
DATA_PATH = '../input/covid19s-impact-on-airport-traffic/covid_impact_on_airport_traffic.csv'
df = pd.read_csv(DATA_PATH)
df.head()

* AggregationMethod: Aggregation period used to compute this metric

* Date: Date on which the traffic volume was measured, in YYYY-MM-DD format.

* Version: Version of this dataset

* AirportName: Name of airport

* PercentOfBaseline: Proportion of trips on this date compared with the average number of trips on the same day of the week during the baseline period, i.e. 1 February 2020 – 15 March 2020

* Centroid: Geography representing centroid of the Airport polygon

* City: City within which the Airport is located

* State: State within which the Airport is located

* ISO_3166_2: ISO-3166-2 code representing Country and Subdivision

* Country: Country within which the Airport is located

* Geography: Polygon of the Airport that is used to compute this metric

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

In [None]:
# Information about dataframe: column dtypes and non-null counts.
# memory_usage="deep" makes pandas inspect object (string) columns so the
# reported memory footprint is the real one rather than an estimate.
df.info(memory_usage="deep")

In [None]:
# All columns, shown with their original labels (they are lower-cased in the
# preprocessing section that follows)
df.columns

# Data preprocessing

In [None]:
# Normalise every column label: trim surrounding whitespace and lower-case it,
# so later cells can refer to columns as e.g. 'date' or 'airportname'.
df.columns = [label.strip().lower() for label in df.columns]

In [None]:
# Convert the 'date' column from strings to pandas datetime values so that
# date components (weekday, month) can be derived from it.
df['date'] = df['date'].pipe(pd.to_datetime)

In [None]:
# Add "weekday" and "month" columns derived from the parsed 'date' column.
# Both use the vectorized .dt accessor plus a dictionary lookup instead of a
# per-row Python lambda; the resulting labels are the same.

# Series.dt.weekday numbers the days Monday=0 ... Sunday=6, matching this map.
days={0:'Monday',1:'Tuesday',2:'Wednesday',3:'Thursday',4:"Friday",5:'Saturday',6:'Sunday'}
df['weekday'] = df['date'].dt.weekday.map(days)

# calendar.month_abbr is indexed 1..12 (index 0 is an empty placeholder), so
# enumerate() yields exactly the month-number -> abbreviation mapping needed.
month_abbr_by_number = dict(enumerate(calendar.month_abbr))
df['month'] = df['date'].dt.month.map(month_abbr_by_number)

In [None]:
# Number of missing values in each column
df.isna().sum()

**There are no missing values, so no imputation is needed.**

In [None]:
# Preview the first rows after preprocessing (lower-cased column labels,
# parsed dates, and the new 'weekday' and 'month' columns)
df.head()

In [None]:
# Re-check column dtypes and deep memory usage after preprocessing
df.info(memory_usage="deep")

# Data visualization

In [None]:
# Share of records per country, drawn as a pie chart.
# The counts are explicitly renamed to 'country' before being turned into a
# frame: pandas >= 2.0 names the result of value_counts() 'count', so relying
# on the implicit name makes the y='country' lookup below raise KeyError there.
# On older pandas the rename is a no-op.
df_Country_count = df["country"].value_counts().rename("country").to_frame()

g = df_Country_count.plot.pie(y='country', autopct='%1.1f%%', figsize=(7, 7))
g.set_title("Records for each country")

**Of all the records, about 61% come from airports in the United States and about 31% from airports in Canada.**

In [None]:
# Share of records per weekday, drawn as a pie chart.
# The counts are explicitly renamed to 'weekday' before being turned into a
# frame: pandas >= 2.0 names the result of value_counts() 'count', so relying
# on the implicit name makes the y='weekday' lookup below raise KeyError there.
# On older pandas the rename is a no-op.
df_weekday_count = df["weekday"].value_counts().rename("weekday").to_frame()
g = df_weekday_count.plot.pie(y='weekday', autopct='%1.1f%%', figsize=(7, 7))
g.set_title("records for each weekday")

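**The chart counts dataset records per weekday. Weekends account for slightly fewer records, which suggests that people fly less on weekends and that there are consequently fewer flights.**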

In [None]:
# Bar chart of the number of records per airport, most frequent airport first.
airport_order = df['airportname'].value_counts().index

fig, ax = plt.subplots(figsize=(10, 5))
g = sns.countplot(data=df, x="airportname", order=airport_order, ax=ax)

# Airport names are long, so the tick labels are turned vertical.
g.set_xticklabels(g.get_xticklabels(), rotation=90)
g.set_title("records for each airport")

In [None]:
# Mean percent-of-baseline for every month label present in the data.
mean_percentofbaseline = df.groupby(['month'])['percentofbaseline'].mean()

# groupby() sorts the abbreviated month names alphabetically (Apr, Aug, Dec, ...),
# which would draw the line out of chronological order. Put the months that
# actually occur into calendar order and derive the x labels from the data
# instead of a hard-coded tuple, so values and labels can never get out of step.
# NOTE(review): calendar order equals chronological order only while the data
# stays within a single calendar year — confirm if more data is added.
calendar_order = [abbr for abbr in list(calendar.month_abbr)[1:]
                  if abbr in mean_percentofbaseline.index]
mean_percentofbaseline = mean_percentofbaseline.reindex(calendar_order)
month = tuple(calendar_order)

plt.figure(figsize=(9, 3))
plt.plot(month, mean_percentofbaseline)
plt.title('Mean Percant of Baseline per month')
plt.xlabel('Month')
plt.ylabel('Mean Percant of Baseline')
plt.show()

**Here we can see how the percent of baseline dropped during the first coronavirus outbreak, when a quarantine was declared. The second worldwide outbreak occurred closer to May, and in May we again see a decrease in air travel. (This reading assumes the months on the x-axis are taken in calendar order.)**

In [None]:
import folium
from folium.plugins import MarkerCluster

# Split the 'centroid' text into numeric longitude / latitude columns.
# The first six characters and the final character are stripped and the rest is
# split on a space; the first piece is stored as 'long', the second as 'lat'.
# NOTE(review): presumably the values look like "POINT(<longitude> <latitude>)"
# — confirm against the data.
coords = df['centroid'].str[6:-1].str.split(' ', expand=True)
df['long'] = coords[0].astype(float)
df['lat'] = coords[1].astype(float)
# 'centroid' is deliberately left on df: dropping it would make this cell fail
# when it is run a second time.

world_map = folium.Map(tiles="cartodbpositron")
marker_cluster = MarkerCluster().add_to(world_map)

# One circle marker per record, grouped by the cluster layer. Iterating over the
# two columns directly avoids building a full row object for every lookup.
for lat, long in zip(df['lat'], df['long']):
    folium.CircleMarker(location=[lat, long], radius=5,
                        fill=True).add_to(marker_cluster)

world_map