# Import all libraries required for interactive visualization

In [None]:
import chart_studio.plotly as py
import cufflinks as cf
import pandas as pd
import numpy as np
%matplotlib inline
cf.go_offline()
import ipywidgets as widgets
from ipywidgets import interact
from ipywidgets import interact, interactive, fixed, interact_manual
import matplotlib.pyplot as plt
import seaborn as sns

# Global Constants

In [None]:
# Airport and airline identifiers for this analysis.
AIRPORT = 'AMS'
AIRLINE = 'KLM'

# Local folders with the downloaded 2020 flight JSON files; arrivals and
# departures live in sibling sub-folders of the same root.
_FLIGHT_DATA_ROOT = 'C:\\Users\\Sreelatha\\Desktop\\ironHack\\Project 3\\flight_data\\2020'
ARRIVAL_FOLDER_LOCATION = _FLIGHT_DATA_ROOT + '\\arr'
DEPARTURE_FOLDER_LOCATION = _FLIGHT_DATA_ROOT + '\\dep'

## Import all the libraries required to get the information from the multiple json files

In [None]:
import os 
import glob 
import requests
import json
import datetime 
import pandas as pd
from dateutil.relativedelta import relativedelta
from pandas.io.json import json_normalize


In [None]:
def get_flight_json_files(path, current_month):
    """Return the JSON files stored under the month sub-folder of ``path``.

    Args:
        path: Folder that contains one sub-folder per month.
        current_month: Name of the month sub-folder, e.g. ``"01"``.

    Returns:
        Sorted list with the path of every ``*.json`` file found in the
        month folder or in any folder nested below it.
    """
    # os.path.join picks the right separator on every platform. "**" must be
    # a path component of its own: only then does glob treat it as "any
    # depth", and only then is the match limited to the exact month folder
    # instead of every folder whose name starts with the month.
    pattern = os.path.join(path, current_month, "**", "*.json")
    return sorted(glob.glob(pattern, recursive=True))

def load_flight_data(file_list):
    """Load flight records from several JSON files into one DataFrame.

    Every file is expected to hold a JSON object with a ``data`` list (one
    entry per flight) and a ``pagination`` object containing ``total``.

    Args:
        file_list: Iterable of paths to the JSON files to read.

    Returns:
        A DataFrame with the flattened ``data`` records of all files, plus a
        ``flight_total`` column carrying the ``pagination.total`` of the file
        each row came from. Each file keeps its own 0-based row index. An
        empty DataFrame is returned when ``file_list`` is empty.
    """
    frames = []

    for file in file_list:
        # Binary mode lets the json module detect the encoding itself, so the
        # result does not depend on the platform's default text encoding.
        with open(file, 'rb') as data_file:
            payload = json.load(data_file)

        current_flight_df = pd.json_normalize(payload, record_path=['data'])
        current_flight_df['flight_total'] = payload['pagination']['total']
        frames.append(current_flight_df)

    # pd.concat refuses an empty list, so handle "no files" explicitly.
    if not frames:
        return pd.DataFrame()

    # A single concat replaces the repeated DataFrame.append calls, which
    # were removed in pandas 2.0 and copied the whole frame on each iteration.
    return pd.concat(frames)


# First and last day of the period to load; one month folder is read for
# every month start that falls inside this range.
start_date = datetime.datetime(2020, 1, 1)
end_date = datetime.datetime(2020, 3, 27)

from datetime import date, timedelta


def _load_flights_for_period(folder, flight_type, first_day, last_day):
    """Load all month folders between two dates and tag the rows.

    Args:
        folder: Folder holding one sub-folder per month ("01", "02", ...).
        flight_type: Value written to the ``flight.type`` column.
        first_day: Start of the period; stepped forward one month at a time.
        last_day: Last day of the period (inclusive).

    Returns:
        DataFrame with the flights of every visited month.
    """
    monthly_frames = []
    month_start = first_day
    while month_start <= last_day:
        current_month = month_start.strftime("%m")
        print(current_month)
        file_list = get_flight_json_files(folder, current_month)
        monthly_frames.append(load_flight_data(file_list))
        month_start += relativedelta(months=+1)

    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    flights = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()
    flights['flight.type'] = flight_type
    return flights


# Arrivals first, then departures, stacked into one frame.
flight_pd = pd.concat([
    _load_flights_for_period(ARRIVAL_FOLDER_LOCATION, 'Arr', start_date, end_date),
    _load_flights_for_period(DEPARTURE_FOLDER_LOCATION, 'Dep', start_date, end_date),
])

# Report whether anything was loaded. The month column is derived only when
# there is data: an empty frame has no 'flight_date' column to read.
if flight_pd.empty:
    print('DataFrame is empty')
else:
    flight_pd['flight_month'] = pd.DatetimeIndex(flight_pd['flight_date']).month
    print('DataFrame is not empty')


In [None]:
# Show the column labels of the combined flight data.
flight_pd.columns

### "flight_pd" contains many columns that are not needed. So, I am only selecting the columns I need from "flight_pd" and putting them in a dataframe named "filtered_flight_pd"

In [None]:
# Columns kept for the analysis; everything else in flight_pd is dropped.
columns_of_interest = [
    'flight_date', 'flight_total', 'flight_month', 'airline.name',
    'flight_status', 'departure.airport', 'arrival.airport',
    'departure.iata', 'arrival.iata', 'flight.iata', 'flight.type',
]
filtered_flight_pd = flight_pd[columns_of_interest]
# head is a method: it has to be called to preview the first rows.
filtered_flight_pd.head()

### Since the flight route dataframe, named flight_pd, does not have the name of the airports and the country they are located in; another dataframe was created based on another database. This dataframe named airports was downloaded from "https://openflights.org/data.html#airport" website.

In [None]:
# Read the airport reference table from its CSV file and preview it.
airports_csv_path = '../airport_data/airports.csv'
airports = pd.read_csv(airports_csv_path)
airports.head()

### Now I would like to merge both dataframes so that the airport name and country name are included.

In [None]:
# Attach the airport reference columns to every flight by matching the
# flight's arrival IATA code against the airports table. Inner join: flights
# whose arrival code has no match in the table are dropped.
nl_flights = filtered_flight_pd.merge(
    airports,
    how='inner',
    left_on='arrival.iata',
    right_on='iata',
)

# Convert the flight date column to datetime values.
nl_flights["flight_date"] = pd.to_datetime(nl_flights["flight_date"])

nl_flights


### I would like to save all the data in an excel format

In [None]:
# Save the merged flight data as an Excel workbook and as a CSV file.
# Both writers return None when given a file path, so the result is not
# bound to a name (the old `nl_flight` variable was always None).
nl_flights.to_excel('nl_flights.xlsx')

nl_flights.to_csv('nl_flights.csv')


### Loading the COVID-19 cases from Jan 2020 through March 2020 in the Netherlands.
The data comes from "https://data.humdata.org/dataset/covid-19-global-travel-restrictions-and-airline-information".
The data covers all the countries in the world. I extracted only the cases from NL and put them in a dataframe.

In [None]:
covid = pd.read_csv('../covid_data/covid.csv')

# Keep only the Netherlands rows and the three columns used below.
# .copy() makes the selection an independent frame, so the Date assignment
# that follows does not raise pandas' SettingWithCopyWarning.
is_netherlands = covid['Country/Region'] == "Netherlands"
covid = covid.loc[is_netherlands, ['Country/Region', 'Date', 'Value']].copy()

# Parse the dates into datetime values and order the rows chronologically.
covid["Date"] = pd.to_datetime(covid["Date"])
covid = covid.sort_values(by="Date")




In [None]:
# One row per date with the summed "Value" of all rows for that date.
# "Value" is selected explicitly so the text column is never aggregated;
# what a plain groupby().sum() does with it depends on the pandas version.
covid_one = covid.groupby('Date')['Value'].sum().reset_index()

# Interactive Plot:

import plotly.express as px
fig = px.line(covid_one, x="Date", y="Value", title="Number of infections in Jan, Feb, March")
fig.show()


In [None]:
# Grouping by Date as there are multiple rows for the same date in NL.
# Only "Value" is summed so the text column is never aggregated, whatever
# the pandas version does with non-numeric columns by default.
covid = covid.groupby(['Date'])[['Value']].sum()

# Interactive Plot: bar chart of the summed value per date.
covid_cases = covid['Value']

covid_cases.iplot(kind='bar')

### Merging the flight and country database with COVID Database

In [None]:
# Build one row per flight date: take the distinct (flight_date,
# flight_total) pairs and add up the totals that share a date.
covid_flights = (
    nl_flights[['flight_date', 'flight_total']]
    .drop_duplicates()
    .groupby('flight_date')
    .sum()
)

# Merge with the covid data is currently disabled:
#covid_flights = pd.merge(covid_flights, covid, how ='inner', left_on ='flight_date',right_on='Date') 
#covid_flights = covid_flights.rename(columns={'Value': 'covid_infections', 'Date': 'covid_date', 'Country/Region': 'covid_country'})

covid_flights.head()



### Total number of flights for Jan, Feb, March 2020

In [None]:
# Interactive bar chart of the summed flight_total for each flight date.
total_flights = covid_flights.loc[:, 'flight_total']
total_flights.iplot(kind='bar')

### I would like to visualize the number of flights based on the flight status.

In [None]:
# Interactive bar chart with the number of rows per flight status.
flight_status_column = nl_flights['flight_status']
status = flight_status_column.value_counts()
status.iplot(kind='bar')

In [None]:
# Rows whose status is 'cancelled'.
cancelled_flight_filter = nl_flights['flight_status'] == 'cancelled'
cancelled_flights = nl_flights[cancelled_flight_filter]

# Count the cancelled rows per flight date. The status column is constant
# after the filter, so it is dropped once the counts are in place.
cancelled_counts = cancelled_flights.groupby('flight_date')['flight_status'].value_counts()
new = cancelled_counts.reset_index(name='flights_cancelled').drop(columns=['flight_status'])
new


In [None]:

# Interactive line plot of the number of cancelled flights per flight date.
import plotly.express as px

fig = px.line(data_frame=new, x="flight_date", y="flights_cancelled")
fig.show()



In [None]:

import plotly.graph_objs as go

# Daily covid totals, with the value column renamed for the combined table.
x = covid_one.rename(columns={'Value': 'covid_infections'})

# Join the cancelled-flight counts with the covid totals on the date; only
# dates present in both tables survive the inner join. The duplicate date
# column is removed and the flight date becomes the index for plotting.
test = pd.merge(new, x, how='inner', left_on='flight_date', right_on='Date')
test = test.drop(columns=['Date']).set_index('flight_date')

# One line per series, both over the same date axis.
trace0 = go.Scatter(
    x=test.index,
    y=test.flights_cancelled,
    mode='lines',
    name="Flights Cancelled",
)
trace1 = go.Scatter(
    x=test.index,
    y=test.covid_infections,
    mode='lines',
    name="Covid Infections",
)

data = [trace0, trace1]
layout = go.Layout(title='Covid Infections vs Flight Cancellations')

figure = go.Figure(data=data, layout=layout)
figure.show()

In [None]:
# Pairwise scatter plots / distributions of the columns in the merged table.
sns.pairplot(data=test)
plt.show()

### Number of flights landed and cancelled in Jan 2020

In [None]:
import plotly.express as px

# Flights of month 1, counted per flight type and coloured by status.
is_january = nl_flights['flight_month'] == 1
month1 = nl_flights.loc[is_january]
fig = px.histogram(data_frame=month1, x="flight.type", color="flight_status")
fig.show()



### Number of flights landed and cancelled in March 2020

In [None]:
import plotly.express as px

# Flights of month 3, counted per flight type and coloured by status.
is_march = nl_flights['flight_month'] == 3
month3 = nl_flights.loc[is_march]
fig = px.histogram(data_frame=month3, x="flight.type", color="flight_status")
fig.show()

# Comparing the two graphs above, there is a much higher number of cancelled flights in March 2020 than in Jan 2020

In [None]:
# Turn the flight_date index back into a regular column, then preview.
test = test.reset_index()
test.head()

In [None]:
@interact(Covid_Data = test['covid_infections'].unique(), 
          Flight_Date = test['flight_date'].unique())
def linechart(Covid_Data, Flight_Date):
    """Plot cancelled flights and covid infections for the widget selection.

    Args:
        Covid_Data: Infection count picked in the widget.
        Flight_Date: Flight date picked in the widget.
    """
    # Rows matching both widget values. The original computed this frame but
    # plotted the whole table, so the widgets had no effect on the chart.
    selected = test[(test['covid_infections'] == Covid_Data) & (test['flight_date'] == Flight_Date)]
    if selected.empty:
        # Not every infection count occurs on every date.
        print('No data for the selected infection count and flight date')
        return

    # Sum only the two numeric series so text columns never reach the plot.
    grouped = selected.groupby('flight_date')[['flights_cancelled', 'covid_infections']].sum()
    grouped.iplot(kind='bar', title='Covid Infections per Day')