In [1]:
import pandas as pd
import plotly.express as px

In [2]:
# Read both flight CSV files, then stack them row-wise into one frame.
data1, data2 = [pd.read_csv(csv_path) for csv_path in ('flights0.csv', 'flights1.csv')]
data = pd.concat([data1, data2])

In [3]:
# Display the combined frame.
data

Unnamed: 0,MONTH,DAY_OF_MONTH,STATE_NAME,DEP_DELAY_NEW,CANCELLED
0,1,6,GA,0.0,0.0
1,1,7,GA,0.0,0.0
2,1,8,GA,0.0,0.0
3,1,9,GA,0.0,0.0
4,1,10,GA,0.0,0.0
...,...,...,...,...,...
3711013,12,31,FL,64.0,0.0
3711014,12,31,DC,0.0,0.0
3711015,12,31,PA,0.0,0.0
3711016,12,31,MA,0.0,0.0


#### Column descriptions

- _MONTH_: Month
- _DAY_OF_MONTH_: Day of the month (1-31)
- _STATE_NAME_: Origin state (two-letter abbreviation, e.g. GA)
- _DEP_DELAY_NEW_: Departure delay in minutes
- _CANCELLED_: Cancellation flag (1 if the flight was cancelled, 0 otherwise)

In [4]:
def makeDate(df):
    """Return the 2019 date of one flight record as a ``'2019-MM-DD'`` string.

    Parameters
    ----------
    df : mapping or pandas.Series
        A single record (one row, despite the parameter name) exposing the
        keys ``'MONTH'`` and ``'DAY_OF_MONTH'``.

    Returns
    -------
    str
        The date with zero-padded month and day, e.g. ``'2019-01-06'``.

    Raises
    ------
    ValueError
        If the month or the day cannot be converted to an integer (e.g. NaN).
    """
    # Cast to int before formatting: a float such as 1.0 would otherwise be
    # rendered as '01.0' and produce a malformed date string.
    month = int(df['MONTH'])
    day = int(df['DAY_OF_MONTH'])
    return '2019-{:02d}-{:02d}'.format(month, day)

In [5]:
# Build the '2019-MM-DD' label of every flight with vectorised string
# operations: a row-wise apply over millions of rows is far slower and
# yields the same strings.
data['DATE'] = (
    '2019-'
    + data['MONTH'].astype(int).astype(str).str.zfill(2)
    + '-'
    + data['DAY_OF_MONTH'].astype(int).astype(str).str.zfill(2)
)

In [6]:
# Display the frame again to check the new DATE column.
data

Unnamed: 0,MONTH,DAY_OF_MONTH,STATE_NAME,DEP_DELAY_NEW,CANCELLED,DATE
0,1,6,GA,0.0,0.0,2019-01-06
1,1,7,GA,0.0,0.0,2019-01-07
2,1,8,GA,0.0,0.0,2019-01-08
3,1,9,GA,0.0,0.0,2019-01-09
4,1,10,GA,0.0,0.0,2019-01-10
...,...,...,...,...,...,...
3711013,12,31,FL,64.0,0.0,2019-12-31
3711014,12,31,DC,0.0,0.0,2019-12-31
3711015,12,31,PA,0.0,0.0,2019-12-31
3711016,12,31,MA,0.0,0.0,2019-12-31


In [7]:
# French month names keyed by month number (1-12).
dico = dict(enumerate(
    [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre',
    ],
    start=1,
))


def showMeTheMonth(df):
    """Return the French name of the month stored under ``'MONTH'`` in a record.

    ``df`` is a single record (mapping or row Series); a month number that is
    not a key of ``dico`` raises ``KeyError``.
    """
    month_number = df['MONTH']
    return dico[month_number]

In [8]:
def makeMap(df, state='delayed', timeline='all', month=1, day=1):
    """Build a USA choropleth of a per-state flight statistic for 2019.

    Parameters
    ----------
    df : pandas.DataFrame
        Flight records with the columns ``MONTH``, ``DAY_OF_MONTH``,
        ``STATE_NAME``, ``DEP_DELAY_NEW`` and ``CANCELLED``. ``STATE_NAME`` is
        handed to Plotly with ``locationmode='USA-states'``. The ``DATE``
        column is additionally required for the per-date animation (see
        ``timeline``).
    state : str
        ``'cancelled'`` maps the percentage of cancelled flights. Any other
        value (default ``'delayed'``) maps the mean departure delay, computed
        only over flights whose delay is strictly positive.
    timeline : str
        * ``'day'``      -- single map for the given ``month`` and ``day``;
        * ``'month'``    -- animation over the days of the given ``month``;
        * ``'year'``     -- animation over the twelve months;
        * ``'absolute'`` -- single map aggregated over every record;
        * anything else (default ``'all'``) -- animation over ``DATE``.
    month : int
        Month number, used by the ``'day'`` and ``'month'`` timelines.
    day : int
        Day of the month, used by the ``'day'`` timeline.

    Returns
    -------
    The figure returned by ``px.choropleth``.

    Raises
    ------
    KeyError
        If ``month`` is not a key of ``dico`` for the ``'day'`` / ``'month'``
        timelines, or if a required column is missing from ``df``.
    """
    # Restrict the records to the requested period, pick the column (if any)
    # that drives the animation, and prepare the matching title suffix.
    if timeline == 'day':
        records = df[(df['MONTH'] == month) & (df['DAY_OF_MONTH'] == day)]
        frame_col = None
        period = " par état le " + str(day) + ' ' + dico[month] + ' 2019'
    elif timeline == 'month':
        records = df[df['MONTH'] == month]
        frame_col = 'DAY_OF_MONTH'
        period = " par état en " + dico[month] + ' 2019'
    elif timeline == 'year':
        records = df
        frame_col = 'MONTH'
        period = " par état et par mois en 2019"
    elif timeline == 'absolute':
        records = df
        frame_col = None
        period = " par état en 2019"
    else:
        records = df
        frame_col = 'DATE'
        period = " par état et par jour en 2019"

    group_cols = ['STATE_NAME'] if frame_col is None else [frame_col, 'STATE_NAME']

    if state == 'cancelled':
        value_col = 'CANCELLED_RATIO'
        value_label = 'Pourcentage de vols annulés'
        title = "Pourcentage de vols annulés" + period
        # CANCELLED is a 0/1 flag, so its mean over a group is the share of
        # cancelled flights among ALL flights of that group. Computing it on
        # the period-filtered records keeps numerator and denominator on the
        # same period, and states without any cancellation show up as 0 %
        # instead of being dropped.
        stats = (records.groupby(group_cols)['CANCELLED'].mean() * 100).reset_index(name=value_col)
    else:
        value_col = 'DELAY_MEAN'
        value_label = 'Retard moyen'
        title = "Retard moyen des vols" + period
        # Only flights that actually left late contribute to the mean delay.
        late = records[records['DEP_DELAY_NEW'] > 0.]
        stats = late.groupby(group_cols)['DEP_DELAY_NEW'].mean().reset_index(name=value_col)

    labels = {value_col: value_label}
    hover_data = {'STATE_NAME': False}
    if frame_col is not None:
        hover_data = {frame_col: False, 'STATE_NAME': False}
        # Human-readable slider labels; the DATE animation keeps its column name.
        frame_labels = {'DAY_OF_MONTH': 'jour', 'MONTH': 'mois'}
        if frame_col in frame_labels:
            labels[frame_col] = frame_labels[frame_col]

    if frame_col == 'MONTH':
        # Rows are already sorted by month number (groupby sorts its keys), so
        # replacing the numbers by names keeps the frames in calendar order.
        stats['MONTH'] = stats['MONTH'].map(dico)

    fig = px.choropleth(stats, locations='STATE_NAME', locationmode='USA-states', scope='usa',
                        color=value_col, animation_frame=frame_col, labels=labels,
                        hover_name='STATE_NAME', hover_data=hover_data,
                        color_continuous_scale=['white', 'blue'], title=title)

    return fig