In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib notebook

In [205]:
df = pd.read_csv('data/air-passenger-arrivals-total-by-region-and-selected-country-of-embarkation/total-air-passenger-arrivals.csv')
df['year'] = df['month'].apply(lambda x: x.split('-')[0])
df['month'] = df['month'].apply(lambda x: x.split('-')[1])
df = df.drop(['level_1'], axis=1)
yearly_df = df.where((df['year'].str.startswith('2')) & (~df['year'].str.contains('2017')))
yearly_df = yearly_df.groupby(by='year')['value'].sum()
yearly_df = yearly_df.apply(lambda x:x/10000)
yearly_df

year
2000    1354.5608
2001    1333.3167
2002    1372.7235
2003    1158.7576
2004    1433.6358
2005    1536.4071
2006    1667.7822
2007    1763.9557
2008    1818.5230
2009    1802.6026
2010    2048.6451
2011    2277.8149
2012    2505.5954
2013    2650.0359
2014    2666.8771
2015    2749.1096
2016    2920.3726
Name: value, dtype: float64

In [198]:
disease_df = pd.read_csv('data/final-disease.csv')
disease_df['vector_based_diseases'] = disease_df['dengue'] + disease_df['malaria']
disease_df['air_respiratory_based_diseases'] = disease_df['avianflu'] + disease_df['sars'] + disease_df['chikunguniya']
disease_df['total_diseases'] = disease_df['vector_based_diseases']+ disease_df['air_respiratory_based_diseases'] 
min_max=(disease_df['total_diseases']-disease_df['total_diseases'].mean())/(disease_df['total_diseases'].max()-disease_df['total_diseases'].min())
disease_df['total_diseases'] = disease_df['total_diseases'].apply(lambda x: (-1)*x*min_max)

disease_df

Unnamed: 0,year,dengue,malaria,avianflu,sars,chikunguniya,vector_based_diseases,air_respiratory_based_diseases,total_diseases
0,2001,663.0,266.0,0.0,0.0,0.0,929.0,0.0,324.757061
1,2002,3938.0,175.0,0.0,0.0,0.0,4113.0,0.0,1437.810324
2,2003,4732.0,115.0,0.0,239.0,0.0,4847.0,239.0,1777.948774
3,2004,9302.0,153.0,0.0,0.0,0.0,9455.0,0.0,3305.250817
4,2005,13797.0,155.0,0.0,0.0,0.0,13952.0,0.0,4877.29872
5,2006,3058.0,189.0,0.0,0.0,0.0,3247.0,0.0,1135.076616
6,2007,8664.0,159.0,0.0,0.0,0.0,8823.0,0.0,3084.318134
7,2008,6768.0,159.0,0.0,0.0,0.0,6927.0,0.0,2421.520085
8,2009,4480.0,172.0,0.0,0.0,375.0,4652.0,375.0,1757.323729
9,2010,5358.0,190.0,0.0,0.0,34.0,5548.0,34.0,1951.33898


In [213]:
fig = plt.Figure(figsize=(30,30))
plt.gcf().set_size_inches(10,8)
plt.plot(yearly_df.index, yearly_df.values, color='black', alpha=0.6, linewidth=2.5, label='Air passenger arrivals in 10 thousands')
plt.plot(disease_df['year'], disease_df['air_respiratory_based_diseases'], color='red', alpha=0.6, linewidth=2.5, label='Reported cases for SARS, Chikunguniya and Avian flu')
plt.xlim(xmin=2001)

plt.title('Increase in air or vector borne diseases causes drop in yearly air passenger travel to Singapore',fontweight='bold')
plt.tick_params(color='grey',labelcolor='grey')
plt.xlabel('\nYear',fontweight='bold')
hlines = range(500,4000,500)
for h in hlines:
    plt.axhline(y=h, color='grey',alpha=0.5,linestyle='-.')

x_ticks = range(2001,2016,1)
x_labels = []
for x in x_ticks:
    if x%2 == 0:
        x_labels.append('')
    else:
        x_labels.append(x)
plt.xticks(x_ticks, x_labels)
axes = plt.gca()
axes.spines['top'].set_visible(False)
axes.spines['right'].set_visible(False)
axes.spines['left'].set_visible(False)
axes.spines['bottom'].set_visible(False)


plt.fill_betweenx(y=range(0,3500),x1=2002,x2=2004,color='grey',alpha=0.05)
plt.fill_betweenx(y=range(0,3500),x1=2008,x2=2010,color='grey',alpha=0.05)
plt.fill_betweenx(y=range(0,3500),x1=2012,x2=2014,color='grey',alpha=0.05)


axes.legend(loc='upper right',bbox_to_anchor=(1, 0.92),fontsize='small',fancybox=True)


<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x144909278>