# Plot the monthly number of searches for flights arriving at Málaga, Madrid or Barcelona

* For the arriving airport, you can use the
Destination column in the searches file.
* Plot a curve for Málaga, another one for
Madrid, and another one for Barcelona, in
the same figure.

## Modules and libraries, and basic setup

In [40]:
import pandas as pd
import numpy as np
from neobase import NeoBase

ModuleNotFoundError: No module named 'neobase'

In [3]:
path = '/home/miki/Documents/data/challenge/'
filename = 'searches.csv.bz2'

## Initial Exploration

Loading a reduced dataframe for intial exploration

In [22]:
dummy_searches = pd.read_csv(
    path+filename,
    nrows=1e3,
    sep = '^'
)

In [23]:
dummy_searches.columns

Index(['Date', 'Time', 'TxnCode', 'OfficeID', 'Country', 'Origin',
       'Destination', 'RoundTrip', 'NbSegments', 'Seg1Departure',
       'Seg1Arrival', 'Seg1Date', 'Seg1Carrier', 'Seg1BookingCode',
       'Seg2Departure', 'Seg2Arrival', 'Seg2Date', 'Seg2Carrier',
       'Seg2BookingCode', 'Seg3Departure', 'Seg3Arrival', 'Seg3Date',
       'Seg3Carrier', 'Seg3BookingCode', 'Seg4Departure', 'Seg4Arrival',
       'Seg4Date', 'Seg4Carrier', 'Seg4BookingCode', 'Seg5Departure',
       'Seg5Arrival', 'Seg5Date', 'Seg5Carrier', 'Seg5BookingCode',
       'Seg6Departure', 'Seg6Arrival', 'Seg6Date', 'Seg6Carrier',
       'Seg6BookingCode', 'From', 'IsPublishedForNeg', 'IsFromInternet',
       'IsFromVista', 'TerminalID', 'InternetOffice'],
      dtype='object')

In [24]:
dummy_searches.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 45 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Date               1000 non-null   object
 1   Time               1000 non-null   object
 2   TxnCode            1000 non-null   object
 3   OfficeID           1000 non-null   object
 4   Country            1000 non-null   object
 5   Origin             1000 non-null   object
 6   Destination        1000 non-null   object
 7   RoundTrip          1000 non-null   int64 
 8   NbSegments         1000 non-null   int64 
 9   Seg1Departure      1000 non-null   object
 10  Seg1Arrival        1000 non-null   object
 11  Seg1Date           996 non-null    object
 12  Seg1Carrier        374 non-null    object
 13  Seg1BookingCode    80 non-null     object
 14  Seg2Departure      712 non-null    object
 15  Seg2Arrival        712 non-null    object
 16  Seg2Date           708 non-null    object
 

In [25]:
dummy_searches.head(10)

Unnamed: 0,Date,Time,TxnCode,OfficeID,Country,Origin,Destination,RoundTrip,NbSegments,Seg1Departure,...,Seg6Arrival,Seg6Date,Seg6Carrier,Seg6BookingCode,From,IsPublishedForNeg,IsFromInternet,IsFromVista,TerminalID,InternetOffice
0,2013-01-01,20:25:57,MPT,624d8c3ac0b3a7ca03e3c167e0f48327,DE,TXL,AUH,1,2,TXL,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,FRA
1,2013-01-01,10:15:33,MPT,b0af35b31588dc4ab06d5cf2986e8e02,MD,ATH,MIL,0,1,ATH,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,KIV
2,2013-01-01,18:04:49,MPT,3561a60621de06ab1badc8ca55699ef3,US,ICT,SFO,1,2,ICT,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,NYC
3,2013-01-01,17:42:40,FXP,1864e5e8013d9414150e91d26b6a558b,SE,RNB,ARN,0,1,RNB,...,,,,,1ASI,0,0,0,d41d8cd98f00b204e9800998ecf8427e,STO
4,2013-01-01,17:48:29,MPT,1ec336348f44207d2e0027dc3a68c118,NO,OSL,MAD,1,2,OSL,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,OSL
5,2013-01-01,22:00:28,MPT,3561a60621de06ab1badc8ca55699ef3,US,IAH,BLR,1,2,IAH,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,NYC
6,2013-01-01,10:47:14,MPT,d327ca6e35cc6732d4709828327ac7c1,DK,CPH,PAR,1,2,CPH,...,,,,,1ASI,0,0,0,d41d8cd98f00b204e9800998ecf8427e,CPH
7,2013-01-01,23:39:49,MPT,38a3abb0a28e3f00fa79a11f552a5052,FR,PAR,DUB,1,2,PAR,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,PAR
8,2013-01-01,17:08:46,MPT,c8daef4f8bf73a61aa2c928705f7b82d,ES,DUS,ACE,1,2,DUS,...,,,,,1ASIWS,0,0,0,d41d8cd98f00b204e9800998ecf8427e,MAD
9,2013-01-01,19:57:57,MPT,28d7a8c95e4db88589d3d35b66920e78,DE,FRA,BGW,1,2,FRA,...,,,,,1ASI,0,0,0,d41d8cd98f00b204e9800998ecf8427e,BNJ


In [26]:
columns_of_interest = ['Date','Destination']

In [27]:
dummy_searches = dummy_searches[columns_of_interest]
dummy_searches.head(10)

Unnamed: 0,Date,Destination
0,2013-01-01,AUH
1,2013-01-01,MIL
2,2013-01-01,SFO
3,2013-01-01,ARN
4,2013-01-01,MAD
5,2013-01-01,BLR
6,2013-01-01,PAR
7,2013-01-01,DUB
8,2013-01-01,ACE
9,2013-01-01,BGW


Convert the `'Date'` column to an actual date instead of an object.

In [28]:
dummy_searches['Date'] = pd.to_datetime(dummy_searches['Date'], errors='coerce', format='%Y-%m-%d')
dummy_searches.head(10)

Unnamed: 0,Date,Destination
0,2013-01-01,AUH
1,2013-01-01,MIL
2,2013-01-01,SFO
3,2013-01-01,ARN
4,2013-01-01,MAD
5,2013-01-01,BLR
6,2013-01-01,PAR
7,2013-01-01,DUB
8,2013-01-01,ACE
9,2013-01-01,BGW


Trim the `'Destination'` column to make sure we only consider actual codes.

In [43]:
dummy_searches['Destination'] = dummy_searches['Destination'].str.strip()
dummy_searches['Destination'] = dummy_searches['Destination'].str.upper()
dummy_searches.head

<bound method NDFrame.head of           Date Destination
0   2013-01-01         AUH
1   2013-01-01         MIL
2   2013-01-01         SFO
3   2013-01-01         ARN
4   2013-01-01         MAD
..         ...         ...
995 2013-01-01         DTM
996 2013-01-01         PER
997 2013-01-01         HKG
998 2013-01-01         FRA
999 2013-01-01         LON

[1000 rows x 2 columns]>

In [51]:
dummy_searches['Year'] = dummy_searches['Date'].dt.year.astype(str)
dummy_searches['Month'] = dummy_searches['Date'].dt.month.astype(str)
dummy_searches['Year-Month'] = dummy_searches['Year'] + '-' + dummy_searches['Month']
dummy_searches

Unnamed: 0,Date,Destination,Year,Month,Year-Month
0,2013-01-01,AUH,2013,1,2013-1
1,2013-01-01,MIL,2013,1,2013-1
2,2013-01-01,SFO,2013,1,2013-1
3,2013-01-01,ARN,2013,1,2013-1
4,2013-01-01,MAD,2013,1,2013-1
...,...,...,...,...,...
995,2013-01-01,DTM,2013,1,2013-1
996,2013-01-01,PER,2013,1,2013-1
997,2013-01-01,HKG,2013,1,2013-1
998,2013-01-01,FRA,2013,1,2013-1


In [52]:
dummy_searches

Unnamed: 0,Date,Destination,Year,Month,Year-Month
0,2013-01-01,AUH,2013,1,2013-1
1,2013-01-01,MIL,2013,1,2013-1
2,2013-01-01,SFO,2013,1,2013-1
3,2013-01-01,ARN,2013,1,2013-1
4,2013-01-01,MAD,2013,1,2013-1
...,...,...,...,...,...
995,2013-01-01,DTM,2013,1,2013-1
996,2013-01-01,PER,2013,1,2013-1
997,2013-01-01,HKG,2013,1,2013-1
998,2013-01-01,FRA,2013,1,2013-1
