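**Script overview**

This notebook loads the raw OAG flight schedule data for 2018 and 2012 from CSV and uploads each year to its own table in the `OAG` schema of the `mobility` database.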

-- MH, 04.02.2020

In [5]:
import pandas as pd
import numpy as np
import math
import os
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

In [6]:
def preview(dataframe, head=5):
    """Show the first rows of a dataframe and report its total row count.

    Parameters
    ----------
    dataframe : object with ``.head()`` and ``len()`` (e.g. a pandas DataFrame)
        The table to inspect.
    head : int, default 5
        Number of leading rows passed to ``dataframe.head``.
    """
    top_rows = dataframe.head(head)
    # `display` is the notebook's rich-output helper, so the rows render as a table.
    display(top_rows)
    n_rows = len(dataframe)
    print(f"This dataframe has {n_rows} observations.")

# 2018

## (2018) Load data

In [7]:
# Read the 2018 OAG export; the path is relative to the notebook's working directory.
oag18_raw = pd.read_csv('../delivery/OAG_Global_2018.csv')

In [8]:
# Show the first rows of the raw 2018 data together with its total row count.
preview(oag18_raw)

Unnamed: 0,Specific Aircraft Code,Specific Aircraft Name,Arr Airport Code,Arr Airport Name,Dep Airport Code,Dep Airport Name,Frequency,Time series
0,100,Fokker 100,ADE,Aden,JIB,Djibouti,58,2018
1,100,Fokker 100,AMS,Amsterdam,BRU,Brussels Airport,88,2018
2,100,Fokker 100,AMS,Amsterdam,MUC,Munich International Airport,332,2018
3,100,Fokker 100,AZD,Yazd,BND,Bandar Abbas,91,2018
4,100,Fokker 100,BND,Bandar Abbas,AZD,Yazd,91,2018


This dataframe has 183211 observations.


In [9]:
# List the original OAG column names (they are renamed in the next cell).
oag18_raw.columns

Index(['Specific Aircraft Code', 'Specific Aircraft Name', 'Arr Airport Code',
       'Arr Airport Name', 'Dep Airport Code', 'Dep Airport Name', 'Frequency',
       'Time series'],
      dtype='object')

In [10]:
# Drop the 'Time series' column (it only repeats the year, which moves into the
# frequency column name) and rename the remaining columns to the snake_case
# names used for the database table.
# Written as one non-mutating chain so the cell can be re-run safely:
# errors='ignore' tolerates 'Time series' already being gone, and rename()
# silently skips labels that are no longer present.
oag18_raw = (
    oag18_raw
    .drop(columns='Time series', errors='ignore')
    .rename(columns={'Specific Aircraft Code': 'ac_code_iata',
                     'Specific Aircraft Name': 'ac_name_oag',
                     'Arr Airport Code': 'arr_ap_code',
                     'Arr Airport Name': 'arr_ap_name',
                     'Dep Airport Code': 'dep_ap_code',
                     'Dep Airport Name': 'dep_ap_name',
                     'Frequency': 'frequency_2018'
                     })
)
oag18_raw.head(2)

Unnamed: 0,ac_code_iata,ac_name_oag,arr_ap_code,arr_ap_name,dep_ap_code,dep_ap_name,frequency_2018
0,100,Fokker 100,ADE,Aden,JIB,Djibouti,58
1,100,Fokker 100,AMS,Amsterdam,BRU,Brussels Airport,88


In [11]:
# Check dtypes and non-null counts of the renamed columns before uploading.
oag18_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183211 entries, 0 to 183210
Data columns (total 7 columns):
ac_code_iata      183211 non-null object
ac_name_oag       183211 non-null object
arr_ap_code       183211 non-null object
arr_ap_name       183211 non-null object
dep_ap_code       183211 non-null object
dep_ap_name       183211 non-null object
frequency_2018    183211 non-null int64
dtypes: int64(1), object(6)
memory usage: 9.8+ MB


## (2018) Upload data

In [12]:
# Report the longest string in each text column; these values size the
# varchar columns of the table created further down.
# The first six columns hold strings; the seventh is the integer frequency.
for col in oag18_raw.columns[:6]:
    longest = oag18_raw[col].map(len).max()
    print(f"Maximum string length in {col} column: {longest}")

Maximum string length in ac_code_iata column: 3
Maximum string length in ac_name_oag column: 39
Maximum string length in arr_ap_code column: 3
Maximum string length in arr_ap_name column: 39
Maximum string length in dep_ap_code column: 3
Maximum string length in dep_ap_name column: 39


In [13]:
# Connect to DB
# Load the ipython_pg.LAV extension (presumably the provider of the %pg_* magics
# used below) and connect to the 'mobility' database as user 'heldm'.
# The password is asked for interactively, so it is not stored in the notebook.
%load_ext ipython_pg.LAV
%pg_connect dbname='mobility' user='heldm'

password for heldm@lav-fileserver.ethz.ch:5433:········
SUCCESS: connected to lav-fileserver.ethz.ch
  PostGIS integration enabled

In [14]:
# Print one empty COMMENT ON COLUMN statement per column; the output serves as
# a copy-paste template for the table definition cell.
comment_stub = 'COMMENT ON\nCOLUMN "OAG"."2018_schedule_raw"."{}" IS ;'
for col in oag18_raw.columns:
    print(comment_stub.format(col))

COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."ac_code_iata" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."ac_name_oag" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."arr_ap_code" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."arr_ap_name" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."dep_ap_code" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."dep_ap_name" IS ;
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."frequency_2018" IS ;


In [15]:
# Switch the session to the "NDA-OAG-full" role before touching the OAG schema
# (presumably the role with write access to the NDA-protected data; confirm).
%pg_sql set role "NDA-OAG-full"

SUCCESS: query did not return any data


<cursor object at 0x00000216916F99E8; closed: 0>

In [13]:
%%pg_sql cur
-- Recreate the raw 2018 OAG schedule table from scratch (an existing copy is dropped).
drop table if exists "OAG"."2018_schedule_raw";

-- varchar widths equal the maximum string lengths measured on oag18_raw in this notebook.
CREATE TABLE "OAG"."2018_schedule_raw" 
("ac_code_iata" varchar(3), 
 "ac_name_oag" varchar(39), 
 "arr_ap_code" varchar(3),
 "arr_ap_name" varchar(39),
 "dep_ap_code" varchar(3),
 "dep_ap_name" varchar(39),
 "frequency_2018" float
);

-- Column documentation stored in the database catalogue.
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."ac_code_iata" IS 'IATA aircraft code';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."ac_name_oag" IS 'OAG aircraft name';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."arr_ap_code" IS 'IATA arrival airport code';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."arr_ap_name" IS 'IATA arrival airport name';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."dep_ap_code" IS 'IATA departure airport code';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."dep_ap_name" IS 'IATA departure airport name';
COMMENT ON
COLUMN "OAG"."2018_schedule_raw"."frequency_2018" IS 'number of flights for this segment in 2018';

COMMIT;

SUCCESS: query did not return any data
 cursor object as 'cur'


In [14]:
# Upload the oag18_raw dataframe into the OAG.2018_schedule_raw table created above
# (presumably a bulk copy; column order must match the table definition -- confirm).
%pg_copy oag18_raw OAG.2018_schedule_raw

  waring: green-mode temporarily deactivated (interrupt won't abort the import)  green mode reactivated

In [15]:
# Close the database connection once the 2018 upload is done.
%pg_disconnect

# 2012

## (2012) Load data

In [6]:
# Read the 2012 OAG export; the path is relative to the notebook's working directory.
oag12_raw = pd.read_csv('../delivery/OAG_Global_2012.csv')

In [8]:
# Show the first rows of the raw 2012 data together with its total row count.
preview(oag12_raw)

Unnamed: 0,Specific Aircraft Code,Specific Aircraft Name,Arr Airport Code,Arr Airport Name,Dep Airport Code,Dep Airport Name,Frequency,Time series
0,100,Fokker 100,AGT,Ciudad del Este,ASU,Asuncion,176,2012
1,100,Fokker 100,AMS,Amsterdam,LUX,Luxembourg,1,2012
2,100,Fokker 100,ASU,Asuncion,AGT,Ciudad del Este,225,2012
3,100,Fokker 100,BHX,Birmingham Airport,ZRH,Zurich Airport,668,2012
4,100,Fokker 100,BIO,Bilbao,LIS,Lisbon,1,2012


This dataframe has 136743 observations.


In [9]:
# List the original OAG column names (they are renamed in the next cell).
oag12_raw.columns

Index(['Specific Aircraft Code', 'Specific Aircraft Name', 'Arr Airport Code',
       'Arr Airport Name', 'Dep Airport Code', 'Dep Airport Name', 'Frequency',
       'Time series'],
      dtype='object')

In [10]:
# Drop the 'Time series' column (it only repeats the year, which moves into the
# frequency column name) and rename the remaining columns to the snake_case
# names used for the database table.
# Written as one non-mutating chain so the cell can be re-run safely:
# errors='ignore' tolerates 'Time series' already being gone, and rename()
# silently skips labels that are no longer present.
oag12_raw = (
    oag12_raw
    .drop(columns='Time series', errors='ignore')
    .rename(columns={'Specific Aircraft Code': 'ac_code_iata',
                     'Specific Aircraft Name': 'ac_name_oag',
                     'Arr Airport Code': 'arr_ap_code',
                     'Arr Airport Name': 'arr_ap_name',
                     'Dep Airport Code': 'dep_ap_code',
                     'Dep Airport Name': 'dep_ap_name',
                     'Frequency': 'frequency_2012'
                     })
)
oag12_raw.head(2)

Unnamed: 0,ac_code_iata,ac_name_oag,arr_ap_code,arr_ap_name,dep_ap_code,dep_ap_name,frequency_2012
0,100,Fokker 100,AGT,Ciudad del Este,ASU,Asuncion,176
1,100,Fokker 100,AMS,Amsterdam,LUX,Luxembourg,1


In [11]:
# Check dtypes and non-null counts of the renamed columns before uploading.
oag12_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136743 entries, 0 to 136742
Data columns (total 7 columns):
ac_code_iata      136743 non-null object
ac_name_oag       136743 non-null object
arr_ap_code       136743 non-null object
arr_ap_name       136743 non-null object
dep_ap_code       136743 non-null object
dep_ap_name       136743 non-null object
frequency_2012    136743 non-null int64
dtypes: int64(1), object(6)
memory usage: 7.3+ MB


## (2012) Upload data

In [12]:
# Report the longest string in each text column; these values size the
# varchar columns of the table created further down.
# The first six columns hold strings; the seventh is the integer frequency.
for col in oag12_raw.columns[:6]:
    lengths = oag12_raw[col].apply(len)
    longest = lengths.max()
    print("Maximum string length in " + col + " column: " + str(longest))

Maximum string length in ac_code_iata column: 3
Maximum string length in ac_name_oag column: 39
Maximum string length in arr_ap_code column: 3
Maximum string length in arr_ap_name column: 39
Maximum string length in dep_ap_code column: 3
Maximum string length in dep_ap_name column: 39


In [22]:
# Connect to DB
# Load the ipython_pg.LAV extension (presumably the provider of the %pg_* magics
# used below) and connect to the 'mobility' database as user 'heldm'.
# The password is asked for interactively, so it is not stored in the notebook.
%load_ext ipython_pg.LAV
%pg_connect dbname='mobility' user='heldm'

The ipython_pg.LAV extension is already loaded. To reload it, use:
  %reload_ext ipython_pg.LAV
password for heldm@lav-fileserver.ethz.ch:5433:········
SUCCESS: connected to lav-fileserver.ethz.ch
  PostGIS integration enabled

In [23]:
# Print one empty COMMENT ON COLUMN statement per column; the output serves as
# a copy-paste template for the table definition cell.
comment_stub = 'COMMENT ON\nCOLUMN "OAG"."2012_schedule_raw"."{}" IS ;'
for col in oag12_raw.columns:
    statement = comment_stub.format(col)
    print(statement)

COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."ac_code_iata" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."ac_name_oag" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."arr_ap_code" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."arr_ap_name" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."dep_ap_code" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."dep_ap_name" IS ;
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."frequency_2012" IS ;


In [24]:
# Switch the session to the "NDA-OAG-full" role before touching the OAG schema
# (presumably the role with write access to the NDA-protected data; confirm).
%pg_sql set role "NDA-OAG-full"

SUCCESS: query did not return any data


<cursor object at 0x000001E5E8EA5E48; closed: 0>

In [25]:
%%pg_sql cur
-- Recreate the raw 2012 OAG schedule table from scratch (an existing copy is dropped).
drop table if exists "OAG"."2012_schedule_raw";

-- varchar widths equal the maximum string lengths measured on oag12_raw in this notebook.
CREATE TABLE "OAG"."2012_schedule_raw" 
("ac_code_iata" varchar(3), 
 "ac_name_oag" varchar(39), 
 "arr_ap_code" varchar(3),
 "arr_ap_name" varchar(39),
 "dep_ap_code" varchar(3),
 "dep_ap_name" varchar(39),
 "frequency_2012" float
);

-- Column documentation stored in the database catalogue.
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."ac_code_iata" IS 'IATA aircraft code';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."ac_name_oag" IS 'OAG aircraft name';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."arr_ap_code" IS 'IATA arrival airport code';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."arr_ap_name" IS 'IATA arrival airport name';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."dep_ap_code" IS 'IATA departure airport code';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."dep_ap_name" IS 'IATA departure airport name';
COMMENT ON
COLUMN "OAG"."2012_schedule_raw"."frequency_2012" IS 'number of flights for this segment in 2012';

COMMIT;

SUCCESS: query did not return any data
 cursor object as 'cur'


In [26]:
# Upload the oag12_raw dataframe into the OAG.2012_schedule_raw table created above
# (presumably a bulk copy; column order must match the table definition -- confirm).
%pg_copy oag12_raw OAG.2012_schedule_raw

  waring: green-mode temporarily deactivated (interrupt won't abort the import)  green mode reactivated

In [27]:
# Close the database connection once the 2012 upload is done.
%pg_disconnect