# Kepler.gl fun with Openflights data

In [1]:
import pandas as pd
import numpy as np

## 1. Data Loading

In [2]:
routes_header = ['airline', 'airline ID','source airport','source airport ID','destination airport','destination airport ID', 'code share', 'stops','equipment']
airports_header = ['id','source airport', 'city','country','source code1','source code2', 'lattitude','longitude','number1','number2','letter','region', 'airport2','airport3']

routes = pd.read_csv("../data/routes.dat",header=None,names=routes_header,skiprows=1)
airports = pd.read_csv("../data/airports.csv",header=None,names=airports_header,skiprows=1)

## 2. Prelim EDA

In [3]:
airports.head()

Unnamed: 0,id,source airport,city,country,source code1,source code2,lattitude,longitude,number1,number2,letter,region,airport2,airport3
0,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
1,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
2,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
3,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports
4,6,Wewak International Airport,Wewak,Papua New Guinea,WWK,AYWK,-3.58383,143.669006,19,10,U,Pacific/Port_Moresby,airport,OurAirports


**Observations:** This data set is ready to be used in Kepler.gl taking the `lattitude` and `longitude` fields paired with the `source airport` for the airport names

In [4]:
routes.head()

Unnamed: 0,airline,airline ID,source airport,source airport ID,destination airport,destination airport ID,code share,stops,equipment
0,2B,410,ASF,2966,KZN,2990,,0,CR2
1,2B,410,ASF,2966,MRV,2962,,0,CR2
2,2B,410,CEK,2968,KZN,2990,,0,CR2
3,2B,410,CEK,2968,OVB,4078,,0,CR2
4,2B,410,DME,4029,KZN,2990,,0,CR2


In [5]:
routes.shape

(67662, 9)

**Observations:** This dataset requires the addition of coordinates for the source airports and destination airports

## 3. Adding route coordinates for origin and destination

In [6]:
orig_routes = pd.merge(left = routes, right = airports, how = 'inner', left_on = 'source airport', right_on = 'source code1')

In [7]:
orig_routes.columns

Index(['airline', 'airline ID', 'source airport_x', 'source airport ID',
       'destination airport', 'destination airport ID', 'code share', 'stops',
       'equipment', 'id', 'source airport_y', 'city', 'country',
       'source code1', 'source code2', 'lattitude', 'longitude', 'number1',
       'number2', 'letter', 'region', 'airport2', 'airport3'],
      dtype='object')

In [8]:
orig_routes = orig_routes[['airline', 'airline ID', 'source airport_x', 'source airport ID',
       'destination airport', 'destination airport ID', 'stops',
        'source airport_y', 'city', 'country',
       'source code1', 'lattitude', 'longitude']]

In [9]:
orig_routes = orig_routes.rename(columns = {'city':'source_city','country':'source_country','lattitude': 'source_lat', 'longitude': 'source_lon'})

In [10]:
orig_routes.shape

(66812, 13)

In [11]:
dest_orig_routes = pd.merge(left = orig_routes, right = airports, how = 'inner', left_on = 'destination airport', right_on = 'source code1')

In [12]:
dest_orig_routes.columns

Index(['airline', 'airline ID', 'source airport_x', 'source airport ID',
       'destination airport', 'destination airport ID', 'stops',
       'source airport_y', 'source_city', 'source_country', 'source code1_x',
       'source_lat', 'source_lon', 'id', 'source airport', 'city', 'country',
       'source code1_y', 'source code2', 'lattitude', 'longitude', 'number1',
       'number2', 'letter', 'region', 'airport2', 'airport3'],
      dtype='object')

In [13]:
dest_orig_routes = dest_orig_routes[['airline', 'airline ID', 'source airport_x', 
       'source airport_y', 'source_city', 'source_country',
       'source_lat', 'source_lon', 'source airport', 'city', 'country',
       'source code1_y', 'lattitude', 'longitude' ]]

In [14]:
dest_orig_routes.head()

Unnamed: 0,airline,airline ID,source airport_x,source airport_y,source_city,source_country,source_lat,source_lon,source airport,city,country,source code1_y,lattitude,longitude
0,2B,410,ASF,Astrakhan Airport,Astrakhan,Russia,46.283298,48.006302,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702
1,2B,410,CEK,Chelyabinsk Balandino Airport,Chelyabinsk,Russia,55.305801,61.5033,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702
2,2B,410,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702
3,S7,4329,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702
4,U6,5234,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702


In [15]:
routes_coord = dest_orig_routes.rename(columns={'source airport_x': 'source_airport','source airport': 'destination airport', 'city':'dest_city',
                                'country':'dest_country', 'source code1_y':'dest_code', 'lattitude': 'dest_lat', 'longitude':'dest_lon'})

## 4. Add filters for the Caribbean region

In [16]:
#Lets define the countries of interest

Caribbean = ['Anguilla',
'Antigua and Barbuda',
'Aruba',
'Bahamas',
'Barbados',
'British Virgin Islands',
'Caribbean Netherlands',
'Cayman Islands',
'Cuba',
'Curaçao',
'Dominica',
'Dominican Republic',
'Grenada',
'Guadeloupe',
'Guyana',             
'Haiti',
'Jamaica',
'Martinique',
'Montserrat',
'Puerto Rico',
'Saint Kitts and Nevis',
'Saint Lucia',
'Saint Vincent and the Grenadines',
'Sint Maarten',
'Trinidad and Tobago',
'Turks and Caicos Islands',
'United States Virgin Islands']

In [17]:
routes_coord['Caribbean_orig'] = routes_coord.apply(lambda x: 1 if x['source_country'] in Caribbean else 0, axis=1)

In [18]:
routes_coord['Caribbean_dest'] = routes_coord.apply(lambda x: 1 if x['dest_country'] in Caribbean else 0, axis=1)

In [19]:
routes_coord.head()

Unnamed: 0,airline,airline ID,source_airport,source airport_y,source_city,source_country,source_lat,source_lon,destination airport,dest_city,dest_country,dest_code,dest_lat,dest_lon,Caribbean_orig,Caribbean_dest
0,2B,410,ASF,Astrakhan Airport,Astrakhan,Russia,46.283298,48.006302,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702,0,0
1,2B,410,CEK,Chelyabinsk Balandino Airport,Chelyabinsk,Russia,55.305801,61.5033,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702,0,0
2,2B,410,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702,0,0
3,S7,4329,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702,0,0
4,U6,5234,DME,Domodedovo International Airport,Moscow,Russia,55.408798,37.9063,Kazan International Airport,Kazan,Russia,KZN,55.606201,49.278702,0,0


In [21]:
routes_coord.to_csv('../data/routes_coord.csv')

## 5. Load Kepler.gl

In [24]:
# Load an empty map
from keplergl import KeplerGl
map_1 = KeplerGl()
map_1

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


KeplerGl()

In [25]:
# DataFrame
map_1.add_data(data=routes_coord, name='data_1')

In [28]:
config1 = {'version': 'v1',
 'config': {'visState': {'filters': [{'dataId': 'data_1',
     'id': 'qpn5lccc',
     'name': 'Caribbean_orig',
     'type': 'range',
     'value': [0.995, 1],
     'enlarged': False,
     'plotType': 'histogram',
     'yAxis': None},
    {'dataId': 'data_1',
     'id': 'j2xd1qlx',
     'name': 'Caribbean_dest',
     'type': 'range',
     'value': [1, 1],
     'enlarged': False,
     'plotType': 'histogram',
     'yAxis': None}],
   'layers': [{'id': 'zini9fi',
     'type': 'point',
     'config': {'dataId': 'data_1',
      'label': 'source',
      'color': [18, 92, 119],
      'columns': {'lat': 'source_lat', 'lng': 'source_lon', 'altitude': None},
      'isVisible': True,
      'visConfig': {'radius': 10,
       'fixedRadius': False,
       'opacity': 0.8,
       'outline': False,
       'thickness': 2,
       'strokeColor': None,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'strokeColorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'radiusRange': [0, 50],
       'filled': True},
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center'}]},
     'visualChannels': {'colorField': None,
      'colorScale': 'quantile',
      'strokeColorField': None,
      'strokeColorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear'}},
    {'id': 'jltngxc',
     'type': 'point',
     'config': {'dataId': 'data_1',
      'label': 'dest',
      'color': [77, 193, 156],
      'columns': {'lat': 'dest_lat', 'lng': 'dest_lon', 'altitude': None},
      'isVisible': False,
      'visConfig': {'radius': 10,
       'fixedRadius': False,
       'opacity': 0.8,
       'outline': False,
       'thickness': 2,
       'strokeColor': None,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'strokeColorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'radiusRange': [0, 50],
       'filled': True},
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center'}]},
     'visualChannels': {'colorField': None,
      'colorScale': 'quantile',
      'strokeColorField': None,
      'strokeColorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear'}},
    {'id': '7pc1xcl',
     'type': 'arc',
     'config': {'dataId': 'data_1',
      'label': 'source -> dest arc',
      'color': [146, 38, 198],
      'columns': {'lat0': 'source_lat',
       'lng0': 'source_lon',
       'lat1': 'dest_lat',
       'lng1': 'dest_lon'},
      'isVisible': False,
      'visConfig': {'opacity': 0.8,
       'thickness': 2,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'sizeRange': [0, 10],
       'targetColor': None},
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center'}]},
     'visualChannels': {'colorField': None,
      'colorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear'}},
    {'id': 'kv40z77',
     'type': 'line',
     'config': {'dataId': 'data_1',
      'label': 'source -> dest line',
      'color': [119, 110, 87],
      'columns': {'lat0': 'source_lat',
       'lng0': 'source_lon',
       'lat1': 'dest_lat',
       'lng1': 'dest_lon'},
      'isVisible': True,
      'visConfig': {'opacity': 0.8,
       'thickness': 2,
       'colorRange': {'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
         '#900C3F',
         '#C70039',
         '#E3611C',
         '#F1920E',
         '#FFC300']},
       'sizeRange': [0, 10],
       'targetColor': None},
      'textLabel': [{'field': None,
        'color': [255, 255, 255],
        'size': 18,
        'offset': [0, 0],
        'anchor': 'start',
        'alignment': 'center'}]},
     'visualChannels': {'colorField': None,
      'colorScale': 'quantile',
      'sizeField': None,
      'sizeScale': 'linear'}}],
   'interactionConfig': {'tooltip': {'fieldsToShow': {'data_1': ['airline',
       'airline ID',
       'source_airport',
       'source airport_y',
       'source_city']},
     'enabled': True},
    'brush': {'size': 0.5, 'enabled': False}},
   'layerBlending': 'normal',
   'splitMaps': [],
   'animationConfig': {'currentTime': None, 'speed': 1}},
  'mapState': {'bearing': 0,
   'dragRotate': False,
   'latitude': 17.01472777812096,
   'longitude': -70.38077644033638,
   'pitch': 0,
   'zoom': 3.604471261425981,
   'isSplit': False},
  'mapStyle': {'styleType': 'light',
   'topLayerGroups': {},
   'visibleLayerGroups': {'label': True,
    'road': True,
    'border': False,
    'building': True,
    'water': True,
    'land': True,
    '3d building': False},
   'threeDBuildingColor': [218.82023004728686,
    223.47597962276103,
    223.47597962276103],
   'mapStyles': {}}}}