In [13]:
!pip install networkx geopy

Collecting geopy
  Downloading https://files.pythonhosted.org/packages/53/fc/3d1b47e8e82ea12c25203929efb1b964918a77067a874b2c7631e2ec35ec/geopy-1.21.0-py2.py3-none-any.whl (104kB)
Collecting geographiclib<2,>=1.49 (from geopy)
  Downloading https://files.pythonhosted.org/packages/8b/62/26ec95a98ba64299163199e95ad1b0e34ad3f4e176e221c40245f211e425/geographiclib-1.50-py3-none-any.whl
Installing collected packages: geographiclib, geopy
Successfully installed geographiclib-1.50 geopy-1.21.0


You are using pip version 19.0.3, however version 20.0.2 is available.
You should consider upgrading via the 'python -m pip install --upgrade pip' command.


In [1]:
import networkx as nx
import os
import json
import pandas as pd
import numpy as np
import geopy as gp
from geopy import distance as gp_distance

#### Work with airports data

In [2]:
airports = pd.read_csv('./airports.csv', names=['Airport_ID', 'Name', 'City', 'Country', 'IATA', 'ICAO', 'Latitude', 'Longitude', 'Altitide', 'TimeZone', 'DST', 'Tz_database_time_zone', 'Type', 'Source'])

In [3]:
airports['Airport_ID'] = airports['Airport_ID'].astype(str)
airports.head()

Unnamed: 0,Airport_ID,Name,City,Country,IATA,ICAO,Latitude,Longitude,Altitide,TimeZone,DST,Tz_database_time_zone,Type,Source
0,1,Goroka Airport,Goroka,Papua New Guinea,GKA,AYGA,-6.08169,145.391998,5282,10,U,Pacific/Port_Moresby,airport,OurAirports
1,2,Madang Airport,Madang,Papua New Guinea,MAG,AYMD,-5.20708,145.789001,20,10,U,Pacific/Port_Moresby,airport,OurAirports
2,3,Mount Hagen Kagamuga Airport,Mount Hagen,Papua New Guinea,HGU,AYMH,-5.82679,144.296005,5388,10,U,Pacific/Port_Moresby,airport,OurAirports
3,4,Nadzab Airport,Nadzab,Papua New Guinea,LAE,AYNZ,-6.569803,146.725977,239,10,U,Pacific/Port_Moresby,airport,OurAirports
4,5,Port Moresby Jacksons International Airport,Port Moresby,Papua New Guinea,POM,AYPY,-9.44338,147.220001,146,10,U,Pacific/Port_Moresby,airport,OurAirports


In [4]:
airport_useful_columns = airports[['Airport_ID', 'Country', 'Name']]
airport_useful_columns.to_json('airport_columns.json', orient='records')

In [None]:
#### Work with routes data

In [5]:
routes = pd.read_csv('./routes.csv', names=['Airline', 'Airline_ID', 'Source_airport', 'Source_airport_ID', 'Destination_airport', 'Destination_airport_ID', 'Codeshare', 'Stops', 'Equipment'])
cols = ["Source_airport_ID","Destination_airport_ID"]
routes[cols] = routes[cols].replace({r'\N':np.nan})
routes = routes[routes['Source_airport_ID'].notna()]
routes = routes[routes['Destination_airport_ID'].notna()]

In [7]:
routes.head()

Unnamed: 0,Airline,Airline_ID,Source_airport,Source_airport_ID,Destination_airport,Destination_airport_ID,Codeshare,Stops,Equipment
0,2B,410,AER,2965,KZN,2990,,0,CR2
1,2B,410,ASF,2966,KZN,2990,,0,CR2
2,2B,410,ASF,2966,MRV,2962,,0,CR2
3,2B,410,CEK,2968,KZN,2990,,0,CR2
4,2B,410,CEK,2968,OVB,4078,,0,CR2


In [None]:
#### Merge airports for source and destination airport cooordinates

In [8]:
routes = routes.merge( airports[['Airport_ID', 'Latitude', 'Longitude']], left_on='Source_airport_ID', right_on='Airport_ID')
routes = routes.drop(columns=['Airport_ID'])
routes = routes.rename(columns={'Latitude': 'Source_Latitude', 'Longitude': 'Source_Longitude'})

In [9]:
routes = routes.merge( airports[['Airport_ID', 'Latitude', 'Longitude']], left_on='Destination_airport_ID', right_on='Airport_ID')
routes = routes.drop(columns=['Airport_ID'])
routes = routes.rename(columns={'Latitude': 'Destination_Latitude', 'Longitude': 'Destination_Longitude'})

In [None]:
# Work with airlines data

In [10]:
airline_df = pd.read_csv('airlines.csv', names=['Airline_ID', 'Airline_Name','Alias', 'IATA', 'ICAO', 'Callsign', 'Country', 'Active' ])

In [11]:
airline_df.head()

Unnamed: 0,Airline_ID,Airline_Name,Alias,IATA,ICAO,Callsign,Country,Active
0,-1,Unknown,\N,-,,\N,\N,Y
1,1,Private flight,\N,-,,,,Y
2,2,135 Airways,\N,,GNL,GENERAL,United States,N
3,3,1Time Airline,\N,1T,RNX,NEXTIME,South Africa,Y
4,4,2 Sqn No 1 Elementary Flying Training School,\N,,WYT,,United Kingdom,N


In [None]:
# Use only airlines that are active (this is not enirely reliable)

In [12]:
active_airline_df = airline_df[airline_df['Active']=='Y']

In [13]:
active_airline_df[['Airline_ID', 'Airline_Name']].to_json('airlines.json', orient='records')

In [14]:
active_airline_df['Airline_ID'] = active_airline_df['Airline_ID'].astype(str)
active_airline_df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


(1255, 8)

In [None]:
#### Merge routes with airlines to get airline names

In [15]:
# This is done to remove routes from deactivated airlines
routes = routes.merge(active_airline_df[['Airline_ID', 'Airline_Name']], left_on='Airline_ID', right_on='Airline_ID')

In [18]:
routes.head()

Unnamed: 0,Airline,Airline_ID,Source_airport,Source_airport_ID,Destination_airport,Destination_airport_ID,Codeshare,Stops,Equipment,Source_Latitude,Source_Longitude,Destination_Latitude,Destination_Longitude,Airline_Name
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,39.9566,55.606201,49.278702,Aerocondor
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,48.006302,55.606201,49.278702,Aerocondor
2,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,61.5033,55.606201,49.278702,Aerocondor
3,2B,410,DME,4029,KZN,2990,,0,CR2,55.408798,37.9063,55.606201,49.278702,Aerocondor
4,2B,410,EGO,6156,KZN,2990,,0,CR2,50.643799,36.590099,55.606201,49.278702,Aerocondor


In [19]:
routes = routes.dropna(subset=['Source_Latitude', 'Source_Longitude', 'Destination_Latitude', 'Destination_Longitude'])

------
### Calculate distance using geopy
We will be using geodisc function for caculating distance

In [20]:
def calculate_distance_in_km(row):
    source_tuple = (row['Source_Latitude'], row['Source_Longitude'])
    destination_tuple = (row['Destination_Latitude'], row['Destination_Longitude'])
#     distance = distance.geodesic(source_tuple, destination_tuple).km
    row['Distance'] = gp_distance.geodesic(source_tuple, destination_tuple).km
    return row

In [21]:
routes = routes.apply(calculate_distance_in_km, axis=1)

In [24]:
routes.head(20)

Unnamed: 0,Airline,Airline_ID,Source_airport,Source_airport_ID,Destination_airport,Destination_airport_ID,Codeshare,Stops,Equipment,Source_Latitude,Source_Longitude,Destination_Latitude,Destination_Longitude,Airline_Name,Distance
0,2B,410,AER,2965,KZN,2990,,0,CR2,43.449902,39.9566,55.606201,49.278702,Aerocondor,1507.98968
1,2B,410,ASF,2966,KZN,2990,,0,CR2,46.283298,48.006302,55.606201,49.278702,Aerocondor,1040.943207
2,2B,410,CEK,2968,KZN,2990,,0,CR2,55.305801,61.5033,55.606201,49.278702,Aerocondor,773.126239
3,2B,410,DME,4029,KZN,2990,,0,CR2,55.408798,37.9063,55.606201,49.278702,Aerocondor,718.084202
4,2B,410,EGO,6156,KZN,2990,,0,CR2,50.643799,36.590099,55.606201,49.278702,Aerocondor,1010.815885
5,2B,410,LED,2948,KZN,2990,,0,CR2,59.800301,30.262501,55.606201,49.278702,Aerocondor,1220.784291
6,2B,410,SVX,2975,KZN,2990,,0,CR2,56.743099,60.8027,55.606201,49.278702,Aerocondor,725.966451
7,2B,410,KZN,2990,DME,4029,,0,CR2,55.606201,49.278702,55.408798,37.9063,Aerocondor,718.084202
8,2B,410,NBC,6969,DME,4029,,0,CR2,55.564701,52.092499,55.408798,37.9063,Aerocondor,895.419005
9,2B,410,UUA,6160,DME,4029,,0,CR2,54.639999,52.801701,55.408798,37.9063,Aerocondor,954.62875


### Export combined data file

In [23]:
routes.to_csv('combined_data.csv')

In [30]:
data_df = pd.read_csv(os.path.join('combined_data.csv'), header=0)

  interactivity=interactivity, compiler=compiler, result=result)


In [None]:
# create network from dataframe and do basic testing and exploration

In [31]:
G = nx.from_pandas_edgelist(data_df,'Source_airport_ID','Destination_airport_ID',edge_attr='Distance',create_using=nx.DiGraph())
# nx.set_node_attributes(G,airports.Latitude.copy().rename(airports.IATA).to_dict(),'Latitude')
# nx.set_node_attributes(G,airports.Longitude.copy().rename(airports.IATA).to_dict(),'Longitude')
# nx.set_node_attributes(G,airport.Final_Continent.copy().rename(airport.IATA).to_dict(),'Continent')

In [34]:
nx.shortest_path(G, source=4364, target=2965, weight='Distance')


[4364, 2975, 2965]

In [77]:
paths = nx.all_shortest_paths(G, source='6156', target='2965', weight='Distance')


In [78]:
for path in paths:
    print(path)

['6156', '2960', '2965']


In [79]:
simple_paths = nx.all_simple_paths(G, source='6156', target='2965', cutoff=2)


In [None]:
for path in simple_paths:
    print(path)

In [81]:
routes.to_csv('combined_data.csv')