In [None]:
import requests
import json
import pandas as pd
import numpy as np
from pymongo import MongoClient
from itertools import combinations
from geopy.distance import great_circle
import config


In [None]:
# Load the airport reference table.
# pandas' default missing-value markers include the literal string "NA", which in
# this dataset is real data: the continent code for North America and the ISO
# country code for Namibia. Only treat empty fields as missing so those values
# are not silently converted to NaN.
airport_data = pd.read_csv(
    '/content/drive/My Drive/airport-codes_csv.csv',
    keep_default_na=False,
    na_values=[''],
)
airport_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57421 entries, 0 to 57420
Data columns (total 12 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   ident         57421 non-null  object 
 1   type          57421 non-null  object 
 2   name          57421 non-null  object 
 3   elevation_ft  49608 non-null  float64
 4   continent     28978 non-null  object 
 5   iso_country   57175 non-null  object 
 6   iso_region    57421 non-null  object 
 7   municipality  51527 non-null  object 
 8   gps_code      41561 non-null  object 
 9   iata_code     9225 non-null   object 
 10  local_code    30030 non-null  object 
 11  coordinates   57421 non-null  object 
dtypes: float64(1), object(11)
memory usage: 5.3+ MB


In [None]:
# Keep only rows that have an IATA code, then discard three columns that the
# following cells never reference.
unused_columns = ['elevation_ft', 'local_code', 'gps_code']
airport_data = airport_data.dropna(subset=['iata_code']).drop(columns=unused_columns)



In [None]:
# Distinct IATA codes across all remaining airports.
airport_codes = airport_data['iata_code'].unique()

# Despite their names, the next two variables hold full rows, not just codes.
is_us = airport_data['iso_country'] == 'US'
is_large = airport_data['type'] == 'large_airport'
us_airport_codes = airport_data[is_us]
large_airport_codes = airport_data[is_large]


In [None]:
def fix_coords(row):
    """Swap the first two comma-separated fields of a coordinate string.

    Args:
        row: a single string such as "x, y" (one cell of the `coordinates`
            column, not a whole DataFrame row).

    Returns:
        A 2-tuple of whitespace-stripped strings in reversed order, ("y", "x").
        Any fields after the second are ignored.

    Raises:
        IndexError: if the string contains no comma.
    """
    parts = row.split(',')
    # NOTE(review): the outputs below suggest the CSV stores "longitude, latitude"
    # and this produces (latitude, longitude) — confirm against the data source.
    return (parts[1].strip(), parts[0].strip())

# Add the reordered coordinates and drop the columns that are no longer needed.
# .assign() builds a new frame instead of writing a column into the filtered
# slice of airport_data, which is what triggered SettingWithCopyWarning.
large_airport_codes = (
    large_airport_codes
    .assign(correct_coords=large_airport_codes['coordinates'].apply(fix_coords))
    .drop(columns=['coordinates', 'type'])
)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [None]:
# Show the remaining columns and their non-null counts for the large-airport subset.
large_airport_codes.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 606 entries, 11927 to 57414
Data columns (total 8 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   ident           606 non-null    object
 1   name            606 non-null    object
 2   continent       402 non-null    object
 3   iso_country     605 non-null    object
 4   iso_region      606 non-null    object
 5   municipality    600 non-null    object
 6   iata_code       606 non-null    object
 7   correct_coords  606 non-null    object
dtypes: object(8)
memory usage: 42.6+ KB


In [None]:

# Every unordered pair of distinct large-airport IATA codes.
unique_codes = large_airport_codes['iata_code'].unique()
combs = list(combinations(unique_codes, 2))

In [None]:
# Resolve each IATA code to its coordinates and name once, instead of scanning
# the whole frame with four boolean masks for every pair. keep='first' mirrors
# the original `.iloc[0]` choice should a code appear on more than one row.
first_row_per_code = (
    large_airport_codes
    .drop_duplicates(subset='iata_code', keep='first')
    .set_index('iata_code')
)
coords_by_code = first_row_per_code['correct_coords'].to_dict()
name_by_code = first_row_per_code['name'].to_dict()

# One record per pair:
# [origin code, origin name, origin coords, dest code, dest name, dest coords, distance in miles]
list_to_df = []
for origin_code, dest_code in combs:
  loc1_coords = coords_by_code[origin_code]
  loc2_coords = coords_by_code[dest_code]

  gc_distance = great_circle(loc1_coords, loc2_coords).mi
  list_to_df.append([origin_code, name_by_code[origin_code], loc1_coords,
                     dest_code, name_by_code[dest_code], loc2_coords, gc_distance])

In [None]:
# Peek at the first ten assembled route records.
list_to_df[:10]

[['POM',
  'Port Moresby Jacksons International Airport',
  ('-9.443380355834961', '147.22000122070312'),
  'KEF',
  'Keflavik International Airport',
  ('63.985001', '-22.6056'),
  8635.37883637017],
 ['POM',
  'Port Moresby Jacksons International Airport',
  ('-9.443380355834961', '147.22000122070312'),
  'PRN',
  'PriÅ¡tina International Airport',
  ('42.5728', '21.035801'),
  8476.052502788527],
 ['POM',
  'Port Moresby Jacksons International Airport',
  ('-9.443380355834961', '147.22000122070312'),
  'YEG',
  'Edmonton International Airport',
  ('53.309700012200004', '-113.580001831'),
  7120.067716968816],
 ['POM',
  'Port Moresby Jacksons International Airport',
  ('-9.443380355834961', '147.22000122070312'),
  'YHZ',
  'Halifax / Stanfield International Airport',
  ('44.8807983398', '-63.5085983276'),
  9381.188649598402],
 ['POM',
  'Port Moresby Jacksons International Airport',
  ('-9.443380355834961', '147.22000122070312'),
  'YOW',
  'Ottawa Macdonald-Cartier International 

In [None]:
# Column names follow the field order used when each record was appended to list_to_df.
route_columns = [
    'origin_code', 'origin_name', 'origin_coords',
    'dest_code', 'dest_name', 'dest_coords',
    'gc_distance',
]
route_distances = pd.DataFrame(data=list_to_df, columns=route_columns)

In [None]:
# Display the first five routes to sanity-check the assembled frame.
route_distances.head(5)

Unnamed: 0,origin_code,origin_name,origin_coords,dest_code,dest_name,dest_coords,gc_distance
0,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",KEF,Keflavik International Airport,"(63.985001, -22.6056)",8635.378836
1,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",PRN,PriÅ¡tina International Airport,"(42.5728, 21.035801)",8476.052503
2,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",YEG,Edmonton International Airport,"(53.309700012200004, -113.580001831)",7120.067717
3,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",YHZ,Halifax / Stanfield International Airport,"(44.8807983398, -63.5085983276)",9381.18865
4,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",YOW,Ottawa Macdonald-Cartier International Airport,"(45.3224983215332, -75.66919708251953)",8890.278824


In [None]:
def get_params(row):
  """Build the emission-estimate request payload for one route.

  Fields are read by column label rather than by position: integer keys on a
  string-labelled Series are deprecated in pandas, and labels keep this correct
  if the column order of the frame ever changes.

  Args:
    row: a mapping-like row (e.g. a row of `route_distances`) providing
      'gc_distance', 'origin_code' and 'dest_code'.

  Returns:
    dict with the keys "emission_factor" (chosen from the distance band),
    "parameters" ({"route": [origin_code, dest_code]}) and "persist" (True).
  """
  distance = row['gc_distance']

  # Bands: below 300, from 300 up to (not including) 2300, and everything else.
  if distance < 300:
    band = "distance_lt_300mi"
  elif distance < 2300:
    band = "distance_gt_300mi_lt_2300mi"
  else:
    band = "distance_gt_2300mi"

  emission_factor = f"passenger_flight-route_type_na-aircraft_type_na-{band}-class_na-contrails_na"
  route = [row['origin_code'], row['dest_code']]

  return {"emission_factor": emission_factor, "parameters": {"route": route}, "persist": True}

# Build one request payload per route, then attach the payloads as a new column.
query_params = route_distances.apply(get_params, axis=1)
route_distances["query_params"] = query_params

In [None]:
# Count payloads per (origin, destination) name pair, smallest counts first.
pair_counts = route_distances.groupby(['origin_name', 'dest_name'])[['query_params']].count()
pair_counts.sort_values(by='query_params')

Unnamed: 0_level_0,Unnamed: 1_level_0,query_params
origin_name,dest_name,Unnamed: 2_level_1
Aalborg Airport,Abeid Amani Karume International Airport,1
Ndjili International Airport,Hangzhou Xiaoshan International Airport,1
Ndjili International Airport,Hartsfield Jackson Atlanta International Airport,1
Ndjili International Airport,Hasanuddin International Airport,1
Ndjili International Airport,Hazrat Shahjalal International Airport,1
...,...,...
General Ignacio P. Garcia International Airport,Trivandrum International Airport,1
General Ignacio P. Garcia International Airport,Ufa International Airport,1
General Ignacio P. Garcia International Airport,Ulaanbaatar International Airport,1
General Ignacio P. Garcia International Airport,Sri Guru Ram Dass Jee International Airport,1


In [None]:
import pymongo
from pymongo import MongoClient

In [None]:
# Never embed the database password in the notebook: the URI now carries only
# the host and database, and the secret is read from the local config module.
# The password that used to be written here is exposed to anyone who has seen
# this file and should be rotated.
mongo_host = 'bs8ntk4apfl7fga-mongodb.services.clever-cloud.com'
mongo_db_name = 'bs8ntk4apfl7fga'
uri = f'mongodb://{mongo_host}:27017/{mongo_db_name}'
client = MongoClient(
    uri,
    username='urhejh70922nhwipt6kt',
    password=config.db_password,
    authSource=mongo_db_name,
)

In [None]:
# MongoDB connection info
hostname = 'bs8ntk4apfl7fga-mongodb.services.clever-cloud.com'
port = 27017
username = 'urhejh70922nhwipt6kt'
password = config.db_password  # secret comes from the local config module
databaseName = 'bs8ntk4apfl7fga'

# connect with authentication
# Database.authenticate() was removed in PyMongo 4. Passing the credentials to
# MongoClient works on both PyMongo 3.x and 4.x; authSource names the database
# the user is defined in, matching what db.authenticate() did on `db`.
client = MongoClient(
    hostname,
    port,
    username=username,
    password=password,
    authSource=databaseName,
)
db = client[databaseName]


True

In [None]:
#db.create_collection('flight_emissions')

In [None]:
# Spot-check a single route: Keflavik -> Ottawa.
from_keflavik = route_distances['origin_name'] == 'Keflavik International Airport'
to_ottawa = route_distances['dest_name'] == 'Ottawa Macdonald-Cartier International Airport'
route_distances[from_keflavik & to_ottawa]

Unnamed: 0,origin_code,origin_name,origin_coords,dest_code,dest_name,dest_coords,gc_distance,query_params
608,KEF,Keflavik International Airport,"(63.985001, -22.6056)",YOW,Ottawa Macdonald-Cartier International Airport,"(45.3224983215332, -75.66919708251953)",2382.160082,{'emission_factor': 'passenger_flight-route_ty...


In [None]:
# import time


# for idx, params in enumerate(route_distances.query_params[:10]):
#     payload = json.dumps(params)
# #   headers = {"Authorization": config.climatiq_bearer_key, "Content-Type": "application/json"}
# #   request_dict = requests.post('https://beta2.api.climatiq.io/estimate',data=payload, headers=headers).json()
#     request_dict = {'test':'test'}
#     info_dict = dict(route_distances.iloc[idx])
#     info_dict.pop('query_params')
#     to_insert = dict(info_dict, **request_dict)

#     print(to_insert)
  
# #   db.flight_emissions.insert_one(to_insert)
# #   time.sleep(0.1)

In [None]:
# Inspect the request payload built for the row labelled 14.
route_distances.loc[14, 'query_params']

{'emission_factor': 'passenger_flight-route_type_na-aircraft_type_na-distance_gt_2300mi-class_na-contrails_na',
 'parameters': {'route': ['POM', 'ACC']},
 'persist': True}

In [None]:
#checking for duplicates in the db
pipeline = [
    {'$group': {'_id': {'origin_code':'$origin_code', 'dest_code':'$dest_code'}, 'id': {'$sum': 1}}},  
    {'$sort': {'co2e':-1}},
    {'$limit':5}                                        # sort by price rating, ascending
]

cursor = db.flight_emissions.aggregate(pipeline)
list(cursor)


[{'_id': {'dest_code': 'PUS', 'origin_code': 'LTN'}, 'id': 1},
 {'_id': {'dest_code': 'FSZ', 'origin_code': 'LTN'}, 'id': 1},
 {'_id': {'dest_code': 'CJJ', 'origin_code': 'LTN'}, 'id': 1},
 {'_id': {'dest_code': 'EZE', 'origin_code': 'LTN'}, 'id': 1},
 {'_id': {'dest_code': 'OSN', 'origin_code': 'LTN'}, 'id': 1}]

In [None]:
# Pull every stored emission document into a DataFrame.
emission_docs = list(db.flight_emissions.find())
flights = pd.DataFrame(emission_docs)


In [None]:
# Copy the route fields from route_distances onto flights under a 'true_' prefix.
# NOTE(review): the assignment aligns on the index of the two frames, so it
# presumably relies on the stored documents coming back in the same order as
# route_distances — confirm.
route_fields = [
    'origin_code', 'origin_name', 'origin_coords',
    'dest_code', 'dest_name', 'dest_coords',
    'gc_distance',
]
for field in route_fields:
    flights[f'true_{field}'] = route_distances[field]


In [None]:
# Remove the route fields that came from the database documents; the 'true_'
# copies taken from route_distances replace them.
stored_route_fields = [
    'origin_code', 'origin_name', 'origin_coords',
    'dest_code', 'dest_name', 'dest_coords',
    'gc_distance',
]
flights = flights.drop(columns=stored_route_fields)

In [None]:
# Strip the 'true_' prefix so the copied fields take over the original column names.
plain_names = [
    'origin_code', 'origin_name', 'origin_coords',
    'dest_code', 'dest_name', 'dest_coords',
    'gc_distance',
]
flights = flights.rename(columns={f'true_{name}': name for name in plain_names})

In [None]:
# Number of distinct destination names (a missing value, if any, counts as one).
flights['dest_name'].nunique(dropna=False)

605

In [None]:
# Keep only rows that have a co2e value, then drop the 'error' and 'message' columns.
has_estimate = flights.dropna(subset=['co2e'])
flights = has_estimate.drop(columns=['error', 'message'])


In [None]:
# Manually fill in municipalities, addressing rows by their index label.
municipality_fixes = {
    21154: 'Manzini',
    38859: 'Dhahran',
    41601: 'Pyeongtaek',
    41666: 'Okinawa',
    53056: 'Mattala',
    54346: 'Medan',
}
for row_label, city in municipality_fixes.items():
    large_airport_codes.loc[row_label, 'municipality'] = city

# Restore the literal code 'NA' on the row whose country code is missing.
large_airport_codes.loc[22315, 'iso_country'] = 'NA'

# Every missing continent becomes the code 'NA'. fillna replaces the chained
# assignment `df.continent[mask] = ...`, which pandas warns about and which is
# not guaranteed to write through to the frame.
large_airport_codes['continent'] = large_airport_codes['continent'].fillna('NA')


In [None]:
# Attach airport attributes for the origin of each flight, then label them as origin_*.
origin_airport_attrs = large_airport_codes[['name', 'continent', 'iso_country', 'iso_region', 'municipality', 'iata_code']]
flights = flights.merge(origin_airport_attrs, how='inner', left_on='origin_code', right_on='iata_code')
flights = flights.rename(columns={
    'continent': 'origin_continent',
    'iso_country': 'origin_country',
    'iso_region': 'origin_region',
    'municipality': 'origin_city',
})

In [None]:
# The join key brought in by the origin merge is no longer needed.
flights = flights.drop(columns=['iata_code'])

In [None]:
# Attach the same airport attributes again, this time keyed on the destination code.
dest_airport_attrs = large_airport_codes[['name', 'continent', 'iso_country', 'iso_region', 'municipality', 'iata_code']]
flights = flights.merge(dest_airport_attrs, how='inner', left_on='dest_code', right_on='iata_code')


In [None]:
# Label the newly merged attributes as dest_*, then drop the join key and the
# two suffixed 'name' columns left over from the merges.
dest_labels = {
    'continent': 'dest_continent',
    'iso_country': 'dest_country',
    'iso_region': 'dest_region',
    'municipality': 'dest_city',
}
flights = flights.rename(columns=dest_labels).drop(columns=['iata_code', 'name_y', 'name_x'])

In [None]:
# Map mis-encoded city names to plain-ASCII spellings.
# Two replacement values were corrected: 'Na Poli' -> 'Napoli' and
# 'Point-a-Pitre' -> 'Pointe-a-Pitre'.
replace_city_dict = {'PoznaÅ\x84':'Poznan', 'GdaÅ\x84sk':'Gdansk', 'WrocÅ\x82aw':'Wroclaw', 'MontrÃ©al':'Montreal', 'Bordeaux/MÃ©rignac':'Bordeaux/Merignac',
                     'San JosÃ© del Cabo': 'San Jose del Cabo', 'BelÃ©m':'Belem', 'MalÃ©':'Male', 'MÃ¼nster':'Munster','DÃ¼sseldorf':'Dusseldorf', 'Ã\x9crÃ¼mqi':'Urumqi',
                     'BÃ¢le/Mulhouse':'Basel/Mulhouse', 'SÃ£o Paulo':'Sao Paulo', 'ReykjavÃ\xadk':'Reykjavik','LiÃ¨ge':'Liege', 'CancÃºn':'Cancun',
                     'Pasay / ParaÃ±aque, Metro Manila':'Pasay / Paranaque, Metro Manila','FlorianÃ³polis':'Florianopolis', 'NÃ¡poli':'Napoli', 'HagÃ¥tÃ±a, Guam International Airport':'Hagatna',
                     'TromsÃ¸':'Tromso','KrakÃ³w':'Krakow', 'Pointe-Ã\xa0-Pitre':'Pointe-a-Pitre', 'LuleÃ¥':'Lulea', 'Praia da VitÃ³ria':'Praia da Vitoria', 'BodÃ¸':'Bodo', 'MÃ¡laga':'Malaga',
                     'BrasÃ\xadlia':'Brasilia','MalmÃ¶':'Malmo', 'Ä°zmir':'Izmir' }

# Series.replace with a dict swaps exact whole-cell matches in one pass and
# returns a new Series, avoiding the chained assignment
# (`flights.origin_city[mask] = v`) that raised SettingWithCopyWarning.
flights['origin_city'] = flights['origin_city'].replace(replace_city_dict)
flights['dest_city'] = flights['dest_city'].replace(replace_city_dict)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [None]:
# Split each coordinate pair into numeric columns: element 0 -> *_lat, element 1 -> *_long.
for endpoint in ('origin', 'dest'):
    coord_pairs = flights[f'{endpoint}_coords']
    flights[f'{endpoint}_lat'] = coord_pairs.map(lambda pair: float(pair[0]))
    flights[f'{endpoint}_long'] = coord_pairs.map(lambda pair: float(pair[1]))

# Integer-truncated co2e, and the same figure divided by 1000 and rounded to two decimals.
flights['formatted_co2e'] = flights['co2e'].map(int)
flights['formatted_tons'] = (flights['formatted_co2e'] / 1000).round(2)

In [None]:
# Country name <-> ISO code lookup table.
iso_codes = pd.read_csv('/content/drive/My Drive/country_iso_codes.csv')

# Set Namibia's code explicitly. A single .loc assignment replaces the chained
# form `iso_codes.Code[mask] = ...`, which is not guaranteed to modify the frame.
iso_codes.loc[iso_codes['Name'] == 'Namibia', 'Code'] = 'NA'

# Add a Kosovo row. DataFrame.append was removed in pandas 2.0; pd.concat is
# the supported equivalent.
kosovo_row = pd.DataFrame([{'Name': 'Kosovo', 'Code': 'XK'}])
iso_codes = pd.concat([iso_codes, kosovo_row], ignore_index=True)

In [None]:
# Left-join the country lookup twice, first on the origin country code and then
# on the destination country code. Because both joins add 'Name' and 'Code',
# the second one yields the suffixed columns Name_x/Code_x and Name_y/Code_y.
for country_column in ('origin_country', 'dest_country'):
    flights = flights.merge(iso_codes, left_on=country_column, right_on='Code', how='left')

In [None]:
# Preview the enriched flights frame, including the suffixed Name_*/Code_* columns from the two country merges.
flights.head()

Unnamed: 0,_id,co2e,co2e_unit,id,source,year,region,category,origin_code,origin_name,origin_coords,dest_code,dest_name,dest_coords,gc_distance,origin_continent,origin_country,origin_region,origin_city,dest_continent,dest_country,dest_region,dest_city,origin_lat,origin_long,dest_lat,dest_long,formatted_co2e,formatted_tons,Name_x,Code_x,Name_y,Code_y
0,61b26bb4203479d554e28590,1390.294028,kg,passenger_flight-route_type_na-aircraft_type_n...,EPA,2021,US,Air Travel,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",KEF,Keflavik International Airport,"(63.985001, -22.6056)",8635.378836,OC,PG,PG-NCD,Port Moresby,EU,IS,IS-2,Reykjavik,-9.44338,147.220001,63.985001,-22.6056,1390,1.39,Papua New Guinea,PG,Iceland,IS
1,61b26bb4203479d554e28591,1364.64252,kg,passenger_flight-route_type_na-aircraft_type_n...,EPA,2021,US,Air Travel,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",PRN,PriÅ¡tina International Airport,"(42.5728, 21.035801)",8476.052503,OC,PG,PG-NCD,Port Moresby,EU,XK,XK-01,Prishtina,-9.44338,147.220001,42.5728,21.035801,1364,1.36,Papua New Guinea,PG,Kosovo,XK
2,61b26d3c203479d554e287ed,295.769183,kg,passenger_flight-route_type_na-aircraft_type_n...,EPA,2021,US,Air Travel,KEF,Keflavik International Airport,"(63.985001, -22.6056)",PRN,PriÅ¡tina International Airport,"(42.5728, 21.035801)",2257.78322,EU,IS,IS-2,Reykjavik,EU,XK,XK-01,Prishtina,63.985001,-22.6056,42.5728,21.035801,295,0.3,Iceland,IS,Kosovo,XK
3,61b26bb4203479d554e28592,1146.329279,kg,passenger_flight-route_type_na-aircraft_type_n...,EPA,2021,US,Air Travel,POM,Port Moresby Jacksons International Airport,"(-9.443380355834961, 147.22000122070312)",YEG,Edmonton International Airport,"(53.309700012200004, -113.580001831)",7120.067717,OC,PG,PG-NCD,Port Moresby,,CA,CA-AB,Edmonton,-9.44338,147.220001,53.3097,-113.580002,1146,1.15,Papua New Guinea,PG,Canada,CA
4,61b26d3c203479d554e287ee,492.355426,kg,passenger_flight-route_type_na-aircraft_type_n...,EPA,2021,US,Air Travel,KEF,Keflavik International Airport,"(63.985001, -22.6056)",YEG,Edmonton International Airport,"(53.309700012200004, -113.580001831)",3058.112561,EU,IS,IS-2,Reykjavik,,CA,CA-AB,Edmonton,63.985001,-22.6056,53.3097,-113.580002,492,0.49,Iceland,IS,Canada,CA


In [None]:
# Give the merged country names descriptive column names and drop the redundant code columns.
country_name_labels = {'Name_x': 'origin_country_full', 'Name_y': 'dest_country_full'}
flights = flights.rename(columns=country_name_labels).drop(columns=['Code_x', 'Code_y'])

In [None]:
# Any country name still missing after the merges is filled with 'Namibia'.
# fillna returns a new Series that is assigned back, replacing the chained
# assignment (`flights.col[mask] = ...`) that raised SettingWithCopyWarning and
# is not guaranteed to update the frame.
flights['origin_country_full'] = flights['origin_country_full'].fillna('Namibia')
flights['dest_country_full'] = flights['dest_country_full'].fillna('Namibia')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [None]:
#db.create_collection('final_flight_app')

Collection(Database(MongoClient(host=['bs8ntk4apfl7fga-mongodb.services.clever-cloud.com:27017'], document_class=dict, tz_aware=False, connect=True), 'bs8ntk4apfl7fga'), 'final_flight_app')

In [None]:
# One dict per flight row, ready for a bulk insert.
flights_dict2 = flights.to_dict(orient='records')

In [None]:
# Bulk-insert every prepared flight record into the final_flight_app collection.
# NOTE(review): the records still carry the '_id' values loaded from
# flight_emissions, so running this cell a second time will presumably fail
# with duplicate-key errors — confirm before re-running.
db.final_flight_app.insert_many(flights_dict2)

<pymongo.results.InsertManyResult at 0x7fbdae7c4eb0>

In [None]:
# from google.colab import files

# iso_codes.to_csv('fixed_iso_codes.csv')
# files.download("fixed_iso_codes.csv")

In [None]:
# Read back ten stored documents (positions 60-69) as a spot check. skip/limit
# run on the server, so only those ten documents are transferred instead of the
# whole collection being loaded and then sliced.
list(db.final_flight_app.find({}).skip(60).limit(10))

[{'_id': ObjectId('61b26fe3203479d554e2915c'),
  'category': 'Air Travel',
  'co2e': 143.82658546374958,
  'co2e_unit': 'kg',
  'dest_city': "St. John's",
  'dest_code': 'YYT',
  'dest_continent': 'NA',
  'dest_coords': ['47.618598938', '-52.7518997192'],
  'dest_country': 'CA',
  'dest_country_full': 'Canada',
  'dest_lat': 47.618598938,
  'dest_long': -52.7518997192,
  'dest_name': "St. John's International Airport",
  'dest_region': 'CA-NL',
  'formatted_co2e': 143,
  'formatted_tons': 0.14,
  'gc_distance': 1097.914421203209,
  'id': 'passenger_flight-route_type_na-aircraft_type_na-distance_gt_300mi_lt_2300mi-class_na-contrails_na',
  'origin_city': 'Ottawa',
  'origin_code': 'YOW',
  'origin_continent': 'NA',
  'origin_coords': ['45.3224983215332', '-75.66919708251953'],
  'origin_country': 'CA',
  'origin_country_full': 'Canada',
  'origin_lat': 45.3224983215332,
  'origin_long': -75.66919708251953,
  'origin_name': 'Ottawa Macdonald-Cartier International Airport',
  'origin_regi