In [1]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Clean original ADBX-Exchange Flight Data

### Import flight data  
All flights from Moscow between May 24 - June 24, 2023 (pulled from Icarus)

In [2]:
df = pd.read_csv("/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/icarus_flights.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9026 entries, 0 to 9025
Data columns (total 10 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   icao                 9026 non-null   object
 1   call_sign            8566 non-null   object
 2   start_time           9026 non-null   object
 3   end_time             9026 non-null   object
 4   origin_area          9026 non-null   object
 5   origin_country       9026 non-null   object
 6   destination_area     8433 non-null   object
 7   destination_country  8429 non-null   object
 8   aircraft_model       1950 non-null   object
 9   aircraft_model_code  1950 non-null   object
dtypes: object(10)
memory usage: 705.3+ KB


# Check out the data

### Show all origin locations
The "Moscow" search on Icarus includes flights from cities near Moscow

In [3]:
origin_counts = df.groupby('origin_area').size().reset_index(name='count')
origin_counts.sort_values(by='count', ascending=False).reset_index(drop=True)

Unnamed: 0,origin_area,count
0,Moskovsskaya,5347
1,Moskva,2904
2,Vladimir,260
3,Kaluga,247
4,Tula,65
5,Yaroslavl',60
6,Ryazan',53
7,Tver',45
8,Ivanovo,41
9,Nizhegorod,3


#### Group together Moskovsskaya and Moskva to make "Moscow"


In [4]:
replacements = {
    'Moskovsskaya': 'Moscow',
    'Moskva': 'Moscow'
}

df['origin_area'] = df['origin_area'].replace(replacements)

### Show all destination countries

In [5]:
dest_country_counts = df.groupby('destination_country').size().reset_index(name='count')
dest_country_counts.sort_values(by='count', ascending=False).reset_index(drop=True)
dest_country_counts

Unnamed: 0,destination_country,count
0,Al Muḩarraq,9
1,Algeria,1
2,Armenia,173
3,Azerbaijan,102
4,Bangladesh,1
5,Belarus,136
6,Belgium,3
7,Cuba,3
8,Democratic Republic of the Congo,1
9,Egypt,73


### All destination 'areas'

In [6]:
dest_area_counts = df.groupby('destination_area').size().reset_index(name='count')
dest_area_counts.sort_values(by='count', ascending=False).reset_index(drop=True)
dest_area_counts.head()

Unnamed: 0,destination_area,count
0,Abkhazia,17
1,Abu Dhabi,22
2,Abşeron,1
3,Ad Dakhliyah,2
4,Ad Daqahliyah,2


### Clean the destination names in the df

In [10]:
replacements = {
    'Moskovsskaya': 'Moscow',
    'Moskva': 'Moscow',
    'City of St. Petersburg': 'St. Petersburg',
    'Ad Daqahliyah': 'Ad Dakhliyah'
}

df.loc[:, 'destination_area'] = df['destination_area'].replace(replacements)

In [11]:
replacements = {
    'Al Muḩarraq': 'Bahrain'
}

df.loc[:,'destination_country'] = df['destination_country'].replace(replacements)

# Create symbol map of all flight destinations

### Make new df that combines area and country for map viz

In [27]:
df['area_country'] = df['destination_area'] + ', ' + df['destination_country']

In [29]:
desintation_counts_df = df["area_country"].value_counts().reset_index()

desintation_counts_df

Unnamed: 0,area_country,count
0,"Moscow, Russia",869
1,"St. Petersburg, Russia",620
2,"Stavropol', Russia",330
3,"Tatarstan, Russia",278
4,"Sverdlovsk, Russia",274
...,...,...
232,ZSHC - Hangzhou Xiaoshan International Airport...,1
233,"Ialoveni, Moldova",1
234,"Shida Kartli, Georgia",1
235,"Cluj, Romania",1


<iframe title="A month of flight destinations from Moscow" aria-label="Map" id="datawrapper-chart-kdC52" src="https://datawrapper.dwcdn.net/kdC52/1/" scrolling="no" frameborder="0" style="width: 0; min-width: 100% !important; border: none;" height="362" data-external="1"></iframe><script type="text/javascript">!function(){"use strict";window.addEventListener("message",(function(a){if(void 0!==a.data["datawrapper-height"]){var e=document.querySelectorAll("iframe");for(var t in a.data["datawrapper-height"])for(var r=0;r<e.length;r++)if(e[r].contentWindow===a.source){var i=a.data["datawrapper-height"][t]+"px";e[r].style.height=i}}}))}();
</script>

In [30]:
#save both DataFrames as csv
desintation_counts_df.to_csv("/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/destination_counts.csv")

In [32]:
df.to_csv("/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/icarus_flights_cleaned.csv", index=False)