In [1]:
import os
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Pull it all together  
Now we want to combine the DataFrames so we can analyze the data to find interesting leads

## Look at plane info without the flight details  
This way we can answer questions like where all the private and military planes are registered, for instance

In [4]:
# Load the registration-country and plane-type CSVs into DataFrames.
# NOTE(review): both paths are absolute and machine-specific; the notebook
# will not run on another machine without editing them.
reg_country_df, plane_type_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/reg_country.csv",
        "/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/plane_type.csv",
    )
)

In [5]:
# Inner-join the two tables on the plane column ('plane' on the left,
# 'Plane' on the right), then discard the now-duplicate join key 'Plane'
# and the 'tail_prefix' column.
country_type_df = (
    reg_country_df
    .merge(plane_type_df, how='inner', left_on='plane', right_on='Plane')
    .drop(columns=['Plane', 'tail_prefix'])
)

In [6]:
# Switch to the column names the rest of the notebook filters and groups on.
country_type_df = country_type_df.rename(
    columns={
        'plane': 'plane_model',
        'Country or region': 'reg_country',
    }
)

## Use Python to answer some of those questions

#### Where are the private planes registered?

In [10]:
# Keep only the rows whose 'type' is PRIVATE, then count them per
# registration country (the count Series is the cell's displayed output).
private_planes_df = country_type_df.loc[country_type_df['type'].eq('PRIVATE')]
private_planes_df.groupby(by='reg_country').size()

reg_country
Algeria            1
Armenia            1
Azerbaijan         1
Belarus            1
China              1
Czech Republic     1
Gambia             1
Germany            1
Kazakhstan         6
Russia            83
Turkey             1
dtype: int64

The code below allows you to get more information on the private planes registered in the specified country

In [11]:
# Show the private planes registered in one country; change the country
# name to inspect a different one.
private_planes_df.loc[private_planes_df['reg_country'].eq('Armenia')]

Unnamed: 0,icao,tail,plane_model,reg_country,type,max_passengers
991,600009,EK-222,SOCATA TBM 940,Armenia,PRIVATE,6


####  What about the military planes?

In [12]:
# Keep only the rows whose 'type' is MILITARY, then count them per
# registration country (the count Series is the cell's displayed output).
military_planes_df = country_type_df.loc[country_type_df['type'].eq('MILITARY')]
military_planes_df.groupby(by='reg_country').size()

reg_country
Azerbaijan       1
Russia          19
Syria            1
Turkmenistan     1
dtype: int64

The same code allows you to get more information on the military planes registered in the specified country

In [13]:
# Show the military planes registered in one country; change the country
# name to inspect a different one.
military_planes_df.loc[military_planes_df['reg_country'].eq('Syria')]

Unnamed: 0,icao,tail,plane_model,reg_country,type,max_passengers
1037,778681,YK-ATA,Ilyushin Il-76T,Syria,MILITARY,UNKNOWN


A quick Google search shows this plane may be interesting

![https://www.linkedin.com/pulse/interesting-syrian-air-force-flights-rick-francona/](images/yk-ata.png)

#### Export the DataFrame as an HTML table (ready to be enhanced with DataTables) for easy filtering and querying

In [121]:
# Render the frame as an HTML <table> string, leaving out the index column.
country_type_table = country_type_df.to_html(
    index=False,
)

In [122]:
# Write the rendered HTML table next to the notebook (relative path).
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform's default locale encoding, which could otherwise fail on any
# non-ASCII character in the table.
with open('country_type_table.html', 'w', encoding='utf-8') as f:
    f.write(country_type_table)

In [17]:
# Also keep a CSV copy of the combined plane table (index column omitted).
# NOTE(review): absolute, machine-specific output path.
country_type_df.to_csv(
    path_or_buf="/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/country_type.csv",
    index=False,
)

## Create the ultimate DataFrame that combines all flight data with the enriched plane information  

Now we can ask the data questions like where all <b>private</b> planes that took off from Moscow have flown, for instance

In [14]:
# Load the cleaned flight records.
# NOTE(review): absolute, machine-specific input path.
original_flight_df = pd.read_csv(
    filepath_or_buffer="/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/icarus_flights_cleaned.csv"
)

In [18]:
# Merge the original flight df with the plane table built above, which gives
# each plane's registration country and category. A left join on 'icao' keeps
# every flight row even when its icao has no match in country_type_df.
final_flights_df = original_flight_df.merge(
    country_type_df,
    how='left',
    on='icao',
)

In [19]:
# Clean up: remove three columns that are not carried into the final table.
final_flights_df = final_flights_df.drop(
    columns=[
        'aircraft_model',
        'aircraft_model_code',
        'area_country',
    ]
)

In [20]:
# Rename to the final column names; keys that are not present in the frame
# are ignored by rename.
final_flights_df = final_flights_df.rename(
    columns={
        'Country or region': 'reg_country',
        'type': 'plane_category',
    }
)

In [21]:
# NOTE(review): country_type_df's 'plane' column is already renamed to
# 'plane_model' before the merge, so this looks like a no-op unless the
# flight CSV carries its own 'plane' column — confirm.
final_flights_df = final_flights_df.rename(
    columns={'plane': 'plane_model'}
)

In [22]:
# Upper-case the max_passengers values via the .str accessor.
# NOTE(review): presumably every entry is a string (e.g. 'UNKNOWN');
# non-string entries would come back as NaN — confirm.
final_flights_df = final_flights_df.assign(
    max_passengers=lambda flights: flights['max_passengers'].str.upper()
)

In [23]:
# Print a summary of the merged frame: column names, non-null counts and dtypes.
final_flights_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9026 entries, 0 to 9025
Data columns (total 13 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   icao                 9026 non-null   object
 1   call_sign            8566 non-null   object
 2   start_time           9026 non-null   object
 3   end_time             9026 non-null   object
 4   origin_area          9026 non-null   object
 5   origin_country       9026 non-null   object
 6   destination_area     8433 non-null   object
 7   destination_country  8429 non-null   object
 8   tail                 8876 non-null   object
 9   plane_model          8882 non-null   object
 10  reg_country          8872 non-null   object
 11  plane_category       8882 non-null   object
 12  max_passengers       8861 non-null   object
dtypes: object(13)
memory usage: 916.8+ KB


#### Export an HTML table (ready to be enhanced with DataTables) for easy filtering and querying

In [123]:
# Render the full flight frame as an HTML <table> string, leaving out the index column.
full_flight_table = final_flights_df.to_html(
    index=False,
)

In [124]:
# Write the rendered HTML table next to the notebook (relative path).
# The encoding is pinned to UTF-8 so the output does not depend on the
# platform's default locale encoding, which could otherwise fail on any
# non-ASCII character in the table.
with open('full_flight_table.html', 'w', encoding='utf-8') as f:
    f.write(full_flight_table)

In [24]:
# Also keep a CSV copy of the full flight table (index column omitted).
# NOTE(review): absolute, machine-specific output path.
final_flights_df.to_csv(
    path_or_buf="/Users/karinashedrofsky/LEDE_2023/flights-project/csvs/full_flight_data.csv",
    index=False,
)