# 6.3 Geographic Visualization - Milena Nedyalkova

### This script contains the following:
#### 1. Import data and libraries
#### 2. Data wrangling
#### 3. Plotting a choropleth

### 1. Import data and libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
import os
import json
import folium

In [2]:
# This command prompts matplotlib visuals to appear inside the notebook

%matplotlib inline

In [3]:
# Import the JSON file and the cleaned data set

# Both locations are raw strings, so every path separator is written as a single
# backslash; doubling them inside a raw string would put two literal backslashes
# into the path.
country_geo = r'C:\Users\PC\Desktop\CareerFoundry Data Analytics\countries.geojson'
data_path = r'C:\Users\PC\Desktop\CareerFoundry Data Analytics\airport_traffic_2024_clean.csv'

# Read the cleaned airport traffic CSV into a DataFrame
data = pd.read_csv(data_path)

### 2. Data wrangling

In [4]:
# Remove every row whose country is Morocco or Israel (the non-European entries).
# Only rows are dropped here; all columns are kept.

data = data.drop(index=data.index[data['Country'].isin(['Morocco', 'Israel'])])

In [5]:
# Align country names with the spelling used in the geojson file.
# Keys are the names found in the data set, values are the replacement names.

data['Country'] = data['Country'].replace({
    "Czech Republic": "Czechia",
    "Türkiye": "Turkey",
    "Republic of North Macedonia": "North Macedonia",
    "Serbia": "Republic of Serbia",
})

In [6]:
# Print the distinct country names in alphabetical order so the renamed values can be inspected
print(sorted(data['Country'].drop_duplicates()))

['Albania', 'Armenia', 'Austria', 'Belgium', 'Bosnia and Herzegovina', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia', 'Denmark', 'Estonia', 'Finland', 'France', 'Georgia', 'Germany', 'Greece', 'Hungary', 'Iceland', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Moldova', 'Montenegro', 'Netherlands', 'North Macedonia', 'Norway', 'Poland', 'Portugal', 'Republic of Serbia', 'Romania', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Turkey', 'United Kingdom']


In [7]:
# Sum "Total flights" per country and inspect the totals for plausibility.
# reset_index() turns the "Country" group keys back into an ordinary column.

country_flights = (
    data.groupby("Country")["Total flights"]
    .sum()
    .reset_index()
)
country_flights

Unnamed: 0,Country,Total flights
0,Albania,68577
1,Armenia,38895
2,Austria,316723
3,Belgium,310553
4,Bosnia and Herzegovina,16455
5,Bulgaria,64982
6,Croatia,49952
7,Cyprus,93534
8,Czechia,149007
9,Denmark,240676


In [8]:
# Rank the countries by total flights (largest first) and show the first ten

country_flights_sorted = country_flights.sort_values("Total flights", ascending=False)
top10_countries = country_flights_sorted.iloc[:10]
print(top10_countries)

           Country  Total flights
35           Spain        2283157
39  United Kingdom        1985592
12          France        1707533
14         Germany        1660443
19           Italy        1332148
38          Turkey        1154225
28          Norway         667131
15          Greece         593238
26     Netherlands         532865
30        Portugal         481963


### 3. Plotting a choropleth

In [9]:
# Base map, centred on [54, 15] at zoom level 4
m = folium.Map(zoom_start=4, location=[54, 15])

# Choropleth layer: the geojson is passed as a file path, and each feature is
# matched to a row of country_flights through its properties.name / "Country" value
flights_layer = folium.Choropleth(
    geo_data=country_geo,
    key_on="feature.properties.name",
    data=country_flights,
    columns=["Country", "Total flights"],
    fill_color="YlGnBu",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Total Flights (2024)",
)
flights_layer.add_to(m)

# Write the finished map to an HTML file
m.save(r'C:\Users\PC\Desktop\CareerFoundry Data Analytics\flights_per_country_choropleth_.html')

The choropleth map confirms the hypothesis that traffic distribution varies significantly by country. Spain and the UK lead in total flights, followed by France, Germany, Italy, and Türkiye, reflecting the higher volume of air traffic in larger or more tourist-focused countries compared with smaller nations.