# Network Analysis

In [7]:
# Import necessary libraries and modules
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
import networkx as nx
from networkx.algorithms import community as nx_community
import numpy as np
import pandas as pd

In [8]:
# Import necessary datasets
flights_weather_df = pd.read_csv('flights_weather_df.csv')  # Flights and weather dataset
airports_df_ext = pd.read_csv('df2.csv')  # Airport information dataset

# The rest of the notebook refers to these frames by the short names `df`
# (flights) and `df2` (airport information); bind them here so the notebook
# runs from a fresh kernel.
df = flights_weather_df
df2 = airports_df_ext

---

In [None]:
# Build a one-column frame listing every airport that appears in the flight
# data, either as an origin or as a destination (duplicates removed).
airports_df = pd.DataFrame({'airport': pd.concat([df['origin_airport'], df['destination_airport']]).unique()})

In [None]:
# Confirm every airport seen in the flight data has a matching IATA code
# in the airport information table
known_iata_codes = df2['iata_code'].values
all(code in known_iata_codes for code in airports_df['airport'])

In [None]:
# Keep only the airport rows whose IATA code appears in the flight data
appears_in_flights = df2['iata_code'].isin(airports_df['airport'])
df2 = df2[appears_in_flights]

In [None]:
# Inspect the airport table after restricting it to airports found in the flight data
df2

In [None]:
# Map each IATA code to its position. The 'coordinates' column holds a
# "<first>, <second>" string that is read here as longitude then latitude.
airports_coords = {}

for iata_code, raw_coords in zip(df2['iata_code'], df2['coordinates']):
    lon_text, lat_text = raw_coords.split(', ')
    airports_coords[iata_code] = {
        'longitude': float(lon_text),
        'latitude': float(lat_text),
    }

In [None]:
# Map of all airports, with marker size proportional to the number of
# distinct routes touching each airport.
plt.figure(figsize=(20, 10), dpi=200)
ax = plt.axes(projection=ccrs.PlateCarree())

# Shared outline colour; the community map further down reuses this name.
edgecolor = 'lightgrey'

ax.set_extent([-125.8, -66.5, 23.1, 46.9])  # W, E, S, N
ax.add_feature(cfeature.LAND, alpha=0.75)
ax.add_feature(cfeature.OCEAN, alpha=0.5)
# Lakes are drawn twice: an opaque white base, then the translucent default fill
ax.add_feature(cfeature.LAKES, facecolor='white')
ax.add_feature(cfeature.LAKES, alpha=0.5)
ax.add_feature(cfeature.COASTLINE, edgecolor=edgecolor)
ax.add_feature(cfeature.BORDERS, edgecolor=edgecolor)
ax.add_feature(cfeature.STATES, edgecolor=edgecolor)

# The graph G is only built further down the notebook, so referencing it here
# fails on a fresh kernel. Derive the same quantity straight from the flight
# table instead: an airport's degree in the directed route graph is the number
# of distinct routes leaving it plus the number of distinct routes arriving.
routes = df[['origin_airport', 'destination_airport']].drop_duplicates()
route_degree = routes['origin_airport'].value_counts().add(
    routes['destination_airport'].value_counts(), fill_value=0
)

for airport, coords in airports_coords.items():
    degree = route_degree.get(airport, 0)
    ax.plot(coords['longitude'], coords['latitude'], marker='o', color='royalblue', markersize=degree * 0.2, alpha=0.5)

plt.tight_layout()
plt.show()

---

In [None]:
# Create an empty directed graph; airports are added as nodes and
# origin -> destination routes as edges in the cells below
G = nx.DiGraph()

In [None]:
# Register every unique airport from the dataset as a node of the graph
G.add_nodes_from(airports_df['airport'])

In [None]:
# Add one directed edge per origin -> destination route, weighted by the
# number of recorded flights on that route.
#
# Counting with groupby avoids a Python-level loop over every flight row, and
# assigning the final count (rather than incrementing an existing weight) keeps
# the cell idempotent: re-running it no longer doubles the edge weights.
route_counts = df.groupby(
    ['origin_airport', 'destination_airport'], observed=True, sort=False
).size()

G.add_weighted_edges_from(
    (origin, destination, int(flights))
    for (origin, destination), flights in route_counts.items()
)

---

In [None]:
# Number of airports (nodes) in the graph
G.number_of_nodes()

In [None]:
# Number of distinct directed origin -> destination routes (edges) in the graph
G.number_of_edges()

In [None]:
# Total edge weight, i.e. the number of flights represented in the graph
sum(flights for _, _, flights in G.edges(data='weight'))

In [None]:
nx.average_clustering(G)  # 0.57 indicates a moderate tendency to form local clusters or subgroups

In [None]:
# Density of the directed route graph
nx.density(G)  # 0.049 indicates a relatively sparse graph, as is typical for transportation networks

In [None]:
# networkx interprets the `weight` argument of betweenness_centrality as a
# path *distance*. Passing raw flight counts would therefore treat the busiest
# routes as the longest ones. Store the inverse flight count as a 'distance'
# attribute so that heavily flown routes count as short connections.
# (Every edge weight is a flight count >= 1, so the division is safe.)
nx.set_edge_attributes(
    G,
    {
        (origin, destination): 1 / flights
        for origin, destination, flights in G.edges(data='weight')
    },
    'distance',
)

betweenness_centrality = nx.betweenness_centrality(G, weight='distance')

# Top 10 airports: these appear on many of the shortest paths between pairs of airports.
sorted(betweenness_centrality.items(), key=lambda item: item[1], reverse=True)[:10]

In [None]:
def _ordinal(position):
    """Return `position` with its English ordinal suffix (1st, 2nd, 3rd, 4th, 11th, ...)."""
    if 10 <= position % 100 <= 20:
        suffix = 'th'
    else:
        suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(position % 10, 'th')
    return f"{position}{suffix}"


# Rank airports by degree and weighted degree
degree_ranking = sorted(G, key=G.degree, reverse=True)
Weighted_degree_ranking = sorted(G, key=lambda n: G.degree(n, weight='weight'), reverse=True)

# 1-based rank lookups, built once instead of calling list.index() per airport
degree_rank = {airport: position for position, airport in enumerate(degree_ranking, start=1)}
weighted_degree_rank = {airport: position for position, airport in enumerate(Weighted_degree_ranking, start=1)}

# Compile top 5 airports by betweenness centrality with their rankings
top_airports = sorted(betweenness_centrality, key=betweenness_centrality.get, reverse=True)[:5]

{
    airport: {
        "betweenness_centrality": f"{betweenness_centrality[airport]}",
        "degree": f"{G.degree(airport)} ({_ordinal(degree_rank[airport])})",
        "weighted_degree": f"{G.degree(airport, weight='weight')} ({_ordinal(weighted_degree_rank[airport])})"
    }
    for airport in top_airports
}

---

In [None]:
# Detect airport communities by greedy modularity maximisation,
# weighting routes by flight count (best_n=4)
communities = nx_community.greedy_modularity_communities(G, weight='weight', best_n=4)

In [None]:
# Number of airports in each detected community
list(map(len, communities))

In [None]:
# Map of all airports, coloured by the community each one was assigned to.
plt.figure(figsize=(20, 10), dpi=200)
ax = plt.axes(projection=ccrs.PlateCarree())

ax.set_extent([-125.8, -66.5, 23.1, 46.9])  # W, E, S, N

# Background layers, drawn in this order (lakes: white base, then translucent fill)
map_layers = [
    (cfeature.LAND, {'alpha': 0.75}),
    (cfeature.OCEAN, {'alpha': 0.5}),
    (cfeature.LAKES, {'facecolor': 'white'}),
    (cfeature.LAKES, {'alpha': 0.5}),
    (cfeature.COASTLINE, {'edgecolor': edgecolor}),
    (cfeature.BORDERS, {'edgecolor': edgecolor}),
    (cfeature.STATES, {'edgecolor': edgecolor}),
]
for layer, layer_style in map_layers:
    ax.add_feature(layer, **layer_style)

color_palette = plt.cm.Set1

# One marker per airport; every member of a community shares that community's colour
for community_position, members in enumerate(communities):
    community_color = color_palette(community_position)
    for member in members:
        position = airports_coords[member]
        ax.plot(position['longitude'], position['latitude'], marker='o',
                color=community_color, markersize=10, alpha=0.8)

# Legend entries: a marker-only handle per community, numbered from 1
legend_handles = []
for community_position in range(len(communities)):
    legend_handles.append(
        Line2D([0], [0], marker='o', color='w', label=f'Community {community_position + 1}',
               markersize=10, alpha=0.8,
               markerfacecolor=color_palette(community_position))
    )

ax.legend(handles=legend_handles, loc='lower left')

plt.tight_layout()
plt.show()

In [None]:
# Modularity of the detected community partition, using flight counts as edge weights
nx_community.modularity(G, communities, weight='weight')

---

In [None]:
# Mean delay per airport: departure delays keyed by origin airport,
# arrival delays keyed by destination airport
departure_delay_by_origin = df.groupby('origin_airport')['departure_delay']
arrival_delay_by_destination = df.groupby('destination_airport')['arrival_delay']

avg_departure_delays = departure_delay_by_origin.mean().to_dict()
avg_arrival_delays = arrival_delay_by_destination.mean().to_dict()

In [None]:
# Store each airport's average delays as node attributes.
# An airport that only ever appears as an origin has no arrival average (and
# vice versa), so each attribute is set only when a value exists instead of
# raising KeyError; later aggregation already checks for attribute presence.
for airport, node_attrs in G.nodes(data=True):
    if airport in avg_departure_delays:
        node_attrs['avg_departure_delay'] = avg_departure_delays[airport]
    if airport in avg_arrival_delays:
        node_attrs['avg_arrival_delay'] = avg_arrival_delays[airport]

In [None]:
# Scatter airport size (degree) against average departure / arrival delay and
# report the Pearson correlation coefficient in each panel.

# Only airports that carry both delay attributes can be plotted
plotted_airports = [
    airport for airport, node_attrs in G.nodes(data=True)
    if 'avg_departure_delay' in node_attrs and 'avg_arrival_delay' in node_attrs
]

degrees = [G.degree(airport) for airport in plotted_airports]
# Separate names: the `avg_departure_delays` / `avg_arrival_delays` dicts must
# stay intact so the cell that writes the node attributes can be re-run.
departure_delay_values = [G.nodes[airport]['avg_departure_delay'] for airport in plotted_airports]
arrival_delay_values = [G.nodes[airport]['avg_arrival_delay'] for airport in plotted_airports]

departure_corr = np.corrcoef(degrees, departure_delay_values)[0, 1]
arrival_corr = np.corrcoef(degrees, arrival_delay_values)[0, 1]

fig, axs = plt.subplots(1, 2, figsize=(12, 6), dpi=200)

panels = [
    (axs[0], departure_delay_values, departure_corr, 'Average Departure Delays (Minutes)'),
    (axs[1], arrival_delay_values, arrival_corr, 'Average Arrival Delay (Minutes)'),
]

for panel_ax, delay_values, corr, y_label in panels:
    panel_ax.scatter(degrees, delay_values, alpha=0.5)
    panel_ax.set_xlabel('Airport Size (Degrees)')
    panel_ax.set_ylabel(y_label)
    # Invisible handle: the legend box is used purely as a text annotation
    legend_handle = [Line2D([0], [0], marker='', color='w', label=f'Correlation Coefficient: {round(corr, 2)}')]
    panel_ax.legend(handles=legend_handle, handlelength=0, handletextpad=0, loc='upper right')
    panel_ax.grid(True, color='lightgrey', linewidth=0.5, zorder=0)
    panel_ax.set_axisbelow(True)

plt.tight_layout()
plt.show()

In [None]:
# Average the per-airport mean delays within each community, skipping
# airports that lack the corresponding node attribute.
departure_means = []
arrival_means = []
for members in communities:
    departure_means.append(np.mean([
        G.nodes[member]['avg_departure_delay']
        for member in members
        if 'avg_departure_delay' in G.nodes[member]
    ]))
    arrival_means.append(np.mean([
        G.nodes[member]['avg_arrival_delay']
        for member in members
        if 'avg_arrival_delay' in G.nodes[member]
    ]))

community_avg_delays = {
    'community_index': list(range(1, len(communities) + 1)),  # Community numbering starting from 1
    'avg_departure_delay': departure_means,
    'avg_arrival_delay': arrival_means,
}

# Grouped bar chart: departure and arrival bars side by side for each community
bar_width = 0.35
bar_positions = np.arange(len(communities))
half_width = bar_width/2

fig, ax = plt.subplots(figsize=(12, 6), dpi=200)

ax.bar(bar_positions - half_width, community_avg_delays['avg_departure_delay'], bar_width, label='Avg Departure Delay')
ax.bar(bar_positions + half_width, community_avg_delays['avg_arrival_delay'], bar_width, label='Avg Arrival Delay')
ax.set_ylabel('Average Delay (minutes)')
ax.set_xticks(bar_positions)
tick_labels = [f"Community {i}" for i in community_avg_delays['community_index']]
ax.set_xticklabels(tick_labels)
ax.legend(loc='upper right')

plt.tight_layout()
plt.show()

In [None]:
# Monthly trend of average departure and arrival delays.
# (pandas and matplotlib are already imported in the first cell.)

# Parse the scheduled departure timestamps so flights can be bucketed by month.
# This converts the column on the shared `df` in place; re-running is harmless.
df['scheduled_departure_dt'] = pd.to_datetime(df['scheduled_departure_dt'])

# Group by month and calculate the average delays
# ('M' is the month-end alias; pandas >= 2.2 prefers the spelling 'ME')
monthly_delays = df.groupby(pd.Grouper(key='scheduled_departure_dt', freq='M')).agg({
    'departure_delay': 'mean',
    'arrival_delay': 'mean'
}).reset_index()

# Create the figure and axes
fig, ax = plt.subplots(figsize=(12, 6), dpi=200)

# Plot the data using the axes object
ax.plot(monthly_delays['scheduled_departure_dt'], monthly_delays['departure_delay'], label='Average Departure Delay')
ax.plot(monthly_delays['scheduled_departure_dt'], monthly_delays['arrival_delay'], label='Average Arrival Delay')

# Label both axes so the figure can be read on its own
ax.set_xlabel('Month')
ax.set_ylabel('Average Delay (minutes)')

# Display the legend
ax.legend(loc='upper right')

plt.tight_layout()
plt.show()