In [1]:
import pandas as pd

df = pd.read_csv("data/processed/global_panel.csv")

In [2]:
latest_year = df['year'] = 2022
df_latest = df[df['year'] == latest_year]


cluster_vars = [
    'co2_per_capita',
    'renewables_share_energy',
    'fossil_share_energy',
    'low_carbon_share_energy',
    'gdp'
]

df_cluster = df_latest[['country'] + cluster_vars].dropna()
df_cluster.set_index('country', inplace=True)
df_cluster.head()


Unnamed: 0_level_0,co2_per_capita,renewables_share_energy,fossil_share_energy,low_carbon_share_energy,gdp
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Algeria,0.533,4.487,95.513,4.487,35661970000.0
Algeria,0.669,3.313,96.687,3.313,33932630000.0
Algeria,0.655,4.042,95.958,4.042,37094770000.0
Algeria,0.687,5.171,94.829,5.171,41423890000.0
Algeria,0.836,2.996,97.004,2.996,45387460000.0


In [3]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(df_cluster)

In [4]:
from sklearn.cluster import KMeans

k = 4
kmeans = KMeans(n_clusters=k, random_state=42)


clusters = kmeans.fit_predict(X_scaled)


df_cluster['cluster'] = clusters
df_cluster.head()

Unnamed: 0_level_0,co2_per_capita,renewables_share_energy,fossil_share_energy,low_carbon_share_energy,gdp,cluster
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Algeria,0.533,4.487,95.513,4.487,35661970000.0,0
Algeria,0.669,3.313,96.687,3.313,33932630000.0,0
Algeria,0.655,4.042,95.958,4.042,37094770000.0,0
Algeria,0.687,5.171,94.829,5.171,41423890000.0,0
Algeria,0.836,2.996,97.004,2.996,45387460000.0,0


In [5]:
print(df_cluster.columns)

Index(['co2_per_capita', 'renewables_share_energy', 'fossil_share_energy',
       'low_carbon_share_energy', 'gdp', 'cluster'],
      dtype='object')


In [6]:
df_cluster.reset_index(inplace=True)
df_cluster.rename(columns={'index': 'country'}, inplace=True)

In [7]:
import plotly.express as px


df_cluster['cluster_label'] = df_cluster['cluster'].map({
    3: "High CO₂, Fossil-heavy",
    2: "Moderate CO₂, Balanced Energy",
    0: "Moderate CO₂, Fossil-heavy",
    1: "Low CO₂, Renewable-heavy"
})

fig = px.scatter(
    df_cluster,
    x='co2_per_capita',
    y='renewables_share_energy',
    size='fossil_share_energy',
    color='cluster_label',
    hover_name='country',
    hover_data={
        'co2_per_capita': True,
        'renewables_share_energy': True,
        'fossil_share_energy': True,
        'low_carbon_share_energy': True,
        'gdp': True,
        'cluster_label': False
    },
    title=f"Country Clusters by CO₂ & Energy Profile ({latest_year})",
    size_max=60
)

fig.update_layout(
    xaxis_title="CO₂ per Capita",
    yaxis_title="Renewables Share (%)",
    legend_title="Cluster",
    template="plotly_white"
)

fig.show()