In [1]:
from warnings import filterwarnings

# Suppress FutureWarning messages so that they do not clutter the cell
# outputs in the rest of the notebook.
filterwarnings(
    category=FutureWarning,
    action='ignore',
)

Let's load up our data. These are all changes in degrees Celsius relative to a baseline. It does not make sense to sum the time series, but it does make sense to take their mean and variance.

In [2]:
import pandas as pd

CLIMATE = '/kaggle/input/climate-change-indicators/climate_change_indicators.csv'

# Descriptive metadata columns that are discarded right after loading; the
# country identifiers and the yearly F<year> columns are kept.
metadata_columns = [
    'ISO2',
    'Indicator',
    'Unit',
    'Source',
    'CTS_Code',
    'CTS_Name',
    'CTS_Full_Descriptor',
]

# Load the table keyed by ObjectId and drop the metadata in one chain.
df = (
    pd.read_csv(filepath_or_buffer=CLIMATE, index_col=['ObjectId'])
    .drop(columns=metadata_columns)
)
df.head()

Unnamed: 0_level_0,Country,ISO3,F1961,F1962,F1963,F1964,F1965,F1966,F1967,F1968,...,F2013,F2014,F2015,F2016,F2017,F2018,F2019,F2020,F2021,F2022
ObjectId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,"Afghanistan, Islamic Rep. of",AFG,-0.113,-0.164,0.847,-0.764,-0.244,0.226,-0.371,-0.423,...,1.281,0.456,1.093,1.555,1.54,1.544,0.91,0.498,1.327,2.012
2,Albania,ALB,0.627,0.326,0.075,-0.166,-0.388,0.559,-0.074,0.081,...,1.333,1.198,1.569,1.464,1.121,2.028,1.675,1.498,1.536,1.518
3,Algeria,DZA,0.164,0.114,0.077,0.25,-0.1,0.433,-0.026,-0.067,...,1.192,1.69,1.121,1.757,1.512,1.21,1.115,1.926,2.33,1.688
4,American Samoa,ASM,0.079,-0.042,0.169,-0.14,-0.562,0.181,-0.368,-0.187,...,1.257,1.17,1.009,1.539,1.435,1.189,1.539,1.43,1.268,1.256
5,"Andorra, Principality of",AND,0.736,0.112,-0.752,0.308,-0.49,0.415,0.637,0.018,...,0.831,1.946,1.69,1.99,1.925,1.919,1.964,2.562,1.533,3.243


Let's take a look at the latest year available.

In [3]:
from plotly import express

# World map of the 2022 values (the latest year available), one region per
# ISO3 code. A title, a readable colour-bar label and the country name on
# hover are set so the figure can be understood without the surrounding text.
express.choropleth(
    data_frame=df,
    locations='ISO3',
    color='F2022',
    hover_name='Country',
    title='Temperature change in 2022 relative to baseline (°C)',
    labels={'F2022': 'Change (°C)'},
)

We want to look at the whole dataset longitudinally, so we need the year to be an index. Let's do some transformations to make that happen.

In [4]:
# Keep only the yearly measurement columns (F1961, F1962, ...). Selecting them
# explicitly, instead of dropping 'Country' and keeping everything else, means
# this cell still works when it is re-run after later cells have added extra
# columns to df (e.g. the UMAP 'x'/'y' coordinates), whose labels cannot be
# converted to an integer year.
year_columns = [
    column for column in df.columns
    if column.startswith('F') and column[1:].isdigit()
]

# One row per year, one column per ISO3 code. Moving ISO3 into the index
# before transposing keeps the measurements numeric; transposing the ISO3
# strings together with the data would turn every column into dtype=object.
years_df = df.set_index(keys='ISO3')[year_columns].T

# Convert the 'F1961'-style labels into integer years, then expose them as the
# 'index' column (first column) that the scatter plot uses for its x axis.
years_df.index = [int(label[1:]) for label in years_df.index]
years_df = years_df.reset_index()
years_df.head()

ISO3,index,AFG,ALB,DZA,ASM,AND,AGO,AIA,ATG,ARG,...,VUT,VEN,VNM,WLF,PSE,ESH,WLD,YEM,ZMB,ZWE
0,1961,-0.113,0.627,0.164,0.079,0.736,0.041,0.086,0.09,0.122,...,0.168,0.072,0.014,0.323,-0.11,0.632,0.211,0.029,0.228,0.267
1,1962,-0.164,0.326,0.114,-0.042,0.112,-0.152,-0.024,0.031,-0.046,...,0.092,-0.113,-0.24,-0.051,0.822,0.576,0.038,-0.009,-0.168,0.237
2,1963,0.847,0.075,0.077,0.169,-0.752,-0.19,0.234,0.288,0.162,...,-0.165,-0.012,-0.302,0.125,0.848,0.333,0.168,0.169,-0.39,-0.458
3,1964,-0.764,-0.166,0.25,-0.14,0.308,-0.229,0.189,0.214,-0.343,...,0.22,0.097,0.082,0.187,-0.626,0.819,-0.246,-0.251,-0.279,-0.097
4,1965,-0.244,-0.388,-0.1,-0.562,-0.49,-0.196,-0.365,-0.385,0.09,...,-0.569,-0.096,-0.017,-0.41,-0.031,-0.337,-0.223,-0.623,-0.418,-0.48


Now we can plot this whole dataset and see what the trendline looks like.

In [5]:
# Plot every country's series against the year and fit a single LOWESS
# trendline across all of them (trendline_scope='overall') rather than one
# trendline per country. A title and axis labels are set so the figure can be
# read on its own.
express.scatter(
    data_frame=years_df,
    x='index',
    y=years_df.columns[1:],  # every column after 'index' holds one ISO3 series
    height=800,
    trendline='lowess',
    trendline_scope='overall',
    title='Temperature change relative to baseline by country, with overall LOWESS trend',
    labels={'index': 'Year', 'value': 'Temperature change (°C)'},
)

Over the period of interest, the average country has experienced a mean increase of roughly one degree Celsius. This is probably the nut graf.

In [6]:
from umap import UMAP

# The yearly measurement columns (named F<year>) are the features to embed.
columns = [column for column in df.columns if column.startswith('F')]

# Fixed seeds and a single job so that repeated runs use the same settings.
reducer = UMAP(n_components=2, random_state=2024, transform_seed=2024, verbose=True, n_jobs=1, n_epochs=100)

# Missing values are replaced with 0 before fitting.
# NOTE(review): 0 reads as "no change from baseline" — confirm this imputation is intended.
embedding = reducer.fit_transform(X=df[columns].fillna(value=0))

# Attach the coordinates using df's own index. A DataFrame built without an
# explicit index is labelled 0..n-1, and pandas aligns on index labels when a
# DataFrame is assigned to columns. df is indexed by ObjectId (starting at 1),
# so without index=df.index every country would receive the coordinates of the
# following row and the last row would get NaN.
df[['x', 'y']] = pd.DataFrame(data=embedding, index=df.index, columns=['x', 'y'])

# Tiny markers: each point is identified by its ISO3 text label instead.
express.scatter(
    data_frame=df,
    x='x',
    y='y',
    text='ISO3',
    height=800,
    hover_name='Country',
    title='UMAP embedding of the yearly temperature-change series, by country',
).update_traces(marker={'size': 1})

2024-03-05 15:01:43.284044: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-05 15:01:43.284180: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-05 15:01:43.448159: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


UMAP(n_epochs=100, n_jobs=1, random_state=2024, transform_seed=2024, verbose=True)
Tue Mar  5 15:01:56 2024 Construct fuzzy simplicial set
Tue Mar  5 15:01:56 2024 Finding Nearest Neighbors
Tue Mar  5 15:01:58 2024 Finished Nearest Neighbor Search
Tue Mar  5 15:02:01 2024 Construct embedding


Epochs completed:   0%|            0/100 [00:00]

	completed  0  /  100 epochs
	completed  10  /  100 epochs
	completed  20  /  100 epochs
	completed  30  /  100 epochs
	completed  40  /  100 epochs
	completed  50  /  100 epochs
	completed  60  /  100 epochs
	completed  70  /  100 epochs
	completed  80  /  100 epochs
	completed  90  /  100 epochs
Tue Mar  5 15:02:02 2024 Finished embedding


We sort of expect to see neighboring countries cluster when we do dimension reduction, but we don't.