In [1]:
import pandas as pd

In [2]:
# Load dataset
data = pd.read_csv("../data/datasets.csv")
data.head()

Unnamed: 0,ObjectId,Country,ISO2,ISO3,Indicator,Unit,Source,CTS Code,CTS Name,CTS Full Descriptor,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,1,"Afghanistan, Islamic Rep. of",AF,AFG,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weath...",...,1.139,1.588,1.63,1.597,0.951,0.552,1.418,1.967,1.748,2.188
1,2,Africa,,AFRTMP,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weath...",...,1.195,1.385,1.158,1.15,1.29,1.177,1.4,1.014,1.485,1.75
2,3,Albania,AL,ALB,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weath...",...,1.702,1.608,1.239,2.155,1.797,1.627,1.719,1.701,2.299,2.925
3,4,Algeria,DZ,DZA,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weath...",...,1.134,1.761,1.506,1.203,1.24,2.006,2.424,1.804,2.303,2.824
4,5,American Samoa,AS,ASM,Temperature change with respect to a baseline ...,Degree Celsius,Food and Agriculture Organization of the Unite...,ECCS,Surface Temperature Change,"Environment, Climate Change, Climate and Weath...",...,0.738,1.268,1.163,0.917,1.268,1.159,0.997,0.984,1.01,1.588


### Keep only Country + all temperature years

In [3]:
data["Country"] = data["Country"].astype(str).str.strip()

year_cols = [str(y) for y in range(1961, 2025)]
year_cols = [str(y) for y in range(1961, 2025)]

keep_cols = ["Country"] + year_cols
data = data[keep_cols]

data.head()

Unnamed: 0,Country,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,"Afghanistan, Islamic Rep. of",-0.096,-0.143,0.848,-0.762,-0.233,0.239,-0.355,-0.402,-0.528,...,1.139,1.588,1.63,1.597,0.951,0.552,1.418,1.967,1.748,2.188
1,Africa,-0.015,-0.033,0.069,-0.149,-0.194,0.142,-0.212,-0.225,0.359,...,1.195,1.385,1.158,1.15,1.29,1.177,1.4,1.014,1.485,1.75
2,Albania,0.643,0.351,0.089,-0.154,-0.377,0.565,-0.071,0.085,-0.009,...,1.702,1.608,1.239,2.155,1.797,1.627,1.719,1.701,2.299,2.925
3,Algeria,0.154,0.105,0.072,0.248,-0.104,0.424,0.0,-0.065,0.275,...,1.134,1.761,1.506,1.203,1.24,2.006,2.424,1.804,2.303,2.824
4,American Samoa,0.075,-0.047,0.161,-0.146,-0.538,0.21,-0.339,-0.164,0.153,...,0.738,1.268,1.163,0.917,1.268,1.159,0.997,0.984,1.01,1.588


### Normalize a few common country name

In [4]:
name_fix = {
    # SEA variants
    "Lao People's Dem. Rep.": "Laos",
    "Brunei Darussalam": "Brunei",
    "Timor-Leste, Dem. Rep. of": "Timor Leste",
    
    # EA variants
    "China, P.R.: Mainland": "Hong Kong",
    "China, P.R.: Macao": "Macau",
    "Korea, Rep. of": "South Korea",
    "Korea, Dem. People's Rep. of": "North Korea",
    "China, P.R.: Hong Kong": "Hong Kong",
}
data["Country"] = data["Country"].replace(name_fix)

### Define Regions (Southeast Asia)

In [5]:
southeast_asia = [
    "Brunei", "Myanmar", "Cambodia", "Indonesia", "Laos",
    "Malaysia", "Philippines", "Singapore", "Thailand",
    "Timor Leste", "Vietnam"
]

### Define Regions (East Asia)

In [6]:
east_asia = [
    "China", "Japan", "Mongolia", "North Korea",
    "South Korea", "Taiwan", "Hong Kong", "Macau"
]

In [7]:
# Assign region to each country
data["Region"] = None
data.loc[data["Country"].isin(southeast_asia), "Region"] = "Southeast Asia"
data.loc[data["Country"].isin(east_asia), "Region"] = "East Asia"

In [8]:
southeast_asia_data = data[data["Region"] == "Southeast Asia"]
southeast_asia_data

Unnamed: 0,Country,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Region
31,Brunei,0.116,-0.049,-0.229,0.036,-0.335,0.161,-0.109,-0.184,0.231,...,1.545,1.162,1.076,1.383,1.377,1.07,1.085,1.162,1.592,Southeast Asia
36,Cambodia,-0.024,-0.114,-0.285,0.145,-0.212,0.425,-0.126,-0.163,0.444,...,1.397,1.073,0.794,1.55,1.425,0.917,0.972,1.383,2.057,Southeast Asia
96,Indonesia,-0.049,0.016,-0.105,0.002,-0.203,0.016,-0.136,-0.055,0.165,...,1.41,0.957,0.879,1.045,1.233,1.02,0.969,1.101,1.472,Southeast Asia
113,Laos,0.087,-0.221,-0.375,-0.06,-0.169,0.464,-0.102,-0.115,0.518,...,1.256,1.04,0.75,1.978,1.527,1.079,0.902,1.496,2.026,Southeast Asia
124,Malaysia,-0.019,-0.071,-0.12,0.052,-0.241,0.133,-0.14,-0.104,0.235,...,1.558,1.093,1.128,1.415,1.358,1.177,1.092,1.221,1.612,Southeast Asia
142,Myanmar,-0.023,-0.115,-0.17,0.027,-0.317,0.391,-0.047,-0.164,0.199,...,1.351,1.252,0.965,1.628,1.593,1.642,1.349,1.745,1.959,Southeast Asia
164,Philippines,-0.134,-0.107,-0.349,-0.025,-0.29,0.171,-0.154,-0.123,0.118,...,1.569,1.192,1.225,1.304,1.447,1.309,1.275,1.44,1.542,Southeast Asia
182,Singapore,,,,,,,,,,...,1.395,0.76,1.026,1.6,1.022,1.239,0.975,1.294,1.633,Southeast Asia
203,Thailand,-0.15,-0.148,-0.315,-0.027,-0.222,0.411,-0.051,-0.113,0.467,...,1.433,1.013,0.771,1.667,1.446,0.975,0.874,1.394,1.98,Southeast Asia
204,Timor Leste,-0.341,0.011,-0.624,-0.202,,0.118,-0.391,0.003,0.143,...,1.505,0.822,0.392,0.517,1.358,0.773,0.623,0.297,1.11,Southeast Asia


In [9]:
southeast_asia_data.to_csv("../data/southeast_asia.csv", index=False)
print("Saved: '../data/southeast_asia.csv'")

Saved: '../data/southeast_asia.csv'


In [10]:
east_asia_data = data[data["Region"] == "East Asia"]
east_asia_data

Unnamed: 0,Country,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Region
43,Hong Kong,0.072,-0.066,0.315,0.104,0.141,0.642,0.047,-0.649,0.0,...,0.639,1.192,1.02,1.574,1.683,1.802,1.202,1.243,1.709,East Asia
44,Macau,0.072,-0.066,0.315,0.104,0.141,0.642,0.047,-0.649,0.0,...,0.639,1.192,1.02,1.574,1.683,1.802,1.202,1.243,1.709,East Asia
45,Hong Kong,0.255,-0.146,0.287,-0.024,0.076,0.179,-0.285,-0.411,-0.216,...,1.339,1.6,1.397,1.443,1.672,1.708,1.909,1.892,2.272,East Asia
104,Japan,0.666,0.123,-0.056,0.26,-0.524,0.018,0.034,-0.496,-0.043,...,1.302,0.771,0.899,1.248,1.41,1.237,1.301,1.822,2.19,East Asia
109,North Korea,0.495,-0.053,-0.103,0.241,-0.207,-0.063,0.166,-0.467,-0.457,...,1.489,1.681,0.969,1.918,1.901,1.886,1.626,2.076,2.655,East Asia
110,South Korea,0.64,-0.091,-0.478,0.604,-0.165,0.111,0.18,-0.669,-0.28,...,1.667,1.468,0.961,1.527,1.503,1.686,1.526,1.64,2.54,East Asia
137,Mongolia,0.148,-0.125,1.19,-0.179,0.855,0.371,-0.446,-0.789,-0.841,...,1.422,2.531,2.245,1.881,2.402,2.019,2.223,2.196,2.624,East Asia


In [11]:
east_asia_data.to_csv("../data/east_asia_data.csv", index=False)
print("Saved: '../data/east_asia_data.csv'")

Saved: '../data/east_asia_data.csv'


### Remove countries + Keep southeast and east + average temp each year

In [12]:
data = data[data["Region"].notna()].reset_index(drop=True)
region_avg = (
    data.groupby("Region")[year_cols]
    .mean()
    .reset_index()
)

display(region_avg)

Unnamed: 0,Region,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024
0,East Asia,0.335429,-0.060571,0.21,0.158571,0.045286,0.271429,-0.036714,-0.59,-0.262429,...,1.239,1.213857,1.490714,1.215857,1.595,1.750571,1.734286,1.569857,1.730286,2.242714
1,Southeast Asia,-0.0499,-0.1013,-0.2855,0.0034,-0.223222,0.2776,-0.1384,-0.1313,0.3076,...,1.054818,1.421455,1.05,0.898455,1.449364,1.396455,1.127273,1.021364,1.277727,1.728909


In [13]:
region_avg.to_csv("../data/region_avg.csv", index=False)
print("Saved: '../data/region_avg.csv'")

Saved: '../data/region_avg.csv'
