In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

Các bước tiền xử lý dữ liệu từ 5 bộ dữ liệu thu thập từ [IMF](https://climatedata.imf.org/pages/climatechange-data) (*Annual Surface Temperature Change*, *Atmospheric CO₂ Concentrations*, *Change in Mean Sea Levels*, *Climate-related Disasters Frequency*, *Forest and Carbon*):
* Lọc dữ liệu các quốc gia Châu Á, thống nhất tên nước.
* Kiểm tra tính đầy đủ của các chỉ số trong từng bộ.
* Lọc lại các dữ liệu cần thiết cho quá trình phân tích về sau.
* Kiểm tra và lọc dữ liệu theo năm.
* Gộp lại thành 1 bộ dữ liệu duy nhất cho mô hình.

## 51 quốc gia châu Á

In [3]:
# Load the reference table of Asian countries (country name and ISO3 code).
asian = pd.read_csv(filepath_or_buffer='../data/raw_data/asian_countries.csv')
asian

Unnamed: 0,Country,ISO3
0,Afghanistan,AFG
1,Armenia,ARM
2,Azerbaijan,AZE
3,Bahrain,BHR
4,Bangladesh,BGD
5,Bhutan,BTN
6,Brunei Darussalam,BRN
7,Cambodia,KHM
8,China,CHN
9,Cyprus,CYP


In [4]:
# Load the raw climate-change datasets.

# Annual Surface Temperature Change
ast = pd.read_csv(filepath_or_buffer='../data/raw_data/annual_surface_temperature_change.csv')

# Forest and Carbon
fac = pd.read_csv(filepath_or_buffer='../data/raw_data/forest_and_carbon.csv')

# Physical Risks: Climate-Related Disasters Frequency
cdf = pd.read_csv(
    filepath_or_buffer='../data/raw_data/physical_risks_climate_related_disasters_frequency.csv'
)

## Annual Surface Temperature Change

In [5]:
# Inner-join the temperature data with the Asian-country table on the ISO3
# code, so only rows whose ISO3 appears in `asian` are kept. Both inputs have a
# `Country` column, hence the `Country_x` / `Country_y` pair in the result.
asian_ast = ast.merge(asian, how='inner', on='ISO3')
asian_ast.columns

Index(['ObjectId', 'Country_x', 'ISO2', 'ISO3', 'Indicator', 'Unit', 'Source',
       'CTS Code', 'CTS Name', 'CTS Full Descriptor', '1961', '1962', '1963',
       '1964', '1965', '1966', '1967', '1968', '1969', '1970', '1971', '1972',
       '1973', '1974', '1975', '1976', '1977', '1978', '1979', '1980', '1981',
       '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990',
       '1991', '1992', '1993', '1994', '1995', '1996', '1997', '1998', '1999',
       '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008',
       '2009', '2010', '2011', '2012', '2013', '2014', '2015', '2016', '2017',
       '2018', '2019', '2020', '2021', '2022', '2023', 'Country_y'],
      dtype='object')

In [6]:
# Keep only the attributes that are useful for the analysis.
#
# The country name from the `asian` table (`Country_y`) replaces the one from
# the IMF data (`Country_x`) so that names are consistent, and the column is
# then renamed to plain `Country`.
#
# The rename step is guarded: once this cell has run, `Country_y` no longer
# exists, and re-running the cell would otherwise raise a KeyError. With the
# guard a second run is a no-op.
if 'Country_y' in asian_ast.columns:
    asian_ast = (
        asian_ast
        # `.assign` overwrites `Country_x` in place (same column position)
        # on a copy, instead of mutating the merged frame.
        .assign(Country_x=asian_ast['Country_y'])
        .rename(columns={"Country_x": "Country"})
    )

# Drop identifier / metadata columns; errors='ignore' tolerates columns that
# are already gone.
asian_ast = asian_ast.drop(
    columns=['ObjectId', 'Country_y', 'ISO2', 'Source',
             'CTS Code', 'CTS Name', 'CTS Full Descriptor'],
    errors='ignore',
)

In [10]:
# Inspect the temperature-change frame for Asian countries.
asian_ast

Unnamed: 0,Country,ISO3,Indicator,Unit,1961,1962,1963,1964,1965,1966,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Afghanistan,AFG,Temperature change with respect to a baseline ...,Degree Celsius,-0.126,-0.173,0.844,-0.751,-0.22,0.239,...,0.521,1.204,1.612,1.642,1.624,0.991,0.587,1.475,2.154,1.956
1,Armenia,ARM,Temperature change with respect to a baseline ...,Degree Celsius,,,,,,,...,1.236,1.896,1.331,0.883,2.739,1.864,1.954,2.183,1.845,2.258
2,Azerbaijan,AZE,Temperature change with respect to a baseline ...,Degree Celsius,,,,,,,...,1.153,1.594,1.356,1.199,2.131,1.878,1.889,1.958,2.029,2.16
3,Bahrain,BHR,Temperature change with respect to a baseline ...,Degree Celsius,-0.481,0.392,0.633,-0.56,0.237,0.539,...,1.721,2.243,1.655,2.118,2.28,2.127,1.989,2.421,1.971,2.261
4,Bangladesh,BGD,Temperature change with respect to a baseline ...,Degree Celsius,0.128,-0.296,-0.115,0.083,-0.215,0.312,...,0.611,0.799,1.336,1.142,0.911,1.151,0.934,1.326,1.24,1.639
5,Bhutan,BTN,Temperature change with respect to a baseline ...,Degree Celsius,0.203,-0.3,-0.23,0.05,-0.583,0.135,...,0.765,0.905,1.338,1.325,1.221,1.136,1.035,1.648,1.456,1.751
6,Brunei Darussalam,BRN,Temperature change with respect to a baseline ...,Degree Celsius,0.029,-0.033,-0.147,0.083,-0.239,0.191,...,0.712,0.932,1.383,0.942,0.903,1.221,1.222,0.972,1.048,1.145
7,Cambodia,KHM,Temperature change with respect to a baseline ...,Degree Celsius,-0.038,-0.137,-0.293,0.18,-0.19,0.441,...,0.549,1.039,1.331,0.994,0.655,1.37,1.275,0.786,0.886,1.256
8,Hong Kong,HKG,Temperature change with respect to a baseline ...,Degree Celsius,0.052,-0.063,0.319,0.108,0.144,0.646,...,0.803,1.258,0.845,1.404,1.218,1.783,1.835,1.964,1.369,1.419
9,Macao,MAC,Temperature change with respect to a baseline ...,Degree Celsius,0.052,-0.063,0.319,0.108,0.144,0.646,...,0.803,1.258,0.845,1.404,1.218,1.783,1.835,1.964,1.369,1.419


In [8]:
# Count the missing values in each column.
asian_ast.isna().sum()

Country       0
ISO3          0
Indicator     0
Unit          0
1961         10
             ..
2019          1
2020          1
2021          1
2022          1
2023          1
Length: 67, dtype: int64

## Atmospheric CO₂ Concentrations

## Change in Mean Sea Levels

## Climate-related Disasters Frequency

## Forest and Carbon