In [1]:
import pandas as pd
import numpy as np
import altair as alt

# 1 : Climate

In [135]:
climate = pd.read_csv("data/climate_change_data.csv", parse_dates = True)
climate.shape

(10000, 9)

In [136]:
# Relevant Years - most recent climate metircs
climate = climate[climate['Date'] >= '2020']
climate.shape

(1304, 9)

In [137]:
# check for missing data
climate.isnull().sum()

Date              0
Location          0
Country           0
Temperature       0
CO2 Emissions     0
Sea Level Rise    0
Precipitation     0
Humidity          0
Wind Speed        0
dtype: int64

In [138]:
# columns present in dataset
climate.columns

Index(['Date', 'Location', 'Country', 'Temperature', 'CO2 Emissions',
       'Sea Level Rise', 'Precipitation', 'Humidity', 'Wind Speed'],
      dtype='object')

In [139]:
# columns data types
climate.dtypes

Date               object
Location           object
Country            object
Temperature       float64
CO2 Emissions     float64
Sea Level Rise    float64
Precipitation     float64
Humidity          float64
Wind Speed        float64
dtype: object

In [140]:
n = climate['Location'].nunique()
print(f"There are climate metrics taken for {n} different locations.")

There are climate metrics taken for 1241 different locations.


# 2 : Disasters

In [141]:
disasters = pd.read_csv("data/natural-disasters copy.csv")
disasters.shape

(7081, 169)

In [142]:
disasters = disasters[disasters['Year'] >= 2020]
disasters

Unnamed: 0,Country name,Year,Number of deaths from drought,Number of people injured from drought,Number of people affected from drought,Number of people left homeless from drought,Number of total people affected by drought,Reconstruction costs from drought,Insured damages against drought,Total economic damages from drought,...,Total economic damages from drought as a share of GDP,Total economic damages from earthquakes as a share of GDP,Total economic damages from extreme temperatures as a share of GDP,Total economic damages from floods as a share of GDP,Total economic damages from landslides as a share of GDP,Total economic damages from mass movements as a share of GDP,Total economic damages from storms as a share of GDP,Total economic damages from volcanic activity as a share of GDP,Total economic damages from volcanic activity as a share of GDP.1,total_damages_pct_gdp_glacial_lake
44,Afghanistan,2020,,,,,,,,,...,,,,0.0,0.0,,0.0,,,
45,Afghanistan,2021,0.0,0.0,11000000.0,0.0,11000000.0,0.0,0.0,0.000000e+00,...,0.0,,,0.0,0.0,,,,,
46,Afghanistan,2022,,,,,,,,,...,,,,,,,,,,
47,Afghanistan,2023,,,,,,,,,...,,,,,,,,,,
127,Africa,2020,0.0,0.0,33189316.0,0.0,33189316.0,0.0,0.0,1.243000e+09,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7060,Zambia,2022,,,,,,,,,...,,,,,,,,,,
7061,Zambia,2023,,,,,,,,,...,,,,,,,,,,
7078,Zimbabwe,2021,0.0,0.0,167500.0,0.0,167500.0,0.0,0.0,0.000000e+00,...,0.0,,,,,,0.0,,,
7079,Zimbabwe,2022,,,,,,,,,...,,,,,,,,,,


In [143]:
disasters.shape

(528, 169)

In [144]:
disasters.columns

Index(['Country name', 'Year', 'Number of deaths from drought',
       'Number of people injured from drought',
       'Number of people affected from drought',
       'Number of people left homeless from drought',
       'Number of total people affected by drought',
       'Reconstruction costs from drought', 'Insured damages against drought',
       'Total economic damages from drought',
       ...
       'Total economic damages from drought as a share of GDP',
       'Total economic damages from earthquakes as a share of GDP',
       'Total economic damages from extreme temperatures as a share of GDP',
       'Total economic damages from floods as a share of GDP',
       'Total economic damages from landslides as a share of GDP',
       'Total economic damages from mass movements as a share of GDP',
       'Total economic damages from storms as a share of GDP',
       'Total economic damages from volcanic activity as a share of GDP',
       'Total economic damages from volcanic acti

In [145]:
# removing columns containing the words "Number of"
columns_to_drop = [col for col in disasters.columns if 'Number of' in col]

# dropping the columns
disasters.drop(columns = columns_to_drop, inplace = True)
disasters.shape

(528, 97)

In [146]:
# removing columns containing "Total number of peopl"
columns_to_drop = [col for col in disasters.columns if 'Total number of people affected by' in col and 'per 100,000' in col]

# Drop the filtered columns
disasters.drop(columns=columns_to_drop, inplace=True)
disasters.shape

(528, 85)

In [148]:
disasters

Unnamed: 0,Country name,Year,Reconstruction costs from drought,Insured damages against drought,Total economic damages from drought,Death rates from drought,Injury rates from drought,Homelessness rate from drought,Reconstruction costs from earthquakes,Insured damages against earthquakes,...,Total economic damages from drought as a share of GDP,Total economic damages from earthquakes as a share of GDP,Total economic damages from extreme temperatures as a share of GDP,Total economic damages from floods as a share of GDP,Total economic damages from landslides as a share of GDP,Total economic damages from mass movements as a share of GDP,Total economic damages from storms as a share of GDP,Total economic damages from volcanic activity as a share of GDP,Total economic damages from volcanic activity as a share of GDP.1,total_damages_pct_gdp_glacial_lake
44,Afghanistan,2020,,,,,,,,,...,,,,0.0,0.0,,0.0,,,
45,Afghanistan,2021,0.0,0.0,0.000000e+00,0.0,0.0,0.0,,,...,0.0,,,0.0,0.0,,,,,
46,Afghanistan,2022,,,,,,,0.0,0.0,...,,,,,,,,,,
47,Afghanistan,2023,,,,,,,0.0,0.0,...,,,,,,,,,,
127,Africa,2020,0.0,0.0,1.243000e+09,0.0,0.0,0.0,0.0,0.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7060,Zambia,2022,,,,,,,,,...,,,,,,,,,,
7061,Zambia,2023,,,,,,,,,...,,,,,,,,,,
7078,Zimbabwe,2021,0.0,0.0,0.000000e+00,0.0,0.0,0.0,,,...,0.0,,,,,,0.0,,,
7079,Zimbabwe,2022,,,,,,,,,...,,,,,,,,,,


In [147]:
disasters.isnull().sum() > 0

Country name                                                         False
Year                                                                 False
Reconstruction costs from drought                                     True
Insured damages against drought                                       True
Total economic damages from drought                                   True
                                                                     ...  
Total economic damages from mass movements as a share of GDP          True
Total economic damages from storms as a share of GDP                  True
Total economic damages from volcanic activity as a share of GDP       True
Total economic damages from volcanic activity as a share of GDP.1     True
total_damages_pct_gdp_glacial_lake                                    True
Length: 85, dtype: bool

In [127]:
natural_disaster_types = ["drought", "earthquakes", "disasters", "volcanic activity",
                          "floods", "mass movements", "storms", "landslides", "fog", 
                          "wildfires", "extreme temperatures", "glacial lake outbursts"]

In [128]:
# Find columns for respective disaster type

for disaster in natural_disaster_types:
    
    # Select columns related to the current disaster type
    relevant_columns = [col for col in disasters.columns if disaster in col.lower()]
    print(relevant_columns)
    print("")

['Reconstruction costs from drought', 'Insured damages against drought', 'Total economic damages from drought', 'Death rates from drought', 'Injury rates from drought', 'Homelessness rate from drought', 'Total economic damages from drought as a share of GDP']

['Reconstruction costs from earthquakes', 'Insured damages against earthquakes', 'Total economic damages from earthquakes', 'Death rates from earthquakes', 'Injury rates from earthquakes', 'Homelessness rate from earthquakes', 'Total economic damages from earthquakes as a share of GDP']

['Reconstruction costs from disasters', 'Insured damages against disasters', 'Total economic damages from disasters', 'Death rates from disasters', 'Injury rates from disasters', 'Homelessness rate from disasters', 'Total economic damages from disasters as a share of GDP']

['Reconstruction costs from volcanic activity', 'Insured damages against volcanic activity', 'Total economic damages from volcanic activity', 'Death rates from volcanic activi

# 3 : World

In [155]:
world = pd.read_csv("data/worlddata.csv")
world.head()

Unnamed: 0,country,electricity_access,gdp,gdp_capita,labor_rate,labor_force,land_area,life_expectancy,adult_literacy,water_access,air_pollution,population_density,population,alcohol_consumption,unemployment_rate,social_support,freedom,generosity,income_class,cpi
0,Afghanistan,90.08,18833230000.0,536.23,47.28,9103245.75,652230.0,62.97,,66.61,100.0,53.94,35179977.0,0.0087,10.341,0.522,0.428,-0.014,Low income,14.25
1,Albania,99.94,12856050000.0,4473.89,57.54,1362512.25,27400.0,78.93,98.14,93.92,100.0,104.9,2874159.25,5.0375,14.633,0.65,0.752,-0.033,Upper middle income,37.25
2,Algeria,99.49,167755300000.0,4117.84,41.79,11979207.5,2381741.0,75.79,81.41,93.73,100.0,17.1,40736509.0,0.5844,10.54,0.785,0.51,-0.182,Lower middle income,34.5
3,Angola,43.0,70956380000.0,2395.27,77.28,12467594.5,1246700.0,61.39,,55.46,100.0,23.82,29691157.0,7.7588,9.557,,,,Lower middle income,17.75
4,Argentina,99.9,580182500000.0,13246.88,59.17,19579367.75,2736690.0,76.73,99.1,99.01,95.34,16.01,43815411.75,8.5713,8.301,0.904,0.852,-0.187,Upper middle income,36.75


In [156]:
world.shape

(186, 20)

In [157]:
world['country'].nunique()

186