In [3]:
# imports of modules we need
import pandas as pd
import datapackage
import wbgapi as wb

# Define functions that load and reshape statistical data tables from various sources

In [4]:
def iso_codes():
    '''Download ISO codes, country names and regions for all countries.

    Fetches the ``core/country-codes`` data package from datahub.io and
    reads its tabular resource into a table of country codes.

    Returns
    -------
    iso_df : pandas.DataFrame
        Indexed by ISO 3166-1 alpha-3 code (index named ``iso``), with a
        single ``Country`` column taken from ``UNTERM English Short``.
    continent_df : pandas.DataFrame
        Indexed by the same codes (index named ``iso``), with a single
        ``Region`` column taken from ``Region Name``.

    Raises
    ------
    ValueError
        If the data package exposes no tabular resource.
    '''
    data_url = 'https://datahub.io/core/country-codes/datapackage.json'
    package = datapackage.Package(data_url)

    tabular_resources = [res for res in package.resources if res.tabular]
    if not tabular_resources:
        # Previously this surfaced as an unbound-variable error further down.
        raise ValueError('no tabular resource found in ' + data_url)
    # Only the last tabular resource was ever kept, so read just that one
    # instead of downloading every resource and discarding all but the last.
    codes = pd.read_csv(tabular_resources[-1].descriptor['path'])

    def _column_by_iso(source_column, target_column):
        # dict(zip(...)) keeps the last row when a code occurs more than once.
        mapping = dict(zip(codes['ISO3166-1-Alpha-3'], codes[source_column]))
        frame = pd.DataFrame.from_dict(mapping,
                                       orient='index',
                                       columns=[target_column])
        frame.index.names = ['iso']
        return frame

    iso_df = _column_by_iso('UNTERM English Short', 'Country')
    continent_df = _column_by_iso('Region Name', 'Region')
    return iso_df, continent_df

def forests(burned_csv='../../resource/GFW_Global_regions_with_the_most_burned_area/modis_burned_area__ha.csv',
            forest_csv='../../resource/GFW_Global_regions_with_the_most_burned_area/treecover_extent_in__by_region__ha.csv'):
    '''
    Load Global Forest Watch data on burned areas and
    total forest areas per country.

    Parameters
    ----------
    burned_csv : str
        Path to the burned-area table. It must contain the columns
        ``iso``, ``alert__year``, ``alert__week`` and ``burned_area__ha``.
    forest_csv : str
        Path to the tree-cover table. It must contain the columns
        ``iso`` and ``area__ha``, with one row per iso code.

    Returns
    -------
    pandas.DataFrame
        Indexed by iso code, with ``burned_area_ha`` (sum over all rows of
        that country) and ``forest_area_ha``. Countries that have no row in
        the tree-cover table get NaN in ``forest_area_ha``.

    Raises
    ------
    ValueError
        If the tree-cover table lists an iso code more than once.
    '''
    # Burned area is reported per week, so sum the rows of each country.
    # numeric_only=True keeps text columns out of the sum on every pandas version.
    burned = pd.read_csv(burned_csv)
    per_country = (
        burned.groupby('iso')
        .sum(numeric_only=True)
        .drop(columns=['alert__year', 'alert__week'])  # summed years/weeks are meaningless
        .rename(columns={'burned_area__ha': 'burned_area_ha'})
    )

    # Total forest area per country, looked up by iso code in one aligned step
    forest_area = pd.read_csv(forest_csv)
    area_by_iso = forest_area.set_index('iso')['area__ha']
    if area_by_iso.index.has_duplicates:
        raise ValueError('forest area table has more than one row per iso code')
    # reindex yields NaN for countries missing from the tree-cover table
    # instead of failing on an empty selection.
    per_country['forest_area_ha'] = area_by_iso.reindex(per_country.index).astype(float)
    return per_country

def t_change(csv_path='../../resource/t_change/Environment_Temperature_change_E_All_Data.csv',
             years=range(2012, 2021)):
    '''
    Temperature change with respect to the years 1951-1980.

    Averages the selected years (2012-2020 by default) to get a single
    number per country.

    Parameters
    ----------
    csv_path : str
        Path to the temperature-change table (read as ISO-8859-1). It must
        contain the columns ``Area``, ``Element`` and one ``Y<year>`` column
        for every requested year.
    years : iterable of int
        Years to average over.

    Returns
    -------
    pandas.DataFrame
        Indexed by country name (index named ``Country``), with a single
        ``t_change`` column.
    '''
    year_columns = ['Y{}'.format(year) for year in years]
    raw = pd.read_csv(csv_path, encoding='ISO-8859-1')

    # Keep only the temperature-change rows; other Element values are not used.
    anomalies = raw[raw['Element'] == 'Temperature change']
    # Average the rows of each country per year. Selecting the year columns
    # first keeps text columns out of the mean.
    per_year = anomalies.groupby('Area')[year_columns].mean()
    # Then average across the years to get one number per country.
    dt_df = per_year.mean(axis=1).to_frame(name='t_change')
    dt_df.index.names = ['Country']
    return dt_df

def precipitation(csv_path='../../resource/average-precipitation-per-year.csv'):
    '''
    Precipitation data in mm/year for each country.

    Parameters
    ----------
    csv_path : str
        Path to the precipitation table. It must contain the columns
        ``Code`` and ``Year`` plus exactly one numeric value column.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``Code``, with a single ``av_prec_mm_per_year`` column
        holding the mean of the value column over all rows of that code.
        Rows without a code are left out by the grouping.
    '''
    yearly = pd.read_csv(csv_path)
    # numeric_only=True keeps text columns out of the mean on every pandas version.
    prec = (
        yearly.groupby('Code')
        .mean(numeric_only=True)
        .drop(columns=['Year'])  # an averaged year carries no information
    )
    prec.columns = ['av_prec_mm_per_year']
    return prec


def combine_dfs(dfs, on='iso'):
    '''
    Join pandas DataFrames for countries, which are
    indexed by the countries' iso codes.

    Parameters
    ----------
    dfs : iterable of pandas.DataFrame
        Tables to combine. The first one is the base table; every following
        table is joined onto it in order, matched through its index.
    on : str
        Column or index level name in the base table whose values are
        matched against the index of each following table.

    Returns
    -------
    pandas.DataFrame
        The base table with the columns of all other tables appended. A
        single input table is returned as a copy.

    Raises
    ------
    IndexError
        If ``dfs`` is empty.
    '''
    frames = list(dfs)  # also accepts generators and other one-shot iterables
    if not frames:
        raise IndexError('combine_dfs needs at least one DataFrame')
    if len(frames) == 1:
        # Nothing to join; hand back a copy so the caller never gets an alias.
        return frames[0].copy()

    total = frames[0]
    for other in frames[1:]:
        total = total.join(other, on=on)
    return total



# Generating tables (pandas DataFrames) 

In [5]:
# Country names and regions, each keyed by ISO 3166-1 alpha-3 code
iso_df, continent_df = iso_codes()

In [6]:
# Burned area and forest area per country, indexed by iso code
forest = forests()

# Temperature change is keyed by country name, so each name in iso_df is
# looked up in it to get a table indexed by iso code; the name column is then
# dropped. Countries whose name has no exact match in the temperature table
# end up with NaN.
dt = iso_df.join(t_change(), on='Country').drop(columns=['Country'])

# Average precipitation per country, indexed by country code
prec = precipitation()

# Combining all DataFrames into a single one

In [7]:
# The forest table is the base; every other table is joined onto it by iso code
df = combine_dfs((forest, iso_df, continent_df, dt, prec), on='iso')

In [8]:
# preview of the first 5 rows of the combined table
df.head()

Unnamed: 0_level_0,burned_area_ha,forest_area_ha,Country,Region,t_change,av_prec_mm_per_year
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AFG,3737.909,515085700.0,Afghanistan,Asia,1.107725,327.0
AGO,802703.4,997938400.0,Angola,Africa,1.151922,1010.0
ALB,553.7902,22988300.0,Albania,Europe,1.646248,1485.0
ARE,387.3991,56963580.0,United Arab Emirates (the),Asia,,78.0
ARG,1283496.0,2224081000.0,Argentina,Americas,0.75466,591.0


In [9]:
# Number of countries in the combined table; some data inputs are already
# missing (see the NaN in the preview above)
len(df)

148

In [10]:
# Drop every country (row) that has at least one missing value (marked as NaN)
df = df.dropna()

In [11]:
# We are left with only about 60% of the world's countries.
len(df)

120

# Cosmetic changes and saving

In [12]:
# Round all numeric columns to 2 decimal places
df = df.round(2)

In [13]:
# Reorder the columns: country description first, then climate, then forest data
df = df[['Country', 'Region', 't_change', 'av_prec_mm_per_year', 'forest_area_ha', 'burned_area_ha']]

In [14]:
# Save the table in .csv format in the current working directory
# (the iso index is written as the first column)
df.to_csv('data_fires.csv')

In [15]:
# Preview of the first 5 rows of the final table
df.head()

Unnamed: 0_level_0,Country,Region,t_change,av_prec_mm_per_year,forest_area_ha,burned_area_ha
iso,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AFG,Afghanistan,Asia,1.11,327.0,515085700.0,3737.91
AGO,Angola,Africa,1.15,1010.0,997938400.0,802703.37
ALB,Albania,Europe,1.65,1485.0,22988300.0,553.79
ARG,Argentina,Americas,0.75,591.0,2224081000.0,1283496.22
ARM,Armenia,Asia,1.61,562.0,23752070.0,1776.87
