In [1]:
import numpy as np
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [35]:
# Load the merged dataset from merge_complete_elevation.csv.
df_new = pd.read_csv('merge_complete_elevation.csv')
# Drop the elevation_x and soil columns, then rename elevation_y to elevation
# so a single elevation column remains.
df_new = (
    df_new
    .drop(columns=['elevation_x', 'soil'])
    .rename(columns={'elevation_y': 'elevation'})
)
# One row per distinct (lat, lon) pair, keeping only those two columns.
# The first occurrence of each pair is kept and the original row labels are
# preserved, so positional access (.iloc) is the safe way to walk this frame.
location = df_new[['lat', 'lon']].drop_duplicates()

In [8]:
import requests
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
import tqdm
import time

def requests_retry_session(
    retries=3,
    backoff_factor=0.3,
    status_forcelist=(500, 502, 504),
    session=None,
):
    """Return a ``requests`` session that retries failed HTTP(S) requests.

    Parameters
    ----------
    retries : int
        Used as the ``total``, ``read`` and ``connect`` budget of the
        ``Retry`` policy.
    backoff_factor : float
        Forwarded to ``Retry`` as ``backoff_factor``.
    status_forcelist : tuple of int
        Forwarded to ``Retry`` as ``status_forcelist``.
    session : optional
        Session to configure. A new ``requests.Session`` is created when
        this is falsy.

    Returns
    -------
    The session, with one retrying ``HTTPAdapter`` mounted on both the
    ``http://`` and ``https://`` prefixes.
    """
    if not session:
        session = requests.Session()

    retry_policy = Retry(
        total=retries,
        read=retries,
        connect=retries,
        backoff_factor=backoff_factor,
        status_forcelist=status_forcelist,
    )
    retrying_adapter = HTTPAdapter(max_retries=retry_policy)

    # The same adapter instance serves both URL schemes.
    for scheme in ('http://', 'https://'):
        session.mount(scheme, retrying_adapter)
    return session



In [25]:


# Reverse-geocode every unique (lat, lon) pair through geocode.maps.co and
# collect country / state / country_code / address into `result`.
GEOCODE_URL = 'https://geocode.maps.co/reverse'
REQUEST_TIMEOUT_S = 10   # without a timeout a stalled connection would block the loop forever
REQUEST_DELAY_S = 0.5    # pause between consecutive requests
RESULT_COLUMNS = ['lat', 'lon', 'country', 'state', 'country_code', 'address']


def _reverse_geocode(session, lat, lon):
    """Return one result row (dict) for a coordinate, or None if the lookup failed.

    A failed request, an undecodable body, or a payload without an 'address'
    entry is reported and yields None. The caller must never fall through to
    parsing a response that belongs to a previous coordinate.
    """
    try:
        response = session.get(
            GEOCODE_URL,
            params={'lat': lat, 'lon': lon},
            timeout=REQUEST_TIMEOUT_S,
        )
        response.raise_for_status()
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers a body that is not valid JSON.
        print(f'Geocoding failed for ({lat}, {lon}): {err}')
        return None

    address = data.get('address') if isinstance(data, dict) else None
    if not address:
        print(f'No address returned for ({lat}, {lon}): {data}')
        return None

    return {
        'lat': lat,
        'lon': lon,
        'country': address.get('country'),
        # not every address has a state; keep the empty-string placeholder
        'state': address.get('state', ''),
        'country_code': address.get('country_code'),
        'address': data.get('display_name'),
    }


records = []            # one dict per successfully geocoded location
failed_locations = []   # (lat, lon) pairs that could not be resolved

# One session for the whole run, so the retry adapter and connection pool
# are built once instead of once per request.
geocode_session = requests_retry_session()

for lat, lon in tqdm.tqdm(
    location[['lat', 'lon']].itertuples(index=False, name=None),
    total=len(location),
):
    row = _reverse_geocode(geocode_session, lat, lon)
    if row is None:
        failed_locations.append((lat, lon))
    else:
        records.append(row)
    # throttle requests to the geocoding service
    time.sleep(REQUEST_DELAY_S)

# Build the frame once; concatenating inside the loop is quadratic.
result = pd.DataFrame(records, columns=RESULT_COLUMNS)

if failed_locations:
    print(f'{len(failed_locations)} of {len(location)} locations could not be geocoded')


100%|██████████| 1878/1878 [17:24<00:00,  1.80it/s]


In [30]:
import pycountry
def get_country_name(code):
    """Look up a country name from its alpha-2 code via ``pycountry``.

    Returns the ``name`` attribute of the record found by
    ``pycountry.countries.get(alpha_2=code)``, or ``None`` when that lookup
    returns nothing.
    """
    match = pycountry.countries.get(alpha_2=code)
    return match.name if match else None

In [28]:
# list the distinct country codes collected in `result`
result['country_code'].unique()

array(['cd', 'cf', 'na', 'za', 'ss', 'bi', 'zw', 'ug', 'sd', 'mz', 'lb',
       'ke', 'et', 'so', 'ye', 'mg', 'af', 'pk', 'tj', 'zm', 'ls', 'tz',
       'mw', 'hn', 'sv', 'gt', 'ht', 'sz', 'do', 'ao', 'dj'], dtype=object)

In [31]:
# upper-case the country codes in place
result['country_code'] = result['country_code'].str.upper()

# derive a country_en column by looking up each code with get_country_name
result['country_en'] = result['country_code'].map(get_country_name)


In [33]:
# list the distinct country names produced by the country_code lookup
result.country_en.unique()

array(['Congo, The Democratic Republic of the',
       'Central African Republic', 'Namibia', 'South Africa',
       'South Sudan', 'Burundi', 'Zimbabwe', 'Uganda', 'Sudan',
       'Mozambique', 'Lebanon', 'Kenya', 'Ethiopia', 'Somalia', 'Yemen',
       'Madagascar', 'Afghanistan', 'Pakistan', 'Tajikistan', 'Zambia',
       'Lesotho', 'Tanzania, United Republic of', 'Malawi', 'Honduras',
       'El Salvador', 'Guatemala', 'Haiti', 'Eswatini',
       'Dominican Republic', 'Angola', 'Djibouti'], dtype=object)

In [36]:
# inspect the column names of df_new
df_new.columns

Index(['date', 'event_count_battles', 'event_count_explosions',
       'event_count_violence', 'fatalities_battles', 'fatalities_explosions',
       'fatalities_violence', 'GOSIF_GPP', 'rainfall_chirps', 'GOSIF_GPP_SD',
       'lon', 'lat', 'temperature_2m_mean', 'temperature_2m_mean_sd',
       'shortwave_radiation_sum', 'shortwave_radiation_sum_sd',
       'precipitation_sum', 'precipitation_sum_sd',
       'nearest_neighbor_distance_x', 'market_lat', 'market_lon',
       'price_index', 'estimated_population', 'title', 'overall_phase',
       'country', 'phase3_worse_population', 'phase3_worse_percentage',
       'phase1_population', 'phase1_percent', 'phase2_population',
       'phase2_percent', 'phase3_population', 'phase3_percent',
       'phase4_population', 'phase4_percent', 'phase5_population',
       'phase5_percent', 'phase3_worse_percentage_manual',
       'phase3_plus_phase4', 'phase2_worse_percentage_manual',
       'phase2_plus_phase3', 'phase2_plus_phase3_plus_phase4',
 

In [37]:
# drop the title and country columns from df_new
# (nearest_neighbor_distance_x is not renamed in this cell)
df_new = df_new.drop(columns=['title', 'country'])

In [38]:
# left-join `result` onto df_new by (lat, lon); indicator=True adds a
# `_merge` column recording where each row was found
df_new = df_new.merge(result, how='left', on=['lat', 'lon'], indicator=True)

In [39]:
# count rows per merge outcome (both / left_only / right_only)
df_new['_merge'].value_counts()

_merge
both          38776
left_only         0
right_only        0
Name: count, dtype: int64

In [40]:
# remove the merge indicator column
df_new = df_new.drop(columns='_merge')

In [42]:
# show up to 100 columns when a DataFrame is displayed
pd.options.display.max_columns = 100

In [44]:
# preview the first rows of df_new
df_new.head()

Unnamed: 0,date,event_count_battles,event_count_explosions,event_count_violence,fatalities_battles,fatalities_explosions,fatalities_violence,GOSIF_GPP,rainfall_chirps,GOSIF_GPP_SD,lon,lat,temperature_2m_mean,temperature_2m_mean_sd,shortwave_radiation_sum,shortwave_radiation_sum_sd,precipitation_sum,precipitation_sum_sd,nearest_neighbor_distance_x,market_lat,market_lon,price_index,estimated_population,overall_phase,phase3_worse_population,phase3_worse_percentage,phase1_population,phase1_percent,phase2_population,phase2_percent,phase3_population,phase3_percent,phase4_population,phase4_percent,phase5_population,phase5_percent,phase3_worse_percentage_manual,phase3_plus_phase4,phase2_worse_percentage_manual,phase2_plus_phase3,phase2_plus_phase3_plus_phase4,soil_moisture_0_to_7cm_mean,soil_moisture_0_to_7cm_sd,elevation,country,state,country_code,address,country_en
0,2017-01-01,1.0,0.0,0.0,5.0,0.0,0.0,22458.920383,109.417561,1198.248826,14.070903,-5.533893,26.122581,0.80445,,,4.23871,5.574925,379.143546,-8.84,13.23,130.034171,,,,,,,,,,,,,,,,,,,,0.451499,0.02781,283.0,République démocratique du Congo,Kongo-Central,CD,"Songololo, Kongo-Central, République démocrati...","Congo, The Democratic Republic of the"
1,2017-01-01,1.0,0.0,0.0,5.0,0.0,0.0,22200.09091,111.121122,1175.474359,14.068317,-5.493558,25.922581,0.80445,17.102903,3.211451,4.23871,5.574925,383.42592,-8.84,13.23,130.034171,,,,,,,,,,,,,,,,,,,,0.451499,0.02781,332.0,République démocratique du Congo,Kongo-Central,CD,"Kwanza, Songololo, Kongo-Central, République d...","Congo, The Democratic Republic of the"
2,2017-01-01,1.0,0.0,0.0,5.0,0.0,0.0,22218.324112,138.828427,1342.802136,14.314904,-5.289988,26.306452,0.817286,17.347742,3.216938,4.277419,5.246822,412.492874,-8.84,13.23,130.034171,,,,,,,,,,,,,,,,,,,,0.459875,0.025151,293.0,République démocratique du Congo,Kongo-Central,CD,"Songololo, Kongo-Central, République démocrati...","Congo, The Democratic Republic of the"
3,2017-01-01,2.0,0.0,0.0,4.0,0.0,0.0,4970.260394,0.229789,1425.793103,15.778695,6.765411,25.380645,1.135318,22.995806,1.00494,0.0,0.0,203.212588,8.57,16.07,53.106102,,,,,,,,,,,,,,,,,,,,0.165539,0.000659,886.0,Ködörösêse tî Bêafrîka - République Centrafric...,Ouham-Pendé,CF,"Ouham-Pendé, Ködörösêse tî Bêafrîka - Républiq...",Central African Republic
4,2017-01-01,1.0,0.0,0.0,3.0,0.0,0.0,4307.993754,0.134368,1467.055556,16.437358,7.248169,26.880645,1.486252,22.840968,0.772474,0.0,0.0,152.447655,8.57,16.07,53.106102,,,,,,,,,,,,,,,,,,,,0.139403,0.000695,602.0,Ködörösêse tî Bêafrîka - République Centrafric...,Ouham-Pendé,CF,"Paoua, Ouham-Pendé, Ködörösêse tî Bêafrîka - R...",Central African Republic


In [46]:
# list the column names of df_new
df_new.columns

Index(['date', 'event_count_battles', 'event_count_explosions',
       'event_count_violence', 'fatalities_battles', 'fatalities_explosions',
       'fatalities_violence', 'GOSIF_GPP', 'rainfall_chirps', 'GOSIF_GPP_SD',
       'lon', 'lat', 'temperature_2m_mean', 'temperature_2m_mean_sd',
       'shortwave_radiation_sum', 'shortwave_radiation_sum_sd',
       'precipitation_sum', 'precipitation_sum_sd',
       'nearest_neighbor_distance_x', 'market_lat', 'market_lon',
       'price_index', 'estimated_population', 'overall_phase',
       'phase3_worse_population', 'phase3_worse_percentage',
       'phase1_population', 'phase1_percent', 'phase2_population',
       'phase2_percent', 'phase3_population', 'phase3_percent',
       'phase4_population', 'phase4_percent', 'phase5_population',
       'phase5_percent', 'phase3_worse_percentage_manual',
       'phase3_plus_phase4', 'phase2_worse_percentage_manual',
       'phase2_plus_phase3', 'phase2_plus_phase3_plus_phase4',
       'soil_moisture

In [48]:
# rename nearest_neighbor_distance_x to distance, then reorder the columns
column_order = [
    # date and place
    'date', 'country', 'country_en', 'state', 'address', 'country_code',
    'lon', 'lat',
    # phase columns
    'overall_phase',
    'phase3_worse_population', 'phase3_worse_percentage',
    'phase1_population', 'phase1_percent',
    'phase2_population', 'phase2_percent',
    'phase3_population', 'phase3_percent',
    'phase4_population', 'phase4_percent',
    'phase5_population', 'phase5_percent',
    'phase3_worse_percentage_manual', 'phase3_plus_phase4',
    'phase2_worse_percentage_manual', 'phase2_plus_phase3',
    'phase2_plus_phase3_plus_phase4',
    # event counts and fatalities
    'event_count_battles', 'event_count_explosions', 'event_count_violence',
    'fatalities_battles', 'fatalities_explosions', 'fatalities_violence',
    # GPP, rainfall, temperature, radiation, precipitation, soil moisture, elevation
    'GOSIF_GPP', 'rainfall_chirps', 'GOSIF_GPP_SD',
    'temperature_2m_mean', 'temperature_2m_mean_sd',
    'shortwave_radiation_sum', 'shortwave_radiation_sum_sd',
    'precipitation_sum', 'precipitation_sum_sd',
    'soil_moisture_0_to_7cm_mean', 'soil_moisture_0_to_7cm_sd',
    'elevation',
    # distance, market coordinates, price index, population
    'distance', 'market_lat', 'market_lon',
    'price_index', 'estimated_population',
]
df_new = (
    df_new
    .rename(columns={'nearest_neighbor_distance_x': 'distance'})
    .loc[:, column_order]
)


In [49]:
# display df_new
df_new

Unnamed: 0,date,country,country_en,state,address,country_code,lon,lat,overall_phase,phase3_worse_population,phase3_worse_percentage,phase1_population,phase1_percent,phase2_population,phase2_percent,phase3_population,phase3_percent,phase4_population,phase4_percent,phase5_population,phase5_percent,phase3_worse_percentage_manual,phase3_plus_phase4,phase2_worse_percentage_manual,phase2_plus_phase3,phase2_plus_phase3_plus_phase4,event_count_battles,event_count_explosions,event_count_violence,fatalities_battles,fatalities_explosions,fatalities_violence,GOSIF_GPP,rainfall_chirps,GOSIF_GPP_SD,temperature_2m_mean,temperature_2m_mean_sd,shortwave_radiation_sum,shortwave_radiation_sum_sd,precipitation_sum,precipitation_sum_sd,soil_moisture_0_to_7cm_mean,soil_moisture_0_to_7cm_sd,elevation,distance,market_lat,market_lon,price_index,estimated_population
0,2017-01-01,République démocratique du Congo,"Congo, The Democratic Republic of the",Kongo-Central,"Songololo, Kongo-Central, République démocrati...",CD,14.070903,-5.533893,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,5.0,0.0,0.0,22458.920383,109.417561,1198.248826,26.122581,0.804450,,,4.238710,5.574925,0.451499,0.027810,283.0,379.143546,-8.84,13.23,130.034171,
1,2017-01-01,République démocratique du Congo,"Congo, The Democratic Republic of the",Kongo-Central,"Kwanza, Songololo, Kongo-Central, République d...",CD,14.068317,-5.493558,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,5.0,0.0,0.0,22200.090910,111.121122,1175.474359,25.922581,0.804450,17.102903,3.211451,4.238710,5.574925,0.451499,0.027810,332.0,383.425920,-8.84,13.23,130.034171,
2,2017-01-01,République démocratique du Congo,"Congo, The Democratic Republic of the",Kongo-Central,"Songololo, Kongo-Central, République démocrati...",CD,14.314904,-5.289988,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,5.0,0.0,0.0,22218.324112,138.828427,1342.802136,26.306452,0.817286,17.347742,3.216938,4.277419,5.246822,0.459875,0.025151,293.0,412.492874,-8.84,13.23,130.034171,
3,2017-01-01,Ködörösêse tî Bêafrîka - République Centrafric...,Central African Republic,Ouham-Pendé,"Ouham-Pendé, Ködörösêse tî Bêafrîka - Républiq...",CF,15.778695,6.765411,,,,,,,,,,,,,,,,,,,2.0,0.0,0.0,4.0,0.0,0.0,4970.260394,0.229789,1425.793103,25.380645,1.135318,22.995806,1.004940,0.000000,0.000000,0.165539,0.000659,886.0,203.212588,8.57,16.07,53.106102,
4,2017-01-01,Ködörösêse tî Bêafrîka - République Centrafric...,Central African Republic,Ouham-Pendé,"Paoua, Ouham-Pendé, Ködörösêse tî Bêafrîka - R...",CF,16.437358,7.248169,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,3.0,0.0,0.0,4307.993754,0.134368,1467.055556,26.880645,1.486252,22.840968,0.772474,0.000000,0.000000,0.139403,0.000695,602.0,152.447655,8.57,16.07,53.106102,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38771,2022-12-01,Тоҷикистон,Tajikistan,Вилояти Мухтори Кӯҳистони Бадахшон,"РБ06, Қозидеҳ, Ноҳияи Ишкошим, Вилояти Мухтори...",TJ,71.456211,37.03783,,,,,,,,,,,,,,,,,,,0.0,1.0,0.0,0.0,3.0,0.0,784.389728,10.718039,1692.512253,-10.670968,3.213741,10.735161,1.558302,0.622581,1.295302,0.235483,0.006302,2625.0,52.205131,37.50,71.56,92.798223,
38772,2022-12-01,افغانستان,Afghanistan,کونړ ولايت,"ناړی, ناړۍ ولسوالۍ, کونړ ولايت, افغانستان",AF,71.487215,35.171917,,,,,,,,,,,,,,,,,,,1.0,0.0,0.0,0.0,0.0,0.0,1252.600815,13.004443,1651.941176,6.951613,1.597054,10.958387,1.877639,0.296774,1.295244,0.283281,0.011201,1163.0,130.400744,34.00,71.54,106.911393,
38773,2022-12-01,پاکستان,Pakistan,خیبر پښتونخوا,"خار, باجوړ, خیبر پښتونخوا, پاکستان",PK,71.50496,34.752987,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,1.0,1416.164120,9.017693,1422.424242,9.309677,1.784630,11.210000,1.876703,0.238710,0.872421,0.214188,0.012498,926.0,83.790062,34.00,71.54,106.911393,
38774,2022-12-01,پاکستان,Pakistan,خیبر پښتونخوا,"خار, باجوړ, خیبر پښتونخوا, پاکستان",PK,71.504927,34.753595,,,,,,,,,,,,,,,,,,,1.0,1.0,1.0,1.0,0.0,1.0,1467.059042,9.080978,1422.424242,9.309677,1.784630,11.210000,1.876703,0.238710,0.872421,0.249997,0.013454,911.0,83.857774,34.00,71.54,106.911393,


In [50]:
# write df_new to new_merged_dataset.csv without the row index
df_new.to_csv('new_merged_dataset.csv', index=False)