In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import cartopy.crs as ccrs
import cartopy

import geopandas
import cartopy.io.shapereader as shpreader
import shapely.vectorized

import seaborn as sns

In [2]:
# Read the IPCC reference-region polygons (NAME, LAB, USAGE, geometry) and preview them.
ipcc_regions = geopandas.read_file("../data/referenceRegions.dbf")
ipcc_regions.head()

Unnamed: 0,NAME,LAB,USAGE,geometry
0,Alaska/N.W. Canada [ALA:1],ALA,land,"POLYGON ((-105.00000 60.00000, -168.00000 60.0..."
1,Amazon [AMZ:7],AMZ,land,"POLYGON ((-66.40000 -20.00000, -79.70000 -1.20..."
2,Central America/Mexico [CAM:6],CAM,land,"POLYGON ((-68.80000 11.40000, -79.70000 -1.200..."
3,small islands regions Caribbean,CAR*,all,"POLYGON ((-68.80000 11.40000, -85.80000 25.000..."
4,Central Asia [CAS:20],CAS,land,"POLYGON ((60.00000 30.00000, 60.00000 50.00000..."


In [3]:
# Lookup table pairing a gridcell id (`ndf_id`) with a document id (`doc_id`).
place_gridcells = pd.read_csv("../data/study_gridcell_all_2.5.csv")
place_gridcells.head()

Unnamed: 0,ndf_id,doc_id
0,7574.0,486888.0
1,7574.0,3323227.0
2,7574.0,1358994.0
3,7574.0,2356292.0
4,7574.0,1552852.0


In [4]:
# Stack the temperature and precipitation D&A tables, tagging each row with its
# variable, then derive the share of gridcells with a trend and a category label.
degrees = 2.5
tdf = pd.read_csv(f'../data/study_da_6 - Temperature - upper_pred_{degrees}.csv')
tdf["da_var"] = "Temperature"
pdf = pd.read_csv(f'../data/study_da_6 - Precipitation - upper_pred_{degrees}.csv')
pdf["da_var"] = "Precipitation"

dadf = pd.concat([tdf, pdf])

# Keep rows with a known, positive gridcell count. The comparison needs its own
# parentheses: `&` binds more tightly than `>`, so `a & b > 0` means `(a & b) > 0`.
# `.copy()` gives an independent frame for the column assignments below.
dadf = dadf[pd.notna(dadf["gridcells"]) & (dadf["gridcells"] > 0)].copy()

# Share of a document's gridcells that show a D&A trend.
dadf["da_trend_p"] = dadf["da_trend_cells"] / dadf["gridcells"]

dadf["da_trend_cat"] = None

# Later assignments overwrite earlier ones, so the order of these lines matters.
dadf.loc[dadf['da_trend_p'] == 0, "da_trend_cat"] = "0==DA"
dadf.loc[dadf['da_data_cells'] == 0, "da_trend_cat"] = "NA"
dadf.loc[dadf['da_trend_p'] > 0, "da_trend_cat"] = "0<DA<0.5"
dadf.loc[dadf['da_trend_p'] > 0.5, "da_trend_cat"] = "DA>0.5"

places = pd.read_csv('../data/place_df.csv')

In [73]:
# Collapse the temperature and precipitation results to a single `da` value per id.
# NOTE: this rebinds `dadf` to a two-column frame (`id`, `da`). Cells that filter on
# `dadf['da_trend_cat']` need the frame built from the concatenated CSVs instead.
tdf['temperature_da'] = tdf['da_trend_cells']
# Precipitation takes its own trend cells from `pdf`, not from `tdf`.
pdf['precip_da'] = pdf['da_trend_cells']

dadf = (
    tdf[['id', 'temperature_da']]
    .merge(pdf[['id', 'precip_da']], how="outer")  # `id` is the only shared column
    .fillna(0)  # an id missing from one variable counts as 0 for that variable
)

# `da` is the larger of the two per-variable values.
dadf['da'] = dadf[['temperature_da', 'precip_da']].max(axis=1)
dadf = dadf[['id', 'da']]

dadf.head()

Unnamed: 0,id,da
0,13201,0.0
1,1544528,1.0
2,1287688,0.0
3,245321,0.0
4,1549132,0.0


In [74]:
# Check which columns `dadf` currently carries.
dadf.columns


Index(['id', 'da'], dtype='object')

In [75]:
# Join the per-document `da` value with the predicted categories and relevance scores.
cat_df = pd.read_csv('../data/1_predicted_category_documents.csv')
predictions = pd.read_csv('../data/1_document_relevance.csv')
# Neither merge names a key, so pandas joins on every column the frames share.
# The outer join keeps documents that have no row in `dadf`.
categories_with_relevance = cat_df.merge(predictions)
df = dadf.merge(categories_with_relevance, how="outer")

# Every "12 - <category> - mean_prediction" column present in df.
pred_cats = [col for col in df.columns if "12 - " in col and " - mean_prediction" in col]
for mean_col in pred_cats:
    print(df[mean_col].sum())
    label = mean_col.replace(" - mean_prediction", '')
    bound_cols = [mean_col] + [
        mean_col.replace('mean_prediction', bound) for bound in ('lower_pred', 'upper_pred')
    ]
    # Where the category's own column equals 1, pin mean/lower/upper predictions to 1.
    df.loc[df[label].eq(1), bound_cols] = 1
    print(df[mean_col].sum())
print(pred_cats)

# Fixed reporting order for the tables below; "12 - Total" is a pseudo-category
# that is not a column of df.
pred_cats = [
    f"12 - {name} - mean_prediction"
    for name in (
        "Terrestrial ES",
        "Coastal and marine Ecosystems",
        "Mountains, snow and ice",
        "Rivers, lakes, and soil moisture",
        "Human and managed",
    )
] + ["12 - Total"]

pcols = [
    f'0 - relevance - {bound}'
    for bound in ('mean_prediction', 'lower_pred', 'upper_pred')
]

# Rows with relevant == 1 get all three relevance scores set to 1.
df.loc[df['relevant'].eq(1), pcols] = 1



12291.100000000002
12464.100000000002
10176.9
10457.9
4585.8
4744.8
11683.5
11976.5
29838.4
30281.4
['12 - Coastal and marine Ecosystems - mean_prediction', '12 - Human and managed - mean_prediction', '12 - Mountains, snow and ice - mean_prediction', '12 - Rivers, lakes, and soil moisture - mean_prediction', '12 - Terrestrial ES - mean_prediction']


In [7]:
# Count relevant documents per IPCC region, reported as "mid (low-high)" using the
# mean / lower / upper relevance predictions with a 0.5 threshold.
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

# Shift longitudes above 180 down by 360 so they match the polygons' signed longitudes.
ndf.loc[ndf['LON'] > 180, "LON"] -= 360
ndf['ipccreg'] = 0

index = pd.Index(ipcc_regions.NAME, name="IPCC Region")

table = pd.DataFrame(columns=['Documents'], index=index)

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry, ndf['LON'], ndf['LAT'])
    # Flat 1-D row positions, so `.loc` and `isin` both receive a plain index array.
    idx = np.flatnonzero(inplace)

    ndf.loc[idx, "ipccreg"] = i + 1
    # NOTE(review): `idx` holds row positions in ndf; this assumes
    # place_gridcells['ndf_id'] uses the same numbering (and not ndf's `index`
    # column) - confirm.
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()

    in_region = df['id'].isin(dids)
    mid = int((in_region & (df["0 - relevance - mean_prediction"] >= 0.5)).sum())
    low = int((in_region & (df["0 - relevance - lower_pred"] >= 0.5)).sum())
    high = int((in_region & (df["0 - relevance - upper_pred"] >= 0.5)).sum())

    table.loc[row.NAME, "Documents"] = f"{mid} ({low}-{high})"
    # No early `break`: every region gets a row.

table.head(10)



In [8]:
# Count relevant documents per IPCC region and impact category, reported as
# "mid (low-high)" from the mean / lower / upper predictions (threshold 0.5).
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

# Shift longitudes above 180 down by 360 so they match the polygons' signed longitudes.
ndf.loc[ndf['LON'] > 180, "LON"] -= 360
ndf['ipccreg'] = 0

# Short impact labels: the middle part of "12 - <label> - mean_prediction".
impact_labels = [name.split(' - ')[1] for name in pred_cats]
index = pd.MultiIndex.from_product(
    [ipcc_regions.NAME, impact_labels], names=['IPCC Region', 'Impact']
)

table = pd.DataFrame(columns=['Documents'], index=index)

# Column suffixes giving the central estimate and its lower / upper bound, in report order.
bounds = ('mean_prediction', 'lower_pred', 'upper_pred')

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry, ndf['LON'], ndf['LAT'])
    # Flat 1-D row positions, so `.loc` and `isin` both receive a plain index array.
    idx = np.flatnonzero(inplace)

    ndf.loc[idx, "ipccreg"] = i + 1
    # NOTE(review): `idx` holds row positions in ndf; this assumes
    # place_gridcells['ndf_id'] uses the same numbering (and not ndf's `index`
    # column) - confirm.
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()

    in_region = df['id'].isin(dids)

    for pc in pred_cats:
        counts = []
        for bound in bounds:
            mask = in_region & (df[f"0 - relevance - {bound}"] >= 0.5)
            # "Total" counts all relevant documents; other rows also require the
            # category prediction (same bound) to reach 0.5.
            if "Total" not in pc:
                mask = mask & (df[pc.replace('mean_prediction', bound)] >= 0.5)
            counts.append(int(mask.sum()))
        mid, low, high = counts

        table.loc[(row.NAME, pc.split(' - ')[1]), "Documents"] = f"{mid} ({low}-{high})"

print(table.shape)
table.head(10)

(198, 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Documents
IPCC Region,Impact,Unnamed: 2_level_1
Alaska/N.W. Canada [ALA:1],Terrestrial ES,1640 (1306-1953)
Alaska/N.W. Canada [ALA:1],Coastal and marine Ecosystems,366 (256-478)
Alaska/N.W. Canada [ALA:1],"Mountains, snow and ice",518 (409-615)
Alaska/N.W. Canada [ALA:1],"Rivers, lakes, and soil moisture",443 (319-553)
Alaska/N.W. Canada [ALA:1],Human and managed,183 (121-253)
Alaska/N.W. Canada [ALA:1],Total,3979 (3381-4552)
Amazon [AMZ:7],Terrestrial ES,52 (36-63)
Amazon [AMZ:7],Coastal and marine Ecosystems,315 (229-393)
Amazon [AMZ:7],"Mountains, snow and ice",32 (25-35)
Amazon [AMZ:7],"Rivers, lakes, and soil moisture",70 (38-102)


In [9]:
# Count relevant documents per IPCC region in each D&A trend category, reported as
# "mid (low-high)" from the mean / lower / upper relevance predictions.
#
# Requires the `dadf` that has `id` and `da_trend_cat` columns (the frame built from
# the concatenated temperature/precipitation CSVs), not the later two-column
# (`id`, `da`) frame that reuses the same name.
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

# Shift longitudes above 180 down by 360 so they match the polygons' signed longitudes.
ndf.loc[ndf['LON'] > 180, "LON"] -= 360
ndf['ipccreg'] = 0

index = pd.Index(ipcc_regions.NAME, name="IPCC Region")

# "Sensitivity" and "Detection" are created here but not filled by this cell.
table = pd.DataFrame(columns=['D&A Trend', "nonD&A Trend", "NAD&A Trend", "Sensitivity","Detection"], index=index)

# Table column -> value of dadf['da_trend_cat'] counted in that column.
trend_columns = {
    "D&A Trend": "DA>0.5",
    "nonD&A Trend": "0==DA",
    "NAD&A Trend": "NA",
}
# The category filters do not depend on the region, so compute them once.
trend_ids = {
    column: dadf.loc[dadf['da_trend_cat'] == category, 'id']
    for column, category in trend_columns.items()
}

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry, ndf['LON'], ndf['LAT'])
    # Flat 1-D row positions, so `.loc` and `isin` both receive a plain index array.
    idx = np.flatnonzero(inplace)

    ndf.loc[idx, "ipccreg"] = i + 1
    # NOTE(review): `idx` holds row positions in ndf; this assumes
    # place_gridcells['ndf_id'] uses the same numbering (and not ndf's `index`
    # column) - confirm.
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()

    # Ids of documents in this region that pass the relevance threshold under each bound.
    in_region = df['id'].isin(dids)
    midids = df.loc[in_region & (df["0 - relevance - mean_prediction"] >= 0.5), 'id']
    lowids = df.loc[in_region & (df["0 - relevance - lower_pred"] >= 0.5), 'id']
    highids = df.loc[in_region & (df["0 - relevance - upper_pred"] >= 0.5), 'id']

    for column, ids_in_category in trend_ids.items():
        # Counts are rows of dadf, so an id present for both variables counts twice.
        mid = int(ids_in_category.isin(midids).sum())
        low = int(ids_in_category.isin(lowids).sum())
        high = int(ids_in_category.isin(highids).sum())

        table.loc[row.NAME, column] = f"{mid} ({low}-{high})"

table.head(10)




Unnamed: 0_level_0,D&A Trend,nonD&A Trend,NAD&A Trend,Sensitivity,Detection
IPCC Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alaska/N.W. Canada [ALA:1],454 (399-503),651 (569-716),86 (71-95),,
Amazon [AMZ:7],268 (231-289),126 (109-149),16 (14-19),,
Central America/Mexico [CAM:6],313 (274-340),205 (178-239),32 (29-34),,
small islands regions Caribbean,73 (60-76),93 (79-110),13 (10-15),,
Central Asia [CAS:20],529 (462-593),315 (277-357),28 (24-29),,
Central Europe [CEU:12],815 (719-898),535 (465-601),74 (59-79),,
Canada/Greenland/Iceland [CGI:2],1161 (1030-1280),673 (593-754),123 (105-137),,
Central North America [CNA:4],340 (294-379),339 (291-394),29 (21-32),,
East Africa [EAF:16],640 (546-722),390 (330-445),82 (68-96),,
East Asia [EAS:22],134 (117-149),175 (148-205),25 (20-26),,


In [10]:
# List the distinct trend categories present in dadf (needs the frame with `da_trend_cat`).
dadf.da_trend_cat.unique()

array(['0==DA', 'DA>0.5', '0<DA<0.5', 'NA'], dtype=object)

In [11]:
# Reload the gridcell table and look up the row whose `index` column equals 7574,
# the ndf_id that appears in the first rows of place_gridcells.
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")
ndf[ndf['index']==7574].head()

Unnamed: 0,LAT,LON,n_studies,LAT_25,LON_25,da_cat,index,n_study_prop
7674,43.75,286.25,333.0,42.5,287.5,0.0,7574,137.268438


In [12]:
# Re-read the gridcell/document pairs (same file as loaded near the top of the notebook).
place_gridcells = pd.read_csv("../data/study_gridcell_all_2.5.csv")
place_gridcells.head()

Unnamed: 0,ndf_id,doc_id
0,7574.0,486888.0
1,7574.0,3323227.0
2,7574.0,1358994.0
3,7574.0,2356292.0
4,7574.0,1552852.0


In [13]:
# Display the IPCC regions table.
ipcc_regions

Unnamed: 0,NAME,LAB,USAGE,geometry
0,Alaska/N.W. Canada [ALA:1],ALA,land,"POLYGON ((-105.00000 60.00000, -168.00000 60.0..."
1,Amazon [AMZ:7],AMZ,land,"POLYGON ((-66.40000 -20.00000, -79.70000 -1.20..."
2,Central America/Mexico [CAM:6],CAM,land,"POLYGON ((-68.80000 11.40000, -79.70000 -1.200..."
3,small islands regions Caribbean,CAR*,all,"POLYGON ((-68.80000 11.40000, -85.80000 25.000..."
4,Central Asia [CAS:20],CAS,land,"POLYGON ((60.00000 30.00000, 60.00000 50.00000..."
5,Central Europe [CEU:12],CEU,land,"POLYGON ((-10.00000 45.00000, -10.00000 48.000..."
6,Canada/Greenland/Iceland [CGI:2],CGI,land,"POLYGON ((-10.00000 50.00000, -105.00000 50.00..."
7,Central North America [CNA:4],CNA,land,"POLYGON ((-85.00000 50.00000, -85.00000 28.600..."
8,East Africa [EAF:16],EAF,land,"POLYGON ((25.00000 -11.40000, 25.00000 15.0000..."
9,East Asia [EAS:22],EAS,land,"POLYGON ((100.00000 20.00000, 100.00000 50.000..."


In [15]:
# Load the "specific" predicted-category table (one row per document id).
extra_cat_df = pd.read_csv('../data/1_predicted_category_documents_specific.csv')


Unnamed: 0.1,Unnamed: 0,id,content,title,wosarticle__de,wosarticle__wc,ar5,seen,relevant,random_sample,...,2 - Trend or climate change attribution - lower_pred,2 - Trend or climate change attribution - upper_pred,2 - 2.4. Sensitivity - mean_prediction,2 - 2.4. Sensitivity - std_prediction,2 - 2.4. Sensitivity - lower_pred,2 - 2.4. Sensitivity - upper_pred,2 - 2.5. Detection of a regional climate trend (no attribution) - mean_prediction,2 - 2.5. Detection of a regional climate trend (no attribution) - std_prediction,2 - 2.5. Detection of a regional climate trend (no attribution) - lower_pred,2 - 2.5. Detection of a regional climate trend (no attribution) - upper_pred
0,0,1298506,"In this study, we evaluated the effects of CO2...",Effects of seawater acidification on the early...,seawater acidification; Glyptocidaris crenular...,['Limnology; Oceanography'],,0.0,0.0,0,...,0.000000,0.000000,0.8,0.400000,0.4,1.000000,0.0,0.0,0.0,0.0
1,1,1294278,"Mean air temperatures and the frequency, inten...",Nest box design for a changing climate: The va...,artificial hollows; climate change; hollow-nes...,['Ecology'],,0.0,0.0,0,...,0.000000,0.000000,1.0,0.000000,1.0,1.000000,0.0,0.0,0.0,0.0
2,2,1522976,Surging glaciers are potential analogues for l...,Glacial geological studies of surge-type glaci...,Surge-type glacier; Iceland; Landsystem model;...,"['Geosciences, Multidisciplinary']",,0.0,0.0,0,...,1.000000,1.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
3,3,11008,Deposits of the Last Interglacial on the south...,THE LAST INTERGLACIAL IN THE MEDITERRANEAN AS ...,,"['Geography, Physical; Geosciences, Multidisci...",,0.0,0.0,0,...,0.400000,1.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
4,4,772888,Identifying patterns in the effects of tempera...,Cross-taxa generalities in the relationship be...,climate change; population size; time series; ...,['Biology; Ecology; Evolutionary Biology'],,0.0,0.0,0,...,0.000000,0.889898,0.5,0.500000,0.0,1.000000,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97117,97117,460524,Corals in the Gulf I withstand summer temperat...,Thermal tolerances of reef corals in the Gulf:...,Gulf thermal history; Temperature stress thres...,['Environmental Sciences; Marine & Freshwater ...,1.0,1.0,1.0,0,...,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
97118,97118,2306024,A remotely operated vehicle (ROV) survey condu...,First characterisation of a Leiopathes glaberr...,deep Mediterranean Sea; Black corals; fishing ...,['Zoology'],,0.0,0.0,0,...,0.110102,1.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
97119,97119,1311965,Several different factors may determine where ...,The role of migration for spatial turnover of ...,,['Ecology'],,0.0,0.0,0,...,1.000000,1.000000,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.0
97120,97120,96735,Coastal ecosystems that are characterized by k...,Diurnal fluctuations in seawater pH influence ...,ocean acidification; natural pH fluctuations; ...,['Biology; Ecology; Evolutionary Biology'],,0.0,0.0,0,...,0.000000,0.000000,1.0,0.000000,1.0,1.000000,0.0,0.0,0.0,0.0


In [76]:
# Join df with the specific-category predictions. No `on=`/`how=` is given, so pandas
# does an inner join on every column name the two frames have in common.
merged_df = df.merge(extra_cat_df)

merged_df.head()

Unnamed: 0.1,id,da,Unnamed: 0,content,title,wosarticle__de,wosarticle__wc,ar5,seen,relevant,...,18 - Health - lower_pred,18 - Health - upper_pred,18 - Displacement and migration - mean_prediction,18 - Displacement and migration - std_prediction,18 - Displacement and migration - lower_pred,18 - Displacement and migration - upper_pred,18 - Other anthropogenic - mean_prediction,18 - Other anthropogenic - std_prediction,18 - Other anthropogenic - lower_pred,18 - Other anthropogenic - upper_pred
0,13201,0.0,57960.0,It is still difficult to confirm from availabl...,An Overview of Extreme Hot Weather Incidents a...,heatwave; extreme hot weather; natural ventila...,['Construction & Building Technology; Energy &...,,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1544528,1.0,60309.0,Trichocomaceae family encompasses the greatest...,DIVERSITY OF ISOLATED Trichocomaceae FROM SOIL...,Trichocomaceae; Pinus elliottii; Corymbia citr...,['Plant Sciences; Forestry'],,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1287688,0.0,90871.0,We use previously published and new data from ...,Major changes in the red-billed gull (Larus no...,red-billed gull; Larus novaehollandiae scopuli...,['Ornithology'],,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,245321,0.0,41115.0,This study evaluates the economic effects of c...,Evaluating the economic effects of climate cha...,Climate change; Economic effects; European sar...,['Environmental Sciences; Environmental Studies'],,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1549132,0.0,19024.0,The frog Nanorana pleskei (Dicroglossidae) is ...,Urea and plasma ice-nucleating proteins promot...,Cryoprotectants; Urea; Ice-nucleating protein;...,['Physiology; Zoology'],,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [79]:
# The four "18 - ..." categories tabulated by region further down.
specific_impact_cats = [
    '18 - Food/Agriculture - mean_prediction',
    '18 - Livelihoods and wellbeing - mean_prediction',
    '18 - Health - mean_prediction',
    '18 - Displacement and migration - mean_prediction',
]

for c in specific_impact_cats:
    # Column sum before the override, for comparison with the sum printed after it.
    print(merged_df[c].sum())
    label = c.replace(" - mean_prediction",'')
    cs = [c, c.replace('mean_prediction','lower_pred'), c.replace('mean_prediction','upper_pred')]
    # Where the category's own column (e.g. '18 - Health') equals 1, pin the
    # mean / lower / upper predictions to 1.
    merged_df.loc[merged_df[label]==1,cs] = 1
    print(merged_df[c].sum())

specific_impact_cats

435.1
435.1
62.5
63.5
1723.4
1723.4
296.5
296.5
['12 - Terrestrial ES - mean_prediction', '12 - Coastal and marine Ecosystems - mean_prediction', '12 - Mountains, snow and ice - mean_prediction', '12 - Rivers, lakes, and soil moisture - mean_prediction', '12 - Human and managed - mean_prediction', '12 - Total']


['18 - Food/Agriculture - mean_prediction',
 '18 - Livelihoods and wellbeing - mean_prediction',
 '18 - Health - mean_prediction',
 '18 - Displacement and migration - mean_prediction']

In [24]:
from pycountry_convert import country_name_to_country_alpha3

In [44]:
# Build `country_dict`: a list of {"country": <alpha-3 code>, "region": <region>}
# records, one per listed country name, in the order the regions appear below.
# A name may appear in more than one region (or twice in one); each occurrence
# yields its own record.

# Hand-assigned codes for names the lookup may not resolve.
manual_alpha3 = {
    "Kosovo": "XKX",
    "Vatican City": "VAT",
    "U.S. Virgin Islands": "VIR",
}

# (region, comma-separated country names, best_effort).
# best_effort=False: an unresolvable name raises, as a typo there should be fixed.
# best_effort=True: an unresolvable name falls back to `manual_alpha3`, otherwise
# it is skipped and recorded in `unresolved_country_names`.
region_country_names = [
    ("Africa", """Algeria, Angola, Benin, Botswana, Burkina Faso, Burundi,  Cameroon, Chad, Congo, Ivory Coast, Djibouti, Egypt, Eritrea, Eswatini, Swaziland, Ethiopia, Gabon, Gambia, Ghana, Guinea, Kenya, Lesotho, Liberia, Libya, Malawi, Mali, Mauritania, Morocco, Mozambique, Namibia, Niger, Nigeria, Rwanda, Senegal, Sierra Leone, Somalia, Sudan, Tanzania, Togo, Tunisia, Uganda, Zambia, Zimbabwe""", False),
    ("Asia", """Afghanistan, Armenia, Azerbaijan, Bahrain, Bangladesh, Bhutan, Brunei, Cambodia, China, Cyprus, Georgia, India, Indonesia, Iran, Iraq, Israel, Japan, Jordan, Kazakhstan, Kuwait, Kyrgyzstan, Laos, Lebanon, Malaysia, Mongolia, Myanmar, Nepal, South Korea, North Korea, Oman, Pakistan, Palestine, Philippines, Qatar, Russia, Saudi Arabia, Singapore, Sri Lanka, Syria, Taiwan, Tajikistan, Thailand, Turkey, Turkmenistan, United Arab Emirates, Uzbekistan, Vietnam, Yemen
""", False),
    ("Australasia", """Australia,  New Zealand""", False),
    ("Central and South America", """Belize, Costa Rica, El Salvador, Guatemala, Honduras, Nicaragua, Panama, Argentina, Bolivia, Brazil, Chile, Colombia, Ecuador, French Guiana, Guyana, Paraguay, Peru, Suriname, Uruguay, Venezuela
""", False),
    ("North America", """United States, Canada, Mexico, Greenland
""", False),
    ("Europe", """Albania, Andorra, Armenia, Austria, Azerbaijan, Belarus, Belgium, Bosnia and Herzegovina, Bulgaria, Croatia, Cyprus, Czech Republic, Denmark, Estonia, Finland, France, Georgia, Germany, Greece, Hungary, Iceland, Ireland, Italy, Kazakhstan, Kosovo, Latvia, Liechtenstein, Lithuania, Luxembourg, Malta, Moldova, Monaco, Montenegro, Netherlands, Macedonia, Norway, Poland, Portugal, Romania, Russia, San Marino, Serbia, Slovakia, Slovenia, Spain, Sweden, Switzerland, Turkey, Ukraine, United Kingdom, Vatican City
""", True),
    ("Small Island States", """Anguilla, Aruba, Antigua and Barbuda, Bahamas, Bahrain, Barbados, Bermuda, British Virgin Islands, Cayman Islands, Northern Mariana Islands, Belize, Comoros, Cuba, Dominica, Grenada, Guyana, Haiti, Jamaica, Saint Kitts and Nevis, Saint Lucia, Saint Vincent and the Grenadines, Suriname, Trinidad and Tobago, Cabo Verde, Curaçao, Comoros, Guinea, Maldives, Mauritius, São Tomé and Príncipe, Seychelles, Singapore, Cook Islands, Fiji, Kiribati, Marshall Islands, Micronesia, Nauru, Niue, Palau, Samoa, Solomon Islands, Seychelles, East Timor, Tonga, Tuvalu, Vanuatu, French Polynesia, Guadeloupe, Guam, Martinique, Montserrat, New Caledonia, Puerto Rico, Saint Martin, Turks and Caicos, U.S. Virgin Islands, Guinea-Bissau, Cabo Verde, Comoros, Madagascar, Mauritius, Sao Tome and Principe, Seychelles
""", True),
]

country_dict = []
unresolved_country_names = []  # (region, name) pairs that produced no record

for region, names, best_effort in region_country_names:
    for raw_name in names.split(', '):
        name = raw_name.strip()  # removes the stray spaces/newlines in the lists above
        try:
            code = country_name_to_country_alpha3(name)
        except KeyError:
            # Only the "name not found" error is handled; anything else propagates.
            if not best_effort:
                raise
            code = manual_alpha3.get(name)
            if code is None:
                unresolved_country_names.append((region, name))
                continue
        country_dict.append({"country": code, "region": region})

# Make the skipped names visible instead of dropping them silently.
if unresolved_country_names:
    print("No country code found, skipped:", unresolved_country_names)


In [47]:
# Tabulate the country/region records and collect the region names in first-seen order.
region_df = pd.DataFrame(country_dict)
regions = region_df["region"].unique()
region_df.head()

Unnamed: 0,country,region
0,DZA,Africa
1,AGO,Africa
2,BEN,Africa
3,BWA,Africa
4,BFA,Africa


In [48]:
# Load the place table; `country_predicted` and `doc_id` are used to pick documents per region.
places = pd.read_csv('../data/place_df.csv')

In [56]:
# Preview the merged prediction table.
merged_df.head()

Unnamed: 0.1,Unnamed: 0,id,content,title,wosarticle__de,wosarticle__wc,ar5,seen,relevant,random_sample,...,18 - Health - lower_pred,18 - Health - upper_pred,18 - Displacement and migration - mean_prediction,18 - Displacement and migration - std_prediction,18 - Displacement and migration - lower_pred,18 - Displacement and migration - upper_pred,18 - Other anthropogenic - mean_prediction,18 - Other anthropogenic - std_prediction,18 - Other anthropogenic - lower_pred,18 - Other anthropogenic - upper_pred
0,0,1298506,"In this study, we evaluated the effects of CO2...",Effects of seawater acidification on the early...,seawater acidification; Glyptocidaris crenular...,['Limnology; Oceanography'],,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1,1294278,"Mean air temperatures and the frequency, inten...",Nest box design for a changing climate: The va...,artificial hollows; climate change; hollow-nes...,['Ecology'],,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,1522976,Surging glaciers are potential analogues for l...,Glacial geological studies of surge-type glaci...,Surge-type glacier; Iceland; Landsystem model;...,"['Geosciences, Multidisciplinary']",,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,3,11008,Deposits of the Last Interglacial on the south...,THE LAST INTERGLACIAL IN THE MEDITERRANEAN AS ...,,"['Geography, Physical; Geosciences, Multidisci...",,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4,772888,Identifying patterns in the effects of tempera...,Cross-taxa generalities in the relationship be...,climate change; population size; time series; ...,['Biology; Ecology; Evolutionary Biology'],,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [90]:
# For each region and specific impact category, count documents with a positive
# `da` value ("Partially attributed") and the remainder ("Not attributed"), then
# export the table to Excel.
index = pd.MultiIndex.from_product(
    [specific_impact_cats, ["Partially attributed", "Not attributed"]],
    # Level names describe the actual levels: impact category, then attribution status.
    names=['Impact', 'Attribution'],
)

table = pd.DataFrame(columns=regions, index=index)

for region in regions:
    countries = region_df.loc[region_df['region'] == region, "country"]
    # Documents with at least one place whose predicted country is in this region.
    place_ids = places.loc[places['country_predicted'].isin(countries), "doc_id"]
    in_region = merged_df['id'].isin(place_ids)
    for impact in specific_impact_cats:
        # NOTE(review): strict `> 0.5` here, while the IPCC-region tables use `>= 0.5`
        # and also filter on relevance - confirm the difference is intended.
        sub_df = merged_df.loc[in_region & (merged_df[impact] > 0.5)]
        n_attributed = int((sub_df["da"] > 0).sum())
        table.loc[(impact, "Partially attributed"), region] = n_attributed
        # Computed as the remainder, so rows with a missing `da` land here too.
        table.loc[(impact, "Not attributed"), region] = sub_df.shape[0] - n_attributed

table.to_excel('../data/human_regions.xlsx')
# Last expression of the cell, so the preview is actually rendered.
table.head(10)

In [51]:
# Display the place table.
places

Unnamed: 0,word,spans,country_predicted,country_conf,doc_id,admin1,lat,lon,country_code3,geonameid,place_name,feature_class,feature_code
0,Barents Sea,"[{'start': 329, 'end': 340}]",,0.904877,1340101,,74.00000,36.00000,,630674.0,Barents Sea,H,SEA
1,East Siberia,"[{'start': 1228, 'end': 1240}]",,0.904877,1340101,,74.00000,166.00000,,2127381.0,East Siberian Sea,H,SEA
2,Gulf Coast,"[{'start': 497, 'end': 507}]",USA,0.904877,222401,Mississippi,29.36901,-95.00565,USA,7287689.0,Gulf Coast,L,AREA
3,San Diego,"[{'start': 114, 'end': 123}]",USA,0.963741,3307272,California,33.02820,-116.77021,USA,5391832.0,San Diego County,A,ADM2
4,Alaska,"[{'start': 91, 'end': 97}]",USA,0.959621,355004,Alaska,64.00028,-150.00028,USA,5879092.0,Alaska,A,ADM1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
154884,Sea of Okhotsk,,,0.800000,2354419,,55.00000,150.00000,,2127380.0,Sea of Okhotsk,H,SEA
154885,Sea of Okhotsk,,,0.800000,169562,,55.00000,150.00000,,2127380.0,Sea of Okhotsk,H,SEA
154886,Sea of Okhotsk,,,0.800000,572968,,55.00000,150.00000,,2127380.0,Sea of Okhotsk,H,SEA
154887,Sea of Okhotsk,,,0.800000,1898289,,55.00000,150.00000,,2127380.0,Sea of Okhotsk,H,SEA
