In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib as mpl
import cartopy.crs as ccrs
import cartopy

import geopandas
import cartopy.io.shapereader as shpreader
import shapely.vectorized

import seaborn as sns

In [2]:
ipcc_regions = geopandas.read_file("../data/referenceRegions.dbf")
ipcc_regions.head()

Unnamed: 0,NAME,LAB,USAGE,geometry
0,Alaska/N.W. Canada [ALA:1],ALA,land,"POLYGON ((-105.00000 60.00000, -168.00000 60.0..."
1,Amazon [AMZ:7],AMZ,land,"POLYGON ((-66.40000 -20.00000, -79.70000 -1.20..."
2,Central America/Mexico [CAM:6],CAM,land,"POLYGON ((-68.80000 11.40000, -79.70000 -1.200..."
3,small islands regions Caribbean,CAR*,all,"POLYGON ((-68.80000 11.40000, -85.80000 25.000..."
4,Central Asia [CAS:20],CAS,land,"POLYGON ((60.00000 30.00000, 60.00000 50.00000..."


In [3]:
place_gridcells = pd.read_csv("../data/study_gridcell_all_2.5.csv")
place_gridcells.head()

Unnamed: 0,ndf_id,doc_id
0,7574.0,486888.0
1,7574.0,3323227.0
2,7574.0,1358994.0
3,7574.0,2356292.0
4,7574.0,1552852.0


In [25]:
degrees = 2.5
tdf = pd.read_csv(f'../data/study_da_6 - Temperature - upper_pred_{degrees}.csv')
tdf["da_var"] = "Temperature"
pdf = pd.read_csv(f'../data/study_da_6 - Precipitation - upper_pred_{degrees}.csv')
pdf["da_var"] = "Precipitation"

dadf = pd.concat([tdf,pdf])

dadf = dadf[pd.notna(dadf["gridcells"]) & dadf["gridcells"]>0]

dadf["da_trend_p"] = dadf["da_trend_cells"] / dadf["gridcells"]

dadf["da_trend_cat"] = None

dadf.loc[dadf['da_trend_p']==0,"da_trend_cat"] = "0==DA"
dadf.loc[dadf['da_data_cells']==0,"da_trend_cat"] = "NA"
dadf.loc[dadf['da_trend_p']>0,"da_trend_cat"] = "0<DA<0.5"
dadf.loc[dadf['da_trend_p']>0.5,"da_trend_cat"] = "DA>0.5"

places =  pd.read_csv('../data/place_df.csv')

In [24]:
dadf.columns


Index(['Unnamed: 0', 'id', 'content', 'title', 'wosarticle__de',
       'wosarticle__wc', 'ar5', 'seen', 'relevant', 'random_sample',
       ...
       '2 - 2.5. Detection of a regional climate trend (no attribution) - lower_pred',
       '2 - 2.5. Detection of a regional climate trend (no attribution) - upper_pred',
       'df_da', 'gridcells', 'da_trend_cells', 'da_data_cells', 'feature_type',
       'da_var', 'da_trend_p', 'da_trend_cat'],
      dtype='object', length=303)

In [10]:
cat_df = pd.read_csv('../data/1_predicted_category_documents.csv')
predictions = pd.read_csv('../data/1_document_relevance.csv')
df = df.merge(cat_df.merge(predictions), how="outer")

pred_cats = [c for c  in df.columns if "12 - " in c and " - mean_prediction" in c]
for c in pred_cats:
    print(df[c].sum())
    label = c.replace(" - mean_prediction",'')
    cs = [c, c.replace('mean_prediction','lower_pred'), c.replace('mean_prediction','upper_pred')]
    df.loc[df[label]==1,cs] = 1
    print(df[c].sum())
print(pred_cats)

pred_cats = [
    "12 - Terrestrial ES - mean_prediction",
    "12 - Coastal and marine Ecosystems - mean_prediction",
    "12 - Mountains, snow and ice - mean_prediction",
    "12 - Rivers, lakes, and soil moisture - mean_prediction",
    "12 - Human and managed - mean_prediction",
    "12 - Total"
]

pcols = [
    '0 - relevance - mean_prediction',
    '0 - relevance - lower_pred',
    '0 - relevance - upper_pred'
]

df.loc[df['relevant']==1,pcols]=1



12695.1
12868.1
10146.699999999997
10395.699999999997
5095.2
5254.2
13164.1
13456.1
32501.100000000002
32943.100000000006
['12 - Coastal and marine Ecosystems - mean_prediction', '12 - Human and managed - mean_prediction', '12 - Mountains, snow and ice - mean_prediction', '12 - Rivers, lakes, and soil moisture - mean_prediction', '12 - Terrestrial ES - mean_prediction']


In [11]:
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

ndf.loc[ndf['LON']>180,"LON"]-=360
ndf['ipccreg'] = 0

#df = pd.DataFrame(columns=['IPCC region', 'Documents'])
index = pd.Index(ipcc_regions.NAME, name="IPCC Region")

table = pd.DataFrame(columns=['Documents'], index=index)

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry,ndf['LON'],ndf['LAT'])
    idx = np.argwhere(inplace==True)
    
    ndf.loc[idx[:,0],"ipccreg"] = i+1
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()
    
    mid = df[(df['id'].isin(dids)) & (df["0 - relevance - mean_prediction"]>=0.5) ].shape[0]
    low = df[(df['id'].isin(dids)) & (df["0 - relevance - lower_pred"]>=0.5) ].shape[0]
    high = df[(df['id'].isin(dids)) & (df["0 - relevance - upper_pred"]>=0.5) ].shape[0]
    
    table.loc[row.NAME,"Documents"] = f"{mid} ({low}-{high})"
    
    break



In [7]:
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

ndf.loc[ndf['LON']>180,"LON"]-=360
ndf['ipccreg'] = 0


index = pd.MultiIndex.from_product([ipcc_regions.NAME, [x.split(' - ')[1] for x in pred_cats]], names=['IPCC Region', 'Impact'])

table = pd.DataFrame(columns=['Documents'], index=index)

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry,ndf['LON'],ndf['LAT'])
    idx = np.argwhere(inplace==True)
    
    ndf.loc[idx[:,0],"ipccreg"] = i+1
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()
    
    for j, pc in enumerate(pred_cats):
        
        if "Total" in pc:
            mid = df[(df['id'].isin(dids)) & (df["0 - relevance - mean_prediction"]>=0.5) ].shape[0]
            low = df[(df['id'].isin(dids)) & (df["0 - relevance - lower_pred"]>=0.5) ].shape[0]
            high = df[(df['id'].isin(dids)) & (df["0 - relevance - upper_pred"]>=0.5) ].shape[0]            
        else:
            mid = df[
                (df['id'].isin(dids)) & 
                (df["0 - relevance - mean_prediction"]>=0.5) &
                (df[pc] >= 0.5)
            ].shape[0]
            low = df[
                (df['id'].isin(dids)) & 
                (df["0 - relevance - lower_pred"]>=0.5) &
                (df[pc.replace('mean_prediction','lower_pred')]>=0.5)
            ].shape[0]
            high = df[
                (df['id'].isin(dids)) & 
                (df["0 - relevance - upper_pred"]>=0.5) &
                (df[pc.replace('mean_prediction','upper_pred')]>=0.5)
            ].shape[0]
    
        table.loc[(row.NAME,pc.split(' - ')[1]),"Documents"] = f"{mid} ({low}-{high})"

print(table.shape)
table.head(10)

(198, 1)


Unnamed: 0_level_0,Unnamed: 1_level_0,Documents
IPCC Region,Impact,Unnamed: 2_level_1
Alaska/N.W. Canada [ALA:1],Terrestrial ES,1813 (1461-2202)
Alaska/N.W. Canada [ALA:1],Coastal and marine Ecosystems,367 (258-494)
Alaska/N.W. Canada [ALA:1],"Mountains, snow and ice",541 (431-649)
Alaska/N.W. Canada [ALA:1],"Rivers, lakes, and soil moisture",496 (354-612)
Alaska/N.W. Canada [ALA:1],Human and managed,190 (130-254)
Alaska/N.W. Canada [ALA:1],Total,4269 (3634-4958)
Amazon [AMZ:7],Terrestrial ES,56 (37-66)
Amazon [AMZ:7],Coastal and marine Ecosystems,318 (227-406)
Amazon [AMZ:7],"Mountains, snow and ice",31 (23-34)
Amazon [AMZ:7],"Rivers, lakes, and soil moisture",81 (37-103)


In [30]:
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")

ndf.loc[ndf['LON']>180,"LON"]-=360
ndf['ipccreg'] = 0

#df = pd.DataFrame(columns=['IPCC region', 'Documents'])
index = pd.Index(ipcc_regions.NAME, name="IPCC Region")

table = pd.DataFrame(columns=['D&A Trend', "nonD&A Trend", "NAD&A Trend", "Sensitivity","Detection"], index=index)

for i, row in ipcc_regions.iterrows():
    inplace = shapely.vectorized.contains(row.geometry,ndf['LON'],ndf['LAT'])
    idx = np.argwhere(inplace==True)
    
    ndf.loc[idx[:,0],"ipccreg"] = i+1
    dids = place_gridcells[place_gridcells['ndf_id'].isin(idx)]['doc_id'].unique()
    
    midids = df[(df['id'].isin(dids)) & (df["0 - relevance - mean_prediction"]>=0.5) ]['id']
    lowids = df[(df['id'].isin(dids)) & (df["0 - relevance - lower_pred"]>=0.5) ]['id']
    highids = df[(df['id'].isin(dids)) & (df["0 - relevance - upper_pred"]>=0.5) ]['id']
    
    #D&A Trend
    sub_dadf = dadf[dadf['da_trend_cat']=='DA>0.5']
    mid = sub_dadf[sub_dadf['id'].isin(midids)].shape[0]
    low = sub_dadf[sub_dadf['id'].isin(lowids)].shape[0]
    high = sub_dadf[sub_dadf['id'].isin(highids)].shape[0]
    
    table.loc[row.NAME,"D&A Trend"] = f"{mid} ({low}-{high})"
    
    #NO D&A Trend
    sub_dadf = dadf[dadf['da_trend_cat']=='0==DA']
    mid = sub_dadf[sub_dadf['id'].isin(midids)].shape[0]
    low = sub_dadf[sub_dadf['id'].isin(lowids)].shape[0]
    high = sub_dadf[sub_dadf['id'].isin(highids)].shape[0]
    
    table.loc[row.NAME,"nonD&A Trend"] = f"{mid} ({low}-{high})"
    
    #NO D&A Trend
    sub_dadf = dadf[dadf['da_trend_cat']=='NA']
    mid = sub_dadf[sub_dadf['id'].isin(midids)].shape[0]
    low = sub_dadf[sub_dadf['id'].isin(lowids)].shape[0]
    high = sub_dadf[sub_dadf['id'].isin(highids)].shape[0]
    
    table.loc[row.NAME,"NAD&A Trend"] = f"{mid} ({low}-{high})"
    
    #NO D&A Trend
    sub_dadf = dadf[dadf['da_trend_cat']=='NA']
    mid = sub_dadf[sub_dadf['id'].isin(midids)].shape[0]
    low = sub_dadf[sub_dadf['id'].isin(lowids)].shape[0]
    high = sub_dadf[sub_dadf['id'].isin(highids)].shape[0]
    
    table.loc[row.NAME,"NAD&A Trend"] = f"{mid} ({low}-{high})"
    
    #break
    
table.head(10)




Unnamed: 0_level_0,D&A Trend,nonD&A Trend,NAD&A Trend,Sensitivity,Detection
IPCC Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Alaska/N.W. Canada [ALA:1],454 (399-513),651 (569-726),86 (71-96),,
Amazon [AMZ:7],268 (231-293),126 (109-149),16 (14-19),,
Central America/Mexico [CAM:6],313 (274-344),205 (178-239),32 (29-34),,
small islands regions Caribbean,73 (60-76),93 (79-110),13 (10-15),,
Central Asia [CAS:20],530 (462-600),315 (277-360),28 (24-30),,
Central Europe [CEU:12],816 (719-911),535 (465-612),74 (59-81),,
Canada/Greenland/Iceland [CGI:2],1162 (1030-1297),673 (593-767),123 (105-139),,
Central North America [CNA:4],340 (294-385),339 (291-399),29 (21-32),,
East Africa [EAF:16],640 (546-732),390 (330-448),82 (68-96),,
East Asia [EAS:22],134 (117-150),175 (148-206),25 (20-26),,


In [27]:
dadf.da_trend_cat.unique()

array(['0==DA', 'DA>0.5', '0<DA<0.5', 'NA'], dtype=object)

In [9]:
ndf = pd.read_csv("../data/gridcell_studies_all_2.5.csv")
ndf[ndf['index']==7574].head()

Unnamed: 0,LAT,LON,n_studies,LAT_25,LON_25,da_cat,index,n_study_prop
7674,43.75,286.25,333.0,42.5,287.5,0.0,7574,137.268438


In [7]:
place_gridcells = pd.read_csv("../data/study_gridcell_all_2.5.csv")
place_gridcells.head()

Unnamed: 0,ndf_id,doc_id
0,7574.0,486888.0
1,7574.0,3323227.0
2,7574.0,1358994.0
3,7574.0,2356292.0
4,7574.0,1552852.0


In [17]:
ipcc_regions

Unnamed: 0,Alaska/N.W. Canada [ALA:1],ALA,1,land,-105.0 60.0,-168.0 60.0,-168.0 72.6,-105.0 72.6,Unnamed: 8,Unnamed: 9
0,Amazon [AMZ:7],AMZ,7.0,land,-66.4 -20.0,-79.7 -1.2,-68.8 11.4,-50.0 11.4,-50.0 -20.0,
1,Central America/Mexico [CAM:6],CAM,6.0,land,-68.8 11.4,-79.7 -1.2,-118.3 28.6,-90.3 28.6,,
2,small islands regions Caribbean,CAR*,,all,-68.8 11.4,-85.8 25.0,-60.0 25.0,-60.0 11.4,,
3,Central Asia [CAS:20],CAS,20.0,land,60.0 30.0,60.0 50.0,75.0 50.0,75.0 30.0,,
4,Central Europe [CEU:12],CEU,12.0,land,-10.0 45.0,-10.0 48.0,40.0 61.3,40.0 45.0,,
5,Canada/Greenland/Iceland [CGI:2],CGI,2.0,land,-10.0 50.0,-105.0 50.0,-105.0 85.0,-10.0 85.0,,
6,Central North America [CNA:4],CNA,4.0,land,-85.0 50.0,-85.0 28.6,-105.0 28.6,-105.0 50.0,,
7,East Africa [EAF:16],EAF,16.0,land,25.0 -11.4,25.0 15.0,52.0 15.0,52.0 -11.4,,
8,East Asia [EAS:22],EAS,22.0,land,100.0 20.0,100.0 50.0,145.0 50.0,145.0 20.0,,
9,East North America [ENA:5],ENA,5.0,land,-60.0 25.0,-85.0 25.0,-85.0 50.0,-60.0 50.0,,


In [16]:
cat_df[cat_df['id']==486888]['content'].values[0]

'Three conifer species grown in plantations in the southeastern Adirondack Mountains of New York were chosen to model tree growth. Annual growth of trees was decomposed into several components that reflect various intrinsic or extrinsic factors. Growth signals indicative of climatic effects were used to construct growth-climate models using both multivariate regression analysis and Kalman filter methods. Two growth models were used to simulate tree growth response to future climate change projected by GCMs. The consistent results of both models indicate that different conifer species have individualistic growth responses to future climatic change. The response behaviors of trees are affected greatly by local stand conditions and species tolerance to drought.'