## Sample TCFD Report Generation
This notebook demonstrates fetching Sust Risk Exposure data via the API and converting it into summary exposures for use in TCFD (Task Force on Climate-related Financial Disclosures) reports and other climate-related financial disclosures.

For more information on the hazards included and the summarization thresholds, see the [Data Guide](https://developers.sustglobal.com/dataguide.html#summarization-labeling)


In [74]:
import requests
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, shape, Polygon
from IPython.display import display, HTML
import contextily as ctx
import matplotlib.pyplot as plt
import folium, matplotlib
import os

def display_df(df):
    """Render ``df`` as an HTML table in the notebook output area."""
    markup = df.to_html()
    return display(HTML(markup))

# The API key is read from the environment; it is None when the variable is unset.
SUST_API_KEY = os.environ.get("SUST_API_KEY")
PORTFOLIO = "Apple_Supply_Chain"
PROJECT = "sust-sales-DEMO"

# Per-hazard cut-offs used when bucketing exposure values: a value below 'low'
# is labelled Low, a value below 'mid' is labelled Medium, anything else High.
DATA_GUIDE_THRESHOLDS = {
    'wildfire': dict(low=0.01, mid=0.05),
    'cyclone': dict(low=0.025, mid=0.075),
    'flood_potential': dict(low=0.01, mid=0.05),
    'water_stress': dict(low=0.3, mid=0.6),
    'heatwave': dict(low=30, mid=50),
}

# Request URLs keyed by endpoint id. Every portfolio-scoped URL already carries
# a query string (rows=200), so further parameters are appended with '&'.
endpoints = {
    "portfolios": "https://explorer.sustglobal.io/api/portfolios/",
    "assets": "https://explorer.sustglobal.io/api/portfolios/" + PORTFOLIO + "/assets?rows=200",
    "summary": "https://explorer.sustglobal.io/api/portfolios/" + PORTFOLIO + "/datasets/physical/summary?rows=200",
    "physical": "https://explorer.sustglobal.io/api/portfolios/" + PORTFOLIO + "/datasets/physical/items?rows=200",
}



def get_endpoint_as_df(endpoint_id, params=None, fetch_args=None):
    """Fetch one API endpoint and return its rows as a DataFrame.

    Parameters
    ----------
    endpoint_id : str
        Key into the module-level ``endpoints`` dict
        ("portfolios", "assets", "summary" or "physical").
    params : dict, optional
        Extra query-string parameters sent with every page request.
        Not sent for "portfolios", which is fetched with a single request.
    fetch_args : dict, optional
        Fetch options. Only ``"print_pages"`` is read: when truthy, each page
        number is printed as it is received. A missing key means False.

    Returns
    -------
    pandas.DataFrame or geopandas.GeoDataFrame
        * "assets": a GeoDataFrame in EPSG:4326, built from the ``geometry``
          column when present, otherwise from the ``lng``/``lat`` columns.
        * "physical": the response rows with the ``risk_exposure`` column
          expanded into one column per key.
        * anything else: the rows as returned.
        An empty result is returned as an empty DataFrame for every endpoint.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    ValueError
        If a page is non-empty but is not a JSON array.
    """
    params = {} if params is None else params
    print_pages = bool((fetch_args or {}).get("print_pages", False))

    headers = {
        "X-SustGlobal-APIKey": SUST_API_KEY,
        "X-SustGlobal-Project": PROJECT
    }
    # requests never times out on its own; bound each call so a stalled
    # connection cannot hang the notebook indefinitely.
    request_timeout = 60  # seconds

    def fetch_json(url, **request_kwargs):
        # Fail loudly on HTTP errors instead of paging through an error payload.
        response = requests.get(url, headers=headers, timeout=request_timeout, **request_kwargs)
        response.raise_for_status()
        return response.json()

    def fetch_with_pagination():
        if endpoint_id == "portfolios":
            return pd.DataFrame(fetch_json(f'{endpoints[endpoint_id]}'))
        rows = []
        page = 1
        while True:
            batch = fetch_json(f'{endpoints[endpoint_id]}&page={page}', params=params)
            if not batch:
                # An empty page marks the end of the result set.
                return pd.DataFrame(rows)
            if not isinstance(batch, list):
                raise ValueError(
                    f"Expected a JSON array from endpoint {endpoint_id!r} "
                    f"(page {page}), got {type(batch).__name__}"
                )
            if print_pages:
                print(page)
            rows.extend(batch)
            page += 1

    res_df = fetch_with_pagination()
    if res_df.empty:
        # Nothing to post-process; the columns used below would not exist.
        return res_df

    if endpoint_id == "assets":
        if "geometry" in res_df.columns: # Polygons
            return gpd.GeoDataFrame(res_df.copy().drop("geometry", axis=1), geometry=res_df.geometry.apply(shape), crs="EPSG:4326")
        else: # Points
            return gpd.GeoDataFrame(res_df.copy(), geometry=gpd.points_from_xy(res_df.lng, res_df.lat), crs="EPSG:4326")
    elif endpoint_id == "physical":
        # Presumably risk_exposure holds one mapping per row; each key of that
        # mapping becomes its own column next to the remaining columns.
        return pd.concat([
            res_df.drop('risk_exposure', axis=1),
            pd.DataFrame(list(res_df.risk_exposure))
        ], axis=1)
    else:
        return res_df
    
    
def map_risk_summary_json_to_single_hazard(summary_series, hazard):
    """Replace the ``risk_summaries`` column with the fields of one hazard's entry.

    Parameters
    ----------
    summary_series : pandas.DataFrame
        Frame with a ``risk_summaries`` column whose cells are iterables of
        dicts, each dict carrying a ``"hazard"`` key.
    hazard : str
        Hazard whose entry is extracted from every row.

    Returns
    -------
    pandas.DataFrame
        The input without ``risk_summaries``, plus one column per key of the
        selected entry. Rows keep the input's index.

    Raises
    ------
    IndexError
        If a row has no entry for ``hazard``.
    """
    def pick_entry(summaries):
        # The first matching entry wins.
        for entry in summaries:
            if entry["hazard"] == hazard:
                return entry
        raise IndexError(f"no risk summary found for hazard {hazard!r}")

    selected = [pick_entry(summaries) for summaries in summary_series.risk_summaries]
    # Reuse the caller's index: concat(axis=1) aligns on index labels, so a
    # fresh 0..n-1 index would misalign rows of a filtered or re-indexed frame.
    hazard_df = pd.DataFrame(selected, index=summary_series.index)
    return pd.concat([
        summary_series.drop("risk_summaries", axis=1),
        hazard_df
    ], axis=1)


def expand_timeseries_json_to_df(timeseries):
    """Build a DataFrame with one row per element of ``timeseries``."""
    records = [*timeseries]
    frame = pd.DataFrame(records)
    return frame


def summarize(physical_timeseries_df, window, thresholds=None):
    """Label each row Low / Medium / High from its peak value over a year window.

    Parameters
    ----------
    physical_timeseries_df : pandas.DataFrame
        One row per series, with a ``hazard`` column and one column per year
        named by the year as a string (e.g. ``"2023"``).
    window : sequence of two ints
        ``(start_year, end_year)``. The years used are
        ``start_year .. end_year - 1``: the end year itself is excluded, so
        consecutive windows such as 2023-2030 and 2030-2045 do not overlap.
    thresholds : dict, optional
        ``{hazard: {'low': x, 'mid': y}}``. Defaults to the module-level
        ``DATA_GUIDE_THRESHOLDS``.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in (it is modified in place), with a
        new column ``'Summary {start}-{end}'`` holding 'Low' (peak < low),
        'Medium' (peak < mid), 'High' (otherwise), or None when the row has no
        data in the window.

    Raises
    ------
    KeyError
        If a year column is missing or a hazard has no thresholds.
    """
    if thresholds is None:
        thresholds = DATA_GUIDE_THRESHOLDS

    column_title = f'Summary {window[0]}-{window[1]}'
    year_columns = [str(year) for year in range(window[0], window[1])]

    def bucket(peak, hazard):
        limits = thresholds[hazard]
        # NaN compares False against every threshold, so without this guard a
        # row with no data would be reported as 'High'.
        if pd.isna(peak):
            return None
        if peak < limits['low']:
            return 'Low'
        if peak < limits['mid']:
            return 'Medium'
        return 'High'

    # Peak value per row across the window (missing values are skipped).
    window_max = physical_timeseries_df[year_columns].max(axis=1)
    # Assign a plain list so values are placed by position; this stays correct
    # even when the frame's index contains duplicate labels.
    physical_timeseries_df[column_title] = [
        bucket(peak, hazard)
        for peak, hazard in zip(window_max, physical_timeseries_df['hazard'])
    ]
    return physical_timeseries_df

    
        
        
    
    

### Fetch Physical Risk Data

In [112]:
def _fetch_exposure(hazard, indicator):
    """Fetch the "physical" endpoint for one hazard/indicator pair.

    Every request uses the same scenario ('ssp585') and measure ('mid').
    """
    query = {'hazard': hazard, 'indicator': indicator, 'scenario': 'ssp585', 'measure': 'mid'}
    return get_endpoint_as_df("physical", params=query)


wildfire = _fetch_exposure('wildfire', 'unified_prob')
floods = _fetch_exposure('flood_potential', 'inland_flood_prob')
cyclones = _fetch_exposure('cyclone', 'prob')
water_stress = _fetch_exposure('water_stress', 'unified_score')
heatwaves = _fetch_exposure('heatwave', 'freq')

# Stack the per-hazard frames row-wise into a single table.
per_hazard_frames = [wildfire, floods, cyclones, water_stress, heatwaves]
exposure_timeseries = pd.concat(per_hazard_frames, axis=0)
exposure_timeseries

### Generate Near-, Medium-, and Long-term Risk Summaries

In [199]:
# One [start_year, end_year] pair per summary column; summarize() adds a
# 'Summary {start}-{end}' column for each pair.
windows = (
    [2023, 2030],
    [2030, 2045],
    [2045, 2060]
)

# Work on a copy so exposure_timeseries itself is left untouched.
summary_df = exposure_timeseries.copy()
for w in windows:
    summary_df = summarize(summary_df.copy(), w)
    
def color_formatter(v):
    """Return the CSS background rule for a summary label.

    'Low' maps to white and 'Medium' to orange; every other value maps to red.
    """
    known_labels = (
        ('Low', 'background-color:white'),
        ('Medium', "background-color:orange"),
    )
    for label, css in known_labels:
        if v == label:
            return css
    return "background-color:red"


# Keep the asset id, the hazard and every 'Summary ...' column, then pivot to one
# row per asset with a (summary column, hazard) column pair for each label.
summary_df = summary_df[['portfolio_index', 'hazard'] + [i for i in summary_df.columns if 'Summary' in i]].pivot(
    index='portfolio_index',
    columns='hazard'
)

# Show only the first ten assets.
asset_sample = summary_df.iloc[0:10]

# Vertical separators after data columns 4 and 9. With five hazards per window
# these are the last columns of the first and second window groups; the
# selectors need adjusting if the number of hazards changes.
borders_vertical1 = {'selector': 'td.data.col4',
         'props': 'border-right: 2px solid #000066'}
borders_vertical2 = {'selector': 'td.data.col9',
         'props': 'border-right: 2px solid #000066'}
sample_styler = asset_sample.style.set_table_styles([borders_vertical1, borders_vertical2])
# Styler.applymap is deprecated since pandas 2.1 in favour of Styler.map.
# Use the new name when it exists and fall back to applymap on older pandas.
style_each_cell = getattr(sample_styler, 'map', None) or sample_styler.applymap
style_each_cell(color_formatter)



Unnamed: 0_level_0,Summary 2023-2030,Summary 2023-2030,Summary 2023-2030,Summary 2023-2030,Summary 2023-2030,Summary 2030-2045,Summary 2030-2045,Summary 2030-2045,Summary 2030-2045,Summary 2030-2045,Summary 2045-2060,Summary 2045-2060,Summary 2045-2060,Summary 2045-2060,Summary 2045-2060
hazard,cyclone,flood_potential,heatwave,water_stress,wildfire,cyclone,flood_potential,heatwave,water_stress,wildfire,cyclone,flood_potential,heatwave,water_stress,wildfire
portfolio_index,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
0,Medium,Low,High,Low,Medium,Medium,Low,High,Low,Medium,Medium,Low,High,Medium,Medium
1,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Medium,Low,Low,Medium,Low,Medium
2,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low
3,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Medium,Low,Low,Medium,Low,Medium
4,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Medium,Low,Low
5,Low,Low,Low,Low,Low,Low,Medium,Low,Low,Low,Low,Medium,Medium,Low,Low
6,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Low,Medium,Medium,Low
7,Low,Low,Low,Medium,Low,Low,Low,Low,Medium,Low,Low,Low,Medium,Medium,Low
8,Low,Low,Low,Medium,Low,Low,Low,Low,High,Low,Low,Low,Medium,High,Low
9,Low,Low,Low,Low,Low,Low,Low,Low,Medium,Low,Low,Low,Medium,Medium,Low


## Aggregate Risk Exposure Summaries
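For each hazard and time window, count the assets whose exposure is Medium or High, and those whose exposure is High, to show which hazards are most impactful across the portfolio. The second table expresses the same counts as a fraction of all assets.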

In [205]:
# build_df maps a table title ('Medium or High', 'High') to a hazard x window
# table of asset counts.
build_df = dict()

for hazard_level in ('Medium', 'High'):
    if hazard_level == 'Medium':
        table_title, counted_labels = 'Medium or High', ('Medium', 'High')
    else:
        table_title, counted_labels = hazard_level, ('High',)

    # Sorted labels keep the row and column order stable between runs
    # (iterating a set of strings can change order from one kernel to the next).
    predominant_hazards = pd.DataFrame(
        0,
        columns=sorted(set(c[0] for c in summary_df.columns)),
        index=sorted(set(c[1] for c in summary_df.columns))
    )
    for c in summary_df.columns:
        label_counts = summary_df[c].value_counts()
        # .get(..., 0): a label that no asset has is absent from value_counts(),
        # and indexing it directly would raise KeyError.
        n_assets_at_risk = sum(int(label_counts.get(level, 0)) for level in counted_labels)
        predominant_hazards.loc[c[1], c[0]] = n_assets_at_risk

    build_df[table_title] = predominant_hazards


# Build the combined table once; show absolute counts, then the share of all assets.
at_risk_counts = pd.concat(list(build_df.values()), axis=1, keys=list(build_df.keys()))
display(at_risk_counts)
display(at_risk_counts / summary_df.shape[0])

Unnamed: 0_level_0,Medium or High,Medium or High,Medium or High,High,High,High
Unnamed: 0_level_1,Summary 2023-2030,Summary 2030-2045,Summary 2045-2060,Summary 2023-2030,Summary 2030-2045,Summary 2045-2060
water_stress,641,758,795,98,161,519
wildfire,240,253,256,4,11,9
cyclone,574,590,593,121,172,203
heatwave,71,694,929,34,78,245
flood_potential,63,67,77,51,53,57


Unnamed: 0_level_0,Medium or High,Medium or High,Medium or High,High,High,High
Unnamed: 0_level_1,Summary 2023-2030,Summary 2030-2045,Summary 2045-2060,Summary 2023-2030,Summary 2030-2045,Summary 2045-2060
water_stress,0.665628,0.787124,0.825545,0.101765,0.167186,0.538941
wildfire,0.249221,0.262721,0.265836,0.004154,0.011423,0.009346
cyclone,0.596054,0.612669,0.615784,0.125649,0.178609,0.2108
heatwave,0.073728,0.720665,0.964694,0.035306,0.080997,0.254413
flood_potential,0.065421,0.069574,0.079958,0.05296,0.055036,0.05919
