# Deriving all KPIs related to cold/winter weather for cities

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
from sklearn.preprocessing import RobustScaler, QuantileTransformer
import rasterio
from matplotlib import colors
from scipy import stats
from tqdm import *
from pyproj import Transformer
import ast
import glob
from io import StringIO

In [None]:
# Load the per-city lookup table and pull out the columns the rest of the
# notebook works with.
path = "../input/externald/Supplementary Data/"
vulnerability = pd.read_csv(path + "cities_updated_geo_us_2020.csv")

acct_number = vulnerability["Account Number"]
cities = vulnerability["city_bing"]
states = vulnerability["state"]
MSA = vulnerability["MSA"]
counties = vulnerability["counties"]
counties_weights = vulnerability["counties_weights"]
flag_multi_counties = vulnerability["flag_multiple_counties"]

# A city flagged as spanning multiple counties has its FIPS entry parsed as a
# Python literal (a collection of codes); every other city gets a single int.
match_fips = [
    ast.literal_eval(raw_fips) if is_multi else int(raw_fips)
    for raw_fips, is_multi in zip(vulnerability["FIPS"], flag_multi_counties)
]

In [None]:
# CDP 2020 city questionnaire responses and the list of disclosing cities.
cities_2020 = pd.read_csv(
    "/kaggle/input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2020_Full_Cities_Dataset.csv"
)
cities_disc_2020 = pd.read_csv(
    "/kaggle/input/cdp-unlocking-climate-solutions/Cities/Cities Disclosing/2020_Cities_Disclosing_to_CDP.csv"
)

# Outer-join responses with the disclosure records on the account number.
cities_2020_merged = cities_2020.merge(cities_disc_2020, on="Account Number", how="outer", copy=False)

# Keep only the accounts whose disclosure record lists the United States.
unique_20 = np.unique(
    cities_disc_2020.loc[cities_disc_2020["Country"] == "United States of America", "Account Number"]
)
cities_2020_merged_sub = cities_2020_merged[cities_2020_merged["Account Number"].isin(unique_20)]

def _select_answer_cell(answer, column_key, row_key, column_number, row_number):
    """Narrow *answer* to the given column and/or row number; ``None`` means no filter."""
    if column_number is not None:
        answer = answer[answer[column_key] == column_number]
    if row_number is not None:
        answer = answer[answer[row_key] == row_number]
    return answer


def fetch_answer_individual(account, question_number, org_type="city", year=2018, corp_res_type=None, column_number=None, row_number=None):
    """Return the response rows one account gave to one question.

    Args:
        account: Account number to look up.
        question_number: Question identifier, compared for equality against
            the question-number column of the selected table.
        org_type: ``"city"`` reads ``cities_2020_merged_sub``; ``"corp"``
            reads a corporate table chosen by ``corp_res_type``.
        year: Key into ``all_corps_cc``; only used when ``org_type="corp"``
            and ``corp_res_type="ws"``.
        corp_res_type: ``"cc"`` reads ``corporations_2020``, ``"ws"`` reads
            ``all_corps_cc[year]``. Ignored for cities.
        column_number: Optional column number to keep (not applied for "ws").
        row_number: Optional row number to keep (not applied for "ws").

    Returns:
        The matching rows of the selected table (possibly empty).

    Raises:
        ValueError: If ``org_type`` / ``corp_res_type`` do not name a
            supported combination. Previously this case printed a message
            and then failed with ``UnboundLocalError`` on ``return``.

    NOTE(review): ``corporations_2020`` and ``all_corps_cc`` are not defined
    in the visible part of this notebook, so the corporate branches raise
    ``NameError`` unless they are defined elsewhere.
    """
    if org_type == "city":
        df = cities_2020_merged_sub
        subset = df[df["Question Number"] == question_number]
        answer = subset[subset["Account Number"] == account]
        return _select_answer_cell(answer, "Column Number", "Row Number", column_number, row_number)

    if org_type == "corp" and corp_res_type == "cc":
        df = corporations_2020
        subset = df[df["question_number"] == question_number]
        answer = subset[subset["account_number"] == account]
        return _select_answer_cell(answer, "column_number", "row_number", column_number, row_number)

    if org_type == "corp" and corp_res_type == "ws":
        # As in the original code, no column/row filtering is applied here.
        df = all_corps_cc[year]
        subset = df[df["question_number"] == question_number]
        return subset[subset["account_number"] == account]

    raise ValueError(
        f"Unsupported combination org_type={org_type!r}, corp_res_type={corp_res_type!r}; "
        "expected org_type='city', or org_type='corp' with corp_res_type 'cc' or 'ws'."
    )

In [None]:
def score_hazard_risk(cdf):
    """Score one self-reported hazard from its survey answers.

    ``cdf`` holds the response rows for a single hazard. The answers in
    columns 3, 4 and 10 are mapped through ``scores_lo_hi`` and those in
    columns 8 and 9 through ``scores_increasing_decreasing``.

    Returns:
        ``(prob * mag) + (prob * future_freq * future_intensity * future_mag)``.

    Raises:
        IndexError: If one of the expected columns has no row in ``cdf``.
        KeyError: If an answer is not a key of the relevant score table.
    """
    def _first_answer(column_number):
        # First response recorded for this column, as a string ("nan" if missing).
        in_column = cdf[cdf["Column Number"] == column_number]
        return str(in_column["Response Answer"].iloc[0])

    prob = scores_lo_hi[_first_answer(3.0)]
    mag = scores_lo_hi[_first_answer(4.0)]
    future_freq = scores_increasing_decreasing[_first_answer(8.0)]
    future_intensity = scores_increasing_decreasing[_first_answer(9.0)]
    future_mag = scores_lo_hi[_first_answer(10.0)]

    current_term = prob * mag
    future_term = prob * future_freq * future_intensity * future_mag
    return current_term + future_term

def return_risk_and_score_per_county(hazard_type):
    """Collect the reported flag and survey score of one hazard for every city.

    For each account in ``vulnerability`` the answers to question '2.1' are
    fetched; if any answer equals ``hazard_type``, the rows sharing that
    answer's row number are scored with ``score_hazard_risk``.

    Returns:
        A pair ``(risk, scores)``: ``risk`` is a list holding 1 where the
        hazard was reported and NaN otherwise; ``scores`` is an array of the
        survey scores with NaN replaced by 0.
    """
    risk, self_reported_score = [], []

    for account in vulnerability["Account Number"]:
        answers = fetch_answer_individual(account, '2.1')
        hits = np.where(np.array(answers["Response Answer"]) == hazard_type)[0]

        if len(hits) == 0:
            # Hazard not reported by this city.
            risk.append(np.nan)
            self_reported_score.append(np.nan)
            continue

        hazard_row = answers["Row Number"].iloc[hits[0]]
        risk.append(1)
        self_reported_score.append(score_hazard_risk(answers[answers["Row Number"] == hazard_row]))

    return risk, np.nan_to_num(np.array(self_reported_score))

# Numeric value assigned to each low-to-high survey answer.
# "nan" is the string form of a missing answer and scores 0.
scores_lo_hi = {
    "Low": 1,
    "Medium Low": 2,
    "Medium": 3,
    "Medium High": 4,
    "High": 5,
    "Do not know": 0.5,
    "Does not currently impact the city": 0.5,
    "nan": 0,
}

# Multiplier assigned to each expected-future-change survey answer.
# "nan" is the string form of a missing answer and scores 0.0.
scores_increasing_decreasing = {
    "Increasing": 2,
    "Decreasing": 0.5,
    "None": 1,
    "Do not know": 1,
    "Not expected to happen in the future": 0.1,
    "nan": 0.0,
}

### Scoring survey response 

We derive the survey score for each self-reported hazard using the equation:
#### score = (probability × magnitude) + (probability × future_frequency × future_intensity × future_magnitude)

In [None]:
# Reported flag and survey score per city for each cold-related hazard label.
(
    (risk_cold_wave, score_cold_wave),
    (risk_cold_days, score_cold_days),
    (risk_winter_conditions, score_winter_conditions),
    (risk_heavy_snow, score_heavy_snow),
) = [
    return_risk_and_score_per_county(hazard_label)
    for hazard_label in (
        "Extreme cold temperature > Cold wave",
        "Extreme cold temperature > Extreme cold days",
        "Extreme cold temperature > Extreme winter conditions",
        "Extreme Precipitation > Heavy snow",
    )
]


In [None]:
def standardize_rank(arr, direction=1):
    """Rank the valid entries of ``arr`` and scale the ranks by the top rank.

    NaN/inf entries are masked before ranking and come back as NaN.

    Args:
        arr: One-dimensional sequence of numbers.
        direction: ``1`` returns ``rank / max_rank`` (largest value -> 1);
            any other value returns ``1 - rank / max_rank`` (largest -> 0).

    Returns:
        Float array with one scaled rank per input entry.
    """
    valid_only = np.ma.masked_invalid(arr)
    ranks = stats.mstats.rankdata(valid_only)
    # rankdata assigns rank 0 to masked entries; report them as missing.
    ranks[ranks == 0] = np.nan

    scaled = ranks / np.nanmax(ranks)
    return scaled if direction == 1 else 1 - scaled

    
# Rank each self-reported hazard score on its own values. The previous code
# passed score_cold_wave to all four calls, so the four ranked columns were
# identical copies of the cold-wave ranking.
cold_wave_ranked = standardize_rank(score_cold_wave, direction=1)
cold_days_ranked = standardize_rank(score_cold_days, direction=1)
winter_conditions_ranked = standardize_rank(score_winter_conditions, direction=1)
heavy_snow_ranked = standardize_rank(score_heavy_snow, direction=1)

In [None]:
fema = pd.read_csv("../input/femadisasters/DisasterDeclarationsSummaries.csv")

# Build a combined county code: state code zero-padded to 2 characters followed
# by county code zero-padded to 3 characters, then converted to int.
# np.char.add is the public spelling of np.core.defchararray.add; the np.core
# namespace is private/deprecated in NumPy 2.
ct = np.char.zfill(np.array(fema['fipsCountyCode']).astype(str), 3)
st = np.char.zfill(np.array(fema['fipsStateCode']).astype(str), 2)
fema['fips'] = np.char.add(st, ct).astype(int)

In [None]:
# County codes of every FEMA declaration whose incident type is one of the
# cold/winter types. Filtered once here instead of once per city and county
# inside the loop, which re-scanned the whole table on every iteration.
_cold_event_fips = fema.loc[
    fema["incidentType"].isin(["Severe Ice Storm", "Snow", "Freezing"]), "fips"
]


def _count_cold_events(county_fips):
    """Return the number of cold/winter declarations recorded for one county code."""
    return int((_cold_event_fips == county_fips).sum())


# One value per city: the plain count for a single-county city, or the
# weighted average of the per-county counts (weights parsed from
# counties_weights) for a city flagged as spanning several counties.
fema_cold_events = []
for _city_fips, _is_multi, _weights in zip(match_fips, flag_multi_counties, counties_weights):
    if not _is_multi:
        fema_cold_events.append(_count_cold_events(int(_city_fips)))
    else:
        _per_county = np.array([_count_cold_events(code) for code in _city_fips])
        fema_cold_events.append(
            np.average(_per_county, weights=np.array(ast.literal_eval(_weights)))
        )

fema_cold_events_ranked = standardize_rank(fema_cold_events, direction=1)

In [None]:
columns = ["Code", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

# Read the fixed-width file directly from its path. The previous code opened
# the file without closing it and iterated in 500000-row chunks while keeping
# only the last chunk, which would silently drop rows for a larger file.
# Every field is kept as text so the record code can be sliced below.
tmin = pd.read_fwf(
    '../input/tmindatasetnoaa/climdiv-tmincy-v1.0.0-20201104',
    header=None,
    names=columns,
    converters={h: str for h in columns},
)

# Characters 0-4 of the record code are used as the county code and
# characters 7-10 as the year.
_code = tmin["Code"].astype(str)
tmin["year"] = _code.str[7:11].astype(int)
tmin["fips"] = _code.str[0:5].astype(int)

# Map state FIPS codes to the state numbering used inside the tmin county
# codes: rows labelled 1 and 10 are dropped and the remaining rows are
# numbered 1..N in file order.
# NOTE(review): presumably rows 1 and 10 are Alaska and Hawaii - confirm
# against state_fips.csv.
state_fips = pd.read_csv("../input/statefips/state_fips.csv")
df_states = state_fips.drop([1, 10])
df_states = df_states.reset_index()
df_states["index"] = np.array(df_states.index) + 1
fips_to_code = dict(zip(df_states["fips"], df_states["index"]))
fips_to_code[15] = 99 # HI
fips_to_code[11] = 99 # DC
fips_to_code[2] = 99 # AK

In [None]:
def _tmin_county_code(county_fips):
    """Translate a county FIPS code into the county code used in ``tmin['fips']``."""
    padded = str(county_fips).zfill(5)
    # Swap the 2-digit state FIPS prefix for the mapped state number.
    return int(str(fips_to_code[int(padded[:2])]) + padded[2:])


def _county_monthly_tmin(county_fips, after_year):
    """Return the monthly tmin table of one county for years > ``after_year``.

    Values equal to 0.00 or -9.99 are replaced by NaN, as in the original code.
    NOTE(review): confirm these two sentinels against the dataset's README.
    """
    code = _tmin_county_code(county_fips)
    rows = tmin[(tmin["fips"] == code) & (tmin["year"] > after_year)]
    # Built-in float replaces np.float, an alias removed in NumPy 1.24.
    monthly = rows.drop(columns=["Code", "year", "fips"]).astype(float)
    return monthly[(monthly != 0.00) & (monthly != -9.99)]


def _city_tmin_statistic(after_year, summarize):
    """Apply ``summarize`` to each city's monthly tmin table.

    Single-county cities get the statistic of their county; cities flagged as
    multi-county get the average of the per-county statistics weighted by
    ``counties_weights``. Returns a list with one value per city.
    """
    per_city = []
    for city_fips, is_multi, weights in zip(match_fips, flag_multi_counties, counties_weights):
        if not is_multi:
            per_city.append(summarize(_county_monthly_tmin(int(city_fips), after_year)))
        else:
            per_county = np.array(
                [summarize(_county_monthly_tmin(code, after_year)) for code in city_fips]
            )
            per_city.append(
                np.average(per_county, weights=np.array(ast.literal_eval(weights)))
            )
    return per_city


# Mean, over the years after 2000, of each year's lowest monthly value.
tmin_averaged = _city_tmin_statistic(2000, lambda monthly: np.mean(np.min(monthly, axis=1)))

# Standard deviation, over the years after 2000, of each year's lowest monthly value.
tmin_std = _city_tmin_statistic(2000, lambda monthly: np.std(np.min(monthly, axis=1)))

# Mean year-to-year change, over the years after 1980, of each year's HIGHEST
# monthly value.
# NOTE(review): this uses np.max while the two statistics above use np.min;
# kept as in the original - confirm it is intentional.
tmin_dt = _city_tmin_statistic(1980, lambda monthly: np.mean(np.diff(np.max(monthly, axis=1))))


In [None]:
# Every other KPI in this notebook is ranked so that a larger value means more
# cold risk. tmin_averaged is the opposite (a lower minimum temperature is
# colder), so its rank is inverted; with direction=1 the warmest city received
# the highest cold-exposure rank.
tmin_averaged_ranked = standardize_rank(tmin_averaged, direction=-1)
tmin_std_ranked = standardize_rank(tmin_std, direction=1)
# NOTE(review): tmin_dt is a year-to-year temperature change; confirm whether
# a larger value should count as more or less cold exposure.
tmin_dt_ranked = standardize_rank(tmin_dt, direction=1)

In [None]:
# Load the "All May 2019 Data" sheet of the MSA occupation workbook; later
# cells read its "area", "occ_code" and "loc_quotient" columns.
occupation = pd.read_excel(path + "Occupation_Data_OECS/MSA_M2019_dl.xlsx", sheet_name="All May 2019 Data")

In [None]:
def _mean_location_quotient(msa_name, occ_prefix):
    """Return the mean ``loc_quotient`` of one area's occupations under a code prefix.

    Rows of ``occupation`` are selected where ``area`` equals ``msa_name`` and
    ``occ_code`` starts with ``occ_prefix``. Non-numeric entries become NaN
    and are ignored; the result is NaN when no numeric entry is left.

    The previous ``type(x) is float`` test also discarded integers and NumPy
    float scalars, so legitimate values could be silently dropped.
    """
    in_area = occupation[occupation["area"] == msa_name]
    quotients = in_area.loc[in_area["occ_code"].str.startswith(occ_prefix), "loc_quotient"]
    numeric = pd.to_numeric(quotients, errors="coerce").to_numpy(dtype=float)
    return np.nanmean(numeric)


# Occupation codes starting with "47".
# NOTE(review): presumably the construction group, per the variable name.
all_locs_construction = [_mean_location_quotient(msa_name, "47") for msa_name in MSA]

# Occupation codes starting with "53".
# NOTE(review): presumably the transportation group, per the variable name.
all_locs_transportation = [_mean_location_quotient(msa_name, "53") for msa_name in MSA]

In [None]:
# Scaled ranks of the two location-quotient KPIs (largest value -> 1).
all_locs_construction_ranked, all_locs_transportation_ranked = (
    standardize_rank(quotients, direction=1)
    for quotients in (all_locs_construction, all_locs_transportation)
)


In [None]:
# County-level NRI table; rows are looked up below by their "STCOFIPS" value.
nri = pd.read_csv(path + "NRI_Table_Counties.csv")

def nri_select(field):
    """Return one value of the NRI column ``field`` per city.

    A single-county city gets the value of its county row. A city flagged as
    multi-county gets the average of its counties' values weighted by
    ``counties_weights``; a NaN for any county makes that average NaN.

    Args:
        field: Name of a column of ``nri``.

    Returns:
        Float array aligned with ``match_fips``. An entry is NaN when the
        county has no row (or more than one) in ``nri`` or the stored value
        cannot be converted to float.

    Raises:
        KeyError: If ``field`` is not a column of ``nri``. The previous bare
            ``except:`` hid this and returned an all-NaN array.
    """
    def _county_value(county_fips):
        values = nri.loc[nri["STCOFIPS"] == county_fips, field]
        # float() on a Series only ever worked for exactly one match, so
        # zero or several matching rows still yield NaN.
        if len(values) != 1:
            return np.nan
        try:
            return float(values.iloc[0])
        except (TypeError, ValueError):
            return np.nan

    res = []
    for city_fips, is_multi, weights in zip(match_fips, flag_multi_counties, counties_weights):
        if not is_multi:
            res.append(_county_value(int(city_fips)))
        else:
            per_county = np.array([_county_value(code) for code in city_fips])
            res.append(np.average(per_county, weights=np.array(ast.literal_eval(weights))))
    return np.array(res)

# Per-city values of four NRI columns, fetched in this order.
winter_weather_risk, icestorm_risk, hail_risk, cold_wave_risk = (
    nri_select(nri_field)
    for nri_field in ("WNTW_EALS", "ISTM_EALS", "HAIL_EALS", "CWAV_EALS")
)

In [None]:
# Scaled ranks of the four NRI KPIs (largest value -> 1).
(
    winter_weather_risk_ranked,
    icestorm_risk_ranked,
    hail_risk_ranked,
    cold_wave_risk_ranked,
) = (
    standardize_rank(nri_values, direction=1)
    for nri_values in (winter_weather_risk, icestorm_risk, hail_risk, cold_wave_risk)
)

In [None]:
# Three independent copies of the city table: one each for the raw, ranked
# and aggregated KPI outputs.
vulnerability_raw, vulnerability_ranked, vulnerability_aggr = (
    vulnerability.copy() for _ in range(3)
)

In [None]:
# Attach the raw KPI values to the city table, in this column order, and
# write the result to CSV.
for _raw_column, _raw_values in (
    ### EXPOSURE
    ("score_cold_days", score_cold_days),
    ("score_cold_wave", score_cold_wave),
    ("score_heavy_snow", score_heavy_snow),
    ("score_winter_conditions", score_winter_conditions),
    ("fema_cold_events", fema_cold_events),
    ("tmin_averaged", tmin_averaged),
    ("tmin_std", tmin_std),
    ("tmin_dt", tmin_dt),
    ### SENSITIVITY
    ("LOC_construction", all_locs_construction),
    ("LOC_transportation", all_locs_transportation),
    ("winter_weather_expected_annual_loss", winter_weather_risk),
    ("icestorm_expected_annual_loss", icestorm_risk),
    ("hail_risk_expected_annual_loss", hail_risk),
    ("cold_wave_expected_annual_loss", cold_wave_risk),
):
    vulnerability_raw[_raw_column] = _raw_values

vulnerability_raw.to_csv("vulnerability_cities_us_cold_kpis_raw.csv")

In [None]:
# Attach the ranked KPI values to the city table, in this column order, and
# write the result to CSV.
for _ranked_column, _ranked_values in (
    ### EXPOSURE
    ("cold_days_ranked", cold_days_ranked),
    ("cold_wave_ranked", cold_wave_ranked),
    ("heavy_snow_ranked", heavy_snow_ranked),
    ("winter_conditions_ranked", winter_conditions_ranked),
    ("fema_cold_events_ranked", fema_cold_events_ranked),
    ("tmin_averaged_ranked", tmin_averaged_ranked),
    ("tmin_std_ranked", tmin_std_ranked),
    ("tmin_dt_ranked", tmin_dt_ranked),
    ### SENSITIVITY
    ("LOC_construction_ranked", all_locs_construction_ranked),
    ("LOC_transportation_ranked", all_locs_transportation_ranked),
    ("winter_weather_expected_annual_loss_ranked", winter_weather_risk_ranked),
    ("icestorm_expected_annual_loss_ranked", icestorm_risk_ranked),
    ("hail_risk_expected_annual_loss_ranked", hail_risk_ranked),
    ("cold_wave_expected_annual_loss_ranked", cold_wave_risk_ranked),
):
    vulnerability_ranked[_ranked_column] = _ranked_values

vulnerability_ranked.to_csv("vulnerability_cities_us_cold_kpis_ranked.csv")

In [None]:
def aggr_and_rescale(arr):
    """Average the columns of ``arr`` row by row, ignoring NaN, and rank the result.

    Args:
        arr: Two-dimensional array with one row per city and one column per KPI.

    Returns:
        The row means passed through ``standardize_rank`` with its default
        direction.
    """
    return standardize_rank(np.nanmean(arr, axis=1))

In [None]:
# Exposure: stack the ranked exposure KPIs as columns, then aggregate per city.
_exposure_columns = (
    cold_days_ranked,
    cold_wave_ranked,
    heavy_snow_ranked,
    winter_conditions_ranked,
    fema_cold_events_ranked,
    tmin_averaged_ranked,
    tmin_std_ranked,
    tmin_dt_ranked,
)
all_expo_cold_kpis = np.c_[_exposure_columns]
aggr_expo_cold_kpis = aggr_and_rescale(all_expo_cold_kpis)

# Sensitivity: same treatment for the ranked sensitivity KPIs.
_sensitivity_columns = (
    all_locs_construction_ranked,
    all_locs_transportation_ranked,
    winter_weather_risk_ranked,
    icestorm_risk_ranked,
    hail_risk_ranked,
    cold_wave_risk_ranked,
)
all_sens_cold_kpis = np.c_[_sensitivity_columns]
aggr_sens_cold_kpis = aggr_and_rescale(all_sens_cold_kpis)

# Overall cold KPI: aggregate the two aggregated scores.
aggr_cold_kpi = aggr_and_rescale(np.c_[(aggr_expo_cold_kpis, aggr_sens_cold_kpis)])

In [None]:
# Start from a fresh copy of the city table, attach the three aggregated
# scores in this column order, and write the result to CSV.
vulnerability_aggr = vulnerability.copy()

for _aggr_column, _aggr_values in (
    ("aggr_cold_kpis", aggr_cold_kpi),
    ("aggr_exposure_cold_kpis", aggr_expo_cold_kpis),
    ("aggr_sensitivity_cold_kpis", aggr_sens_cold_kpis),
):
    vulnerability_aggr[_aggr_column] = _aggr_values

vulnerability_aggr.to_csv("vulnerability_cities_us_cold_aggr_kpis.csv")