In [9]:
#load packages
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [10]:
# Load the cleaned, combined county-level dataset.
# NOTE(review): this is an absolute path on a single machine; the notebook will
# not run elsewhere until it is made repo-relative or configurable.
combined = pd.read_csv("/Users/sahana_krishnaraj/Github/siting-optimizer-lmichallange/Datasets/Cleaned Data/final_combined_data.csv")

# Legend for the columns kept below.
#
# Wildfire Hazard Potential (WHP)
#   H + VH Pct = percent of area in the geographic unit mapped as high or very high WHP
#
# Drought levels (stored as percentages; presumably percent of county area -- TODO confirm)
#   None = No Drought
#   D0   = Drought Level 0 (Abnormally Dry)
#   D1   = Drought Level 1 (Moderate Drought)
#   D2   = Drought Level 2 (Severe Drought)
#   D3   = Drought Level 3 (Extreme Drought)
#   D4   = Drought Level 4 (Exceptional Drought)

# Weight applied to each drought column when building the drought risk score.
# The key order also fixes the column order of the subset.
drought_level_weights = {
    "None": 0.0,
    "D0": 0.1,
    "D1": 0.3,
    "D2": 0.4,
    "D3": 0.6,
    "D4": 0.9,
}

# Keep only the relevant columns (as an independent copy) and give the WHP
# column a readable name.
subset = (
    combined[["County", "State", "H + VH Pct", *drought_level_weights]]
    .copy()
    .rename(columns={"H + VH Pct": "Wildfire Hazard Potential"})
)

# TODO (open question from the author): should the scores be normalized
# before they are combined?
# Drought risk score = weighted sum of the drought-level columns.
subset["Drought Risk Score"] = sum(
    level_weight * subset[level]
    for level, level_weight in drought_level_weights.items()
)
subset.head()

Unnamed: 0,County,State,Wildfire Hazard Potential,None,D0,D1,D2,D3,D4,Drought Risk Score
0,Roberts County,TX,84,46.256226,53.743774,45.19566,39.867925,17.161509,0.0,45.177151
1,El Dorado County,CA,81,81.408679,18.591321,11.421698,3.46283,0.816981,0.0,7.160962
2,Hemphill County,TX,79,23.453962,76.546038,51.04566,36.529811,13.685849,0.0,45.791736
3,Richmond County,NC,74,50.528302,49.471698,17.520189,4.396226,0.0,0.0,11.961717
4,Idaho County,ID,73,4.069245,95.930755,68.907358,0.07566,0.0,0.0,30.295547


In [11]:
# Pre-assigned weight for each environmental factor.
#
# Geographic and Terrain Factors
#   * Wildfire Hazard Potential (WHP) -- 0.5
#   * Drought Risk Score              -- 0.5
weights = {
    "Wildfire Hazard Potential": 0.5,
    "Drought Risk Score": 0.5,
}

# TODO (open question from the author): should each factor be normalized here,
# before combining?
# NOTE(review): the two factors do not obviously share a scale -- WHP is a
# percentage, while the drought weights sum to 2.3, so the drought score can
# exceed 100 if several drought columns are large at once. Confirm before
# reading the 0.5 / 0.5 weights as "equal importance".

# Overall Environmental Risk Score: weighted sum over every factor in `weights`.
subset["Overall Score"] = sum(
    factor_weight * subset[factor] for factor, factor_weight in weights.items()
)

# Min-max normalization of the overall score.
scaler = MinMaxScaler()
subset["Overall Score"] = scaler.fit_transform(subset[["Overall Score"]])
subset.head()

Unnamed: 0,County,State,Wildfire Hazard Potential,None,D0,D1,D2,D3,D4,Drought Risk Score,Overall Score
0,Roberts County,TX,84,46.256226,53.743774,45.19566,39.867925,17.161509,0.0,45.177151,0.526275
1,El Dorado County,CA,81,81.408679,18.591321,11.421698,3.46283,0.816981,0.0,7.160962,0.359173
2,Hemphill County,TX,79,23.453962,76.546038,51.04566,36.529811,13.685849,0.0,45.791736,0.508409
3,Richmond County,NC,74,50.528302,49.471698,17.520189,4.396226,0.0,0.0,11.961717,0.350213
4,Idaho County,ID,73,4.069245,95.930755,68.907358,0.07566,0.0,0.0,30.295547,0.420832


In [12]:
#create a function that allows the user to input weights

def environmental_score(wildfire_user, drought_user, rank, high=True):
    """Rank counties by a user-weighted environmental risk score.

    The two user weights are rescaled so that they sum to 1, so only their
    ratio matters (5 and 5 behaves the same as 1 and 1). The weighted sum of
    the "Wildfire Hazard Potential" and "Drought Risk Score" columns of the
    notebook-level ``subset`` frame is then min-max scaled.

    Side effect: the scaled score is stored in
    ``subset["User Overall Score"]``, replacing values from any earlier call.

    Parameters
    ----------
    wildfire_user : int or float
        Relative importance of wildfire hazard potential. Must be >= 0.
    drought_user : int or float
        Relative importance of the drought risk score. Must be >= 0.
    rank : int
        Number of counties to return.
    high : bool, default True
        If true, return the counties with the highest scores; otherwise
        return those with the lowest scores.

    Returns
    -------
    pandas.DataFrame
        The "County", "State" and "User Overall Score" columns for the
        selected counties, sorted by score.

    Raises
    ------
    ValueError
        If either weight is negative, or if both weights are zero (the
        weights could not be turned into proportions).
    """
    # Validate before touching any data so bad input fails fast with a clear
    # message instead of a ZeroDivisionError or silently inverted weights.
    if wildfire_user < 0 or drought_user < 0:
        raise ValueError("wildfire_user and drought_user must be non-negative")
    total_weight = wildfire_user + drought_user
    if total_weight == 0:
        raise ValueError(
            "at least one of wildfire_user and drought_user must be greater than zero"
        )

    # Convert the raw weights to proportions so any input scale works.
    wildfire_weight = wildfire_user / total_weight
    drought_weight = drought_user / total_weight

    # Compute the weighted risk score.
    subset["User Overall Score"] = (wildfire_weight * subset["Wildfire Hazard Potential"] +
                                    drought_weight * subset["Drought Risk Score"])

    # Min-max scale with a scaler local to this call: the function then does
    # not depend on, or overwrite the fitted state of, any notebook-level scaler.
    subset["User Overall Score"] = MinMaxScaler().fit_transform(subset[["User Overall Score"]])

    # Highest scores first when `high` is true, lowest first otherwise, then
    # keep only the requested number of counties.
    ranked_counties = (
        subset[["County", "State", "User Overall Score"]]
        .sort_values(by="User Overall Score", ascending=not high)
        .head(rank)
    )

    return ranked_counties

# Demo: equal weights, top 10 highest-scoring counties.
environmental_score(wildfire_user=5, drought_user=5, rank=10, high=True)

Unnamed: 0,County,State,User Overall Score
186,Elk County,KS,1.0
460,Chautauqua County,KS,0.936285
1753,Kendall County,TX,0.845337
408,Cowley County,KS,0.814032
1158,Colfax County,NE,0.762225
854,Stanton County,NE,0.754957
1078,Cuming County,NE,0.738414
1194,Comal County,TX,0.718438
96,Greenwood County,KS,0.710786
79,Comanche County,KS,0.709683
