In [59]:
import pandas as pd

In [60]:
data = pd.read_csv('PrimaryLandAndVegetationInventory_ExportTable.csv')

In [61]:
df = pd.DataFrame(data)

In [62]:
df.head()

Unnamed: 0,POLYGON_ID,NSR,PRIME_CLS1,LAND_CLS1,SITE_TYP1,SITE_PCT1,RNG_SITE1,MAP_CODE1,ECO_PH1_ID,ECO_PHASE1,...,SER_SUCC3,DIST3,DIST_PCT3,DIST_YR3,HARV_YR3,ANTH_IMP,IMAGE_YR,UTM_HA,SUBMISSION,FireRisk
0,212,CM,VEG,WET,TF,8,,9C,3071,j1,...,XS,UK,0,0,0,0,0,48.013,2,2
1,213,CM,VEG,NAW,FT,8,,5C,3030,d1,...,,,0,0,0,0,0,35.7348,2,1
2,214,CM,VEG,NAW,FT,8,,5C,3030,d1,...,,,0,0,0,0,0,33.5634,2,1
3,215,CM,VEG,NNW,OS,5,,5D,3106,e4,...,,,0,0,0,0,0,14.4911,2,2
4,216,CM,VEG,NAW,FT,8,,5C,3030,d1,...,,,0,0,0,0,0,76.6711,2,1


In [63]:
df.columns

Index(['POLYGON_ID', 'NSR', 'PRIME_CLS1', 'LAND_CLS1', 'SITE_TYP1',
       'SITE_PCT1', 'RNG_SITE1', 'MAP_CODE1', 'ECO_PH1_ID', 'ECO_PHASE1',
       'DENSITY1', 'CONIF_PCT1', 'LEAD_SP1', 'PNR_SUCC1', 'SER_SUCC1', 'DIST1',
       'DIST_PCT1', 'DIST_YR1', 'HARV_YR1', 'PINE_PCT', 'PRIME_CLS2',
       'LAND_CLS2', 'SITE_TYP2', 'SITE_PCT2', 'RNG_SITE2', 'MAP_CODE2',
       'ECO_PH2_ID', 'ECO_PHASE2', 'DENSITY2', 'CONIF_PCT2', 'LEAD_SP2',
       'PNR_SUCC2', 'SER_SUCC2', 'DIST2', 'DIST_PCT2', 'DIST_YR2', 'HARV_YR2',
       'PRIME_CLS3', 'LAND_CLS3', 'SITE_TYP3', 'SITE_PCT3', 'RNG_SITE3',
       'MAP_CODE3', 'ECO_PH3_ID', 'ECO_PHASE3', 'DENSITY3', 'CONIF_PCT3',
       'LEAD_SP3', 'PNR_SUCC3', 'SER_SUCC3', 'DIST3', 'DIST_PCT3', 'DIST_YR3',
       'HARV_YR3', 'ANTH_IMP', 'IMAGE_YR', 'UTM_HA', 'SUBMISSION', 'FireRisk'],
      dtype='object')

In [64]:
def classify_fire_risk(sp1, sp2, sp3, cp1, cp2, cp3, dist_yr):
    import datetime
    conifers = {'SW', 'SB', 'PL', 'JP', 'PW', 'LW'}
    deciduous = {'AW', 'BW', 'PB', 'PO', 'WI'}
    
    current_year = datetime.datetime.now().year

    # Normalize species
    species = {sp1.upper(), sp2.upper(), sp3.upper()}
    
    # Total conifer percent (handle None as 0)
    conif_total = (cp1 or 0) + (cp2 or 0) + (cp3 or 0)
    
    # Calculate years since disturbance
    if dist_yr and 1900 < dist_yr <= current_year:
        years_since = current_year - dist_yr
    else:
        years_since = None  # No reliable disturbance info

    # If recent disturbance within 10 years, reduce fire risk
    if years_since is not None and years_since <= 10:
        return 1  # Recent disturbance = lower risk

    if species & conifers or conif_total >= 60:
        return 3  # High risk
    elif species <= deciduous and conif_total < 25:
        return 1  # Low risk
    else:
        return 2  # Moderate risk


In [65]:
required_cols = ["LEAD_SP1", "LEAD_SP2", "LEAD_SP3", "CONIF_PCT1", "CONIF_PCT2", "CONIF_PCT3", "DIST_YR1"]

In [66]:
df['FireRisk'] = df.apply(lambda row: classify_fire_risk(*[row[col] for col in required_cols]), axis=1)

In [67]:
df[required_cols + ['FireRisk']].head()

Unnamed: 0,LEAD_SP1,LEAD_SP2,LEAD_SP3,CONIF_PCT1,CONIF_PCT2,CONIF_PCT3,DIST_YR1,FireRisk
0,,,Aw,0,0,0,0,2
1,Aw,Aw,,1,1,0,0,2
2,Aw,Aw,,1,1,0,0,2
3,,,,0,0,0,0,2
4,Aw,Pb,,1,0,0,0,2


In [68]:
df.to_csv('FireRiskResults.csv', index=False)

In [69]:
number_of_polygons_per_fire_risk = df.groupby('FireRisk')['POLYGON_ID'].count()

In [70]:
number_of_polygons_per_fire_risk

FireRisk
1       642
2    159452
3     18993
Name: POLYGON_ID, dtype: int64

In [71]:
number_of_polygons_per_fire_risk.index = ['Low Risk', 'Moderate Risk', 'High Risk']

In [72]:
number_of_polygons_per_fire_risk.to_csv('polygon_count_per_FireRisk.csv')