In [None]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, f1_score
import datetime
import geopandas as gpd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler

To run this notebook on a test set, load your data into `test_df` in the cell below and then run the entire notebook. The classification reports are printed at the end of the notebook.

In [None]:
# Example of loading the test data from a SQLite database into a DataFrame:
# conn = sqlite3.connect('FPA_FOD_20170508.sqlite')
# test_df = pd.read_sql_query("SELECT * from Fires", conn) # <- example for loading data

# Placeholder: the final cell passes `test_df` to test_model_on_test_set, so it
# must be replaced by a DataFrame before running the rest of the notebook.
test_df = None  # TODO: load the test data here as a DataFrame

In [None]:
def to_date(x):
    """Build the discovery date of a fire record.

    ``x`` is a row-like mapping with ``FIRE_YEAR`` and ``DISCOVERY_DOY``.
    Returns a ``datetime.datetime`` at midnight of the discovery day.
    """
    year_start = datetime.datetime(x['FIRE_YEAR'], 1, 1)
    # The day-of-year is 1-based here: day 1 lands on January 1st itself.
    days_into_year = datetime.timedelta(x['DISCOVERY_DOY'] - 1)
    return year_start + days_into_year

In [None]:
def july4(x):
    """Return 1 when the record falls on July 2nd-6th (inclusive), else 0.

    ``x`` is a row-like mapping with ``MONTH`` and ``DAY``.
    """
    if x['MONTH'] != 7:
        return 0
    # Window of two days on either side of the 4th of July.
    return 1 if 2 <= x['DAY'] <= 6 else 0

In [None]:
def to_cont_date(x):
    """Build the containment date of a fire record.

    ``x`` is a row-like mapping with ``CONT_DOY``, ``FIRE_YEAR`` and ``DATE``
    (the discovery date). When ``CONT_DOY`` is not greater than zero (which
    includes NaN) the sentinel ``datetime(1990, 1, 1)`` is returned.
    """
    if not x['CONT_DOY'] > 0:
        return datetime.datetime(1990, 1, 1)

    contained = datetime.datetime(x['FIRE_YEAR'], 1, 1) + datetime.timedelta(x['CONT_DOY'] - 1)
    # A containment day that precedes the discovery date is re-anchored to
    # the following calendar year.
    if (contained - x['DATE']).days < 0:
        contained = datetime.datetime(x['FIRE_YEAR'] + 1, 1, 1) + datetime.timedelta(x['CONT_DOY'] - 1)
    return contained

In [None]:
def is_cont_date(x):
    """Return 0 when ``C_DATE`` is the ``datetime(1990, 1, 1)`` sentinel, else 1."""
    missing_marker = datetime.datetime(1990, 1, 1)
    return 0 if x['C_DATE'] == missing_marker else 1

In [None]:
def exists_times(x):
    """Return 1 only when both ``DISCOVERY_TIME`` and ``CONT_TIME`` are truthy, else 0."""
    both_present = x['DISCOVERY_TIME'] and x['CONT_TIME']
    return 1 if both_present else 0

In [None]:
def add_hour(x):
    """Return the discovery hour rounded to the nearest hour, or -1 if unknown.

    ``DISCOVERY_TIME`` is sliced as an ``HHMM`` string: the first two
    characters are the hour and the rest the minutes. Minutes above 30 round
    the hour up (so a late-evening time can yield 24).
    """
    stamp = x['DISCOVERY_TIME']
    if not stamp:
        return -1
    minutes = int(stamp[2:])
    hours = int(stamp[:2])
    return hours + 1 if minutes > 30 else hours

In [None]:
def add_hour_do_dis_date(x):
    """Return the discovery ``DATE`` with the discovery clock time added.

    The time of day is added only when ``HOUR`` is non-negative, i.e. when a
    discovery time was available; otherwise ``DATE`` is returned unchanged.
    """
    discovered = x['DATE']
    if not x['HOUR'] >= 0:
        return discovered
    clock = datetime.timedelta(hours=int(x['DISCOVERY_TIME'][:2]),
                               minutes=int(x['DISCOVERY_TIME'][2:]))
    return discovered + clock

In [None]:
def add_cont_hour(x):
    """Return the containment hour rounded to the nearest hour, or -1 if unknown.

    ``CONT_TIME`` is sliced as an ``HHMM`` string; minutes above 30 round the
    hour up.
    """
    stamp = x['CONT_TIME']
    if not stamp:
        return -1
    minutes = int(stamp[2:])
    hours = int(stamp[:2])
    return hours + 1 if minutes > 30 else hours

In [None]:
def add_hour_to_cont_date(x):
    """Return the containment ``C_DATE`` with the containment clock time added.

    The ``datetime(1990, 1, 1)`` sentinel is passed through untouched, as is
    any date whose ``C_HOUR`` is negative (no containment time available).
    """
    contained = x['C_DATE']
    if contained == datetime.datetime(1990, 1, 1) or not x['C_HOUR'] >= 0:
        return contained
    clock = datetime.timedelta(hours=int(x['CONT_TIME'][:2]),
                               minutes=int(x['CONT_TIME'][2:]))
    return contained + clock

In [None]:
def duration(x):
    """Return the time between discovery (``DATE``) and containment (``C_DATE``).

    Returns -1 when ``C_DATE`` is the ``datetime(1990, 1, 1)`` sentinel.
    When ``EXISTS_TIMES`` is not 1, the containment is pushed to 23:59 after
    ``C_DATE`` before subtracting.
    """
    contained = x['C_DATE']
    if contained == datetime.datetime(1990, 1, 1):
        return -1

    if x['EXISTS_TIMES'] == 1:
        end = contained
    else:
        end = contained + datetime.timedelta(hours=23, minutes=59)
    return end - x['DATE']

In [None]:
def duration_days(x):
    """Return the whole-day part of ``DURATION``, or -1 when ``DURATION`` is -1."""
    span = x['DURATION']
    return -1 if span == -1 else span.days

In [None]:
def duration_hours(x):
    """Return ``DURATION`` in whole hours, or -1 when ``DURATION`` is -1."""
    span = x['DURATION']
    if span == -1:
        return -1
    # Leftover seconds within the final day are floored to whole hours.
    hours_in_partial_day = span.seconds // 3600
    return span.days * 24 + hours_in_partial_day

In [None]:
def duration_30m(x):
    """Return 1 when ``DURATION`` is exactly 30 minutes, else 0 (including the -1 marker)."""
    span = x['DURATION']
    if span == -1:
        return 0
    half_hour = datetime.timedelta(hours=0, minutes=30)
    return 1 if span == half_hour else 0

In [None]:
def duration_1h(x):
    """Return 1 when ``DURATION`` is exactly one hour, else 0 (including the -1 marker)."""
    span = x['DURATION']
    if span == -1:
        return 0
    one_hour = datetime.timedelta(hours=1, minutes=0)
    return 1 if span == one_hour else 0

In [None]:
def duration_same_d(x):
    """Return 1 when ``DURATION_DAYS`` is 0, else 0 (including the -1 marker)."""
    whole_days = x['DURATION_DAYS']
    if whole_days == -1:
        return 0
    return 1 if whole_days == 0 else 0

In [None]:
def get_gdf(file_path):
    """Read a CSV with a WKT ``geometry`` column and return it as a GeoDataFrame.

    Every column other than ``geometry`` is kept as an attribute column; the
    ``geometry`` column is parsed with ``GeoSeries.from_wkt``. No CRS is set.
    """
    table = pd.read_csv(file_path)
    attribute_columns = [name for name in table.columns if name != "geometry"]
    shapes = gpd.GeoSeries.from_wkt(table["geometry"])
    return gpd.GeoDataFrame(table.loc[:, attribute_columns], geometry=shapes)

In [None]:
def get_distance_to_rails_feature(df):
    """Attach a ``distance_to_rails`` column via a nearest spatial join.

    The rails layer is loaded from ``datasets/rail_north_america/rails_geo.csv``,
    declared as EPSG:4326 and reprojected to EPSG:3857. Note that ``df.crs`` is
    overwritten (not reprojected) with the rails CRS, which also mutates the
    caller's frame.
    """
    rails = get_gdf("datasets/rail_north_america/rails_geo.csv")
    rails.crs = "EPSG:4326"
    rails = rails.to_crs("EPSG:3857")

    df.crs = rails.crs
    joined = gpd.sjoin_nearest(df, rails, distance_col="distance_to_rails", how="left")

    # Attribute columns carried over from the rails layer that are not used as features.
    unused_columns = ["index_right", "scalerank", "featurecla", "sov_a3",
                      "uident", "add", "natrlscale", "continent"]
    return joined.drop(unused_columns, axis=1)

In [None]:
def get_pop(df):
    """Join each row of ``df`` to its nearest population-density record.

    The population layer is loaded from
    ``datasets/population_density/data_populations_usa.csv``, declared as
    EPSG:4326 and reprojected to EPSG:3857. Unlike the other join helpers,
    ``df``'s CRS is not touched here.
    """
    population = get_gdf("datasets/population_density/data_populations_usa.csv")
    population.crs = "EPSG:4326"
    population = population.to_crs("EPSG:3857")

    joined = gpd.sjoin_nearest(df, population, how="left")
    return joined.drop(["index_right"], axis=1)

In [None]:
def get_distance_feature(df, file_path, feature_name):
    """Attach the distance to the nearest feature of a layer as ``feature_name``.

    The layer at ``file_path`` is loaded with ``get_gdf``, declared as
    EPSG:4326 and reprojected to EPSG:3857. ``df.crs`` is overwritten (not
    reprojected) with the layer CRS. The layer's ``OBJECTID`` column and the
    join's ``index_right`` column are dropped from the result.
    """
    layer = get_gdf(file_path)
    layer.crs = "EPSG:4326"
    layer = layer.to_crs("EPSG:3857")

    df.crs = layer.crs
    joined = gpd.sjoin_nearest(df, layer, distance_col=feature_name, how="left")
    return joined.drop(["OBJECTID", "index_right"], axis=1)

In [None]:
def get_city_and_distance_feature(df, file_path, feature_name):
    """Attach the nearest layer record and its distance (as ``feature_name``).

    Same procedure as the plain distance join, except that all attribute
    columns of the layer are kept; only the join's ``index_right`` column is
    dropped. ``df.crs`` is overwritten (not reprojected) with the layer CRS.
    """
    layer = get_gdf(file_path)
    layer.crs = "EPSG:4326"
    layer = layer.to_crs("EPSG:3857")

    df.crs = layer.crs
    joined = gpd.sjoin_nearest(df, layer, distance_col=feature_name, how="left")
    return joined.drop(["index_right"], axis=1)

In [None]:
def date_to_check(row):
    """Return the ``(year, month, day)`` used to look up temperature data.

    The discovery day-of-year is snapped back to the first day of its 3-day
    bin (days 1-3 -> Jan 1, days 4-6 -> Jan 4, ...). Dates before 1992 are
    clamped to ``(1992, 1, 1)``.
    """
    doy = row['DISCOVERY_DOY']
    # Zero-based offset from January 1st of the bin's first day.
    offset = doy - (1 + (doy - 1) % 3)
    lookup = datetime.datetime(row['YEAR'], 1, 1) + datetime.timedelta(int(offset))

    if lookup.year >= 1992:
        return lookup.year, lookup.month, lookup.day
    return 1992, 1, 1

def temps_func(df):
    """Attach weather columns to ``df`` by area code and binned discovery date.

    Steps:
      1. Spatially join each row to its nearest record of
         ``temps_area_codes.csv`` (provides ``area_code``).
      2. Temporarily replace DAY/MONTH/YEAR with the lookup date returned by
         ``date_to_check``.
      3. Left-merge ``temps_area_code_dates.csv`` on DAY, MONTH, YEAR and
         ``area_code``.
      4. Restore the original DAY/MONTH/YEAR, drop helper columns and keep one
         row per ``FOD_ID``.

    Returns the enriched frame. Note that ``df.crs`` is assigned on the
    caller's object before ``df`` is rebound.
    """
    # Load the area-code layer; its "geometry" column holds WKT strings.
    gdf = pd.read_csv("datasets/temps_dfs/temps_area_codes.csv")
    gdf = gpd.GeoDataFrame(gdf.loc[:, [c for c in gdf.columns if c != "geometry"]],
                           geometry=gpd.GeoSeries.from_wkt(gdf["geometry"]))
    
    # Declare the layer as EPSG:4326, then reproject it to EPSG:3857.
    gdf.crs = "EPSG:4326"

    gdf = gdf.to_crs("EPSG:3857")
    # Overwrites df's CRS attribute with the layer's; no reprojection happens here.
    df.crs = gdf.crs

    df = gpd.sjoin_nearest(df, gdf, how="left")
    
    df = df.drop(["index_right"], axis=1)

    # Stash the real discovery date parts; they are restored after the merge.
    df["day_remove"] = df["DAY"]
    df["month_remove"] = df["MONTH"]
    df["year_remove"] = df["YEAR"]

    # date_to_check returns (year, month, day) tuples, hence the column order below.
    dates = np.array(list(df.apply(date_to_check, axis=1)))

    df["DAY"] = dates[:, 2]
    df["MONTH"] = dates[:, 1]
    df["YEAR"] = dates[:, 0]

    # From here on `gdf` is the plain per-date table, not a geo layer.
    gdf = pd.read_csv(f"datasets/temps_dfs/temps_area_code_dates.csv")

    # Align the merge-key dtypes on both sides before merging.
    gdf["YEAR"] = gdf["YEAR"].astype("int32")
    df["YEAR"] = df["YEAR"].astype("int32")
    gdf["MONTH"] = gdf["MONTH"].astype("int32")
    df["MONTH"] = df["MONTH"].astype("int32")
    gdf["DAY"] = gdf["DAY"].astype("int32")
    df["DAY"] = df["DAY"].astype("int32")

    df = pd.merge(df, gdf, on=["DAY", "MONTH", "YEAR", "area_code"], how="left")

    # Put the real discovery date parts back and discard the helpers.
    df["DAY"] = df["day_remove"]
    df["MONTH"] = df["month_remove"]
    df["YEAR"] = df["year_remove"]
    df = df.drop(["area_code", "day_remove", "month_remove", "year_remove"], axis=1)
    # Keep a single row per fire in case the joins produced duplicates.
    df = df.drop_duplicates(subset=["FOD_ID"])
    
    return df

In [None]:
# Maps full state / territory names (as they appear in the per-state CSV
# tables) to the abbreviations used in the fire data's STATE column.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    # Was "PR", which folded Virgin Islands rows into Puerto Rico and created
    # duplicate PR entries in the per-state tables.
    "Virgin Islands": "VI",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
    "United States": "United States"
}

def map_states(y):
    """Translate a full state/territory name into its abbreviation.

    Names that are not keys of ``us_state_to_abbrev`` yield the placeholder
    ``"____"``.
    """
    if y in us_state_to_abbrev:
        return us_state_to_abbrev[y]
    return "____"

# adds data from https://www.kff.org/statedata/
def get_info_by_states(df, address):
    """Merge per-state statistics tables into ``df``.

    Parameters
    ----------
    df : DataFrame
        Fire records with at least ``YEAR``, ``STATE`` and ``FOD_ID`` columns.
    address : str
        Directory containing ``citenzship.csv``, ``race.csv``, ``tobacco.csv``
        and ``demographics_with_years.csv``.

    Returns
    -------
    DataFrame
        ``df`` sorted by ``YEAR`` with the tables' columns added, one row per
        ``FOD_ID``, and ``%`` characters stripped from both ends of every
        string cell.

    Tables that carry a ``YEAR`` column (tobacco, demographics_with_years) are
    matched on the nearest year within the same state; the other tables are
    left-merged on ``STATE`` alone.
    """
    yearly_tables = ('tobacco', 'demographics_with_years')

    df = df.sort_values(by=["YEAR"])
    # merge_asof rejects keys of different dtypes. YEAR derived from `.dt.year`
    # is int32 on pandas >= 2 whereas years parsed from CSV are int64, so both
    # sides are pinned to int64.
    df["YEAR"] = pd.to_numeric(df["YEAR"]).astype("int64")

    csv_files = ['citenzship',
                 'race',
                 'tobacco',
                 'demographics_with_years']

    for table in csv_files:
        curr = pd.read_csv(f"{address}/{table}.csv")
        curr = curr.dropna()
        if table == 'tobacco':
            curr = curr.rename(columns={'State': 'STATE', "Year": "YEAR"})
        else:
            curr = curr.rename(columns={'Location': 'STATE'})

        # Full state names -> abbreviations, matching df's STATE values.
        curr['STATE'] = curr['STATE'].apply(map_states)

        if table in yearly_tables:
            curr["YEAR"] = pd.to_numeric(curr["YEAR"]).astype("int64")
            # merge_asof requires both frames to be sorted by the `on` key.
            curr = curr.sort_values(by=["YEAR"])

            df = pd.merge_asof(df, curr, on="YEAR", by='STATE', direction="nearest")

        else:
            df = pd.merge(df, curr, on=['STATE'], how="left")

    # A state appearing more than once in a table duplicates fire rows.
    df = df.drop_duplicates(subset=["FOD_ID"])

    def _strip_percent(value):
        return value.strip('%') if isinstance(value, str) else value

    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use whichever this pandas version provides.
    elementwise = df.map if hasattr(pd.DataFrame, "map") else df.applymap
    df = elementwise(_strip_percent)
    return df

In [None]:
def turn_fire_class_to_ordinal(row):
    """Map ``FIRE_SIZE_CLASS`` letters ``A``..``G`` to 0..6; anything else to -1."""
    size_class = row["FIRE_SIZE_CLASS"]
    for rank, letter in enumerate("ABCDEFG"):
        if size_class == letter:
            return rank
    return -1

In [None]:
def get_features(df):
    """Enrich the fire GeoDataFrame with all external features.

    ``df`` is reprojected to EPSG:3857 and then joined, in order, with the
    population, rails, powerline, landfill, mobile-home-park, public-school
    and city layers, followed by the temperature tables, the ordinal fire
    size class and the per-state statistics. Missing weather values are
    replaced by the column mean; every remaining missing value becomes 0.

    Returns the enriched frame (one row per ``FOD_ID``).
    """
    def _one_row_per_fire(frame):
        # Collapse any rows that a join duplicated for the same fire.
        return frame.groupby("FOD_ID").first().reset_index()

    df = df.to_crs("EPSG:3857")

    print("getting pop feature...", end="")
    df = _one_row_per_fire(get_pop(df))
    print("Done")

    print("getting rails feature...", end="")
    df = _one_row_per_fire(get_distance_to_rails_feature(df))
    print("Done")

    # (progress message, join helper, layer CSV, name of the distance column)
    distance_layers = [
        ("getting powerline feature...", get_distance_feature,
         "datasets/powerlines/powerlines.csv", "distance_to_powerline"),
        ("getting landfill feature...", get_distance_feature,
         "datasets/landfill_locations/Landfill_Locations.csv", "distance_to_landfill"),
        ("getting home_parks_distance feature...", get_distance_feature,
         "datasets/mobile_home_parks/Mobile_Home_Parks.csv", "home_parks_distance"),
        ("getting public school distance feature...", get_distance_feature,
         "datasets/schools/Public_Schools.csv", "public_school_distance"),
        ("getting city and distance feature...", get_city_and_distance_feature,
         "datasets/cities/CityBoundaries.csv", "city_distance"),
    ]
    for message, join_layer, layer_path, distance_column in distance_layers:
        print(message, end="")
        df = _one_row_per_fire(join_layer(df, layer_path, distance_column))
        print("Done")

    print("getting temperature features...", end="")
    df = temps_func(df)
    print("Done")

    print("getting ordinal fire class...", end="")
    df["fire_class_ordinal"] = df.apply(turn_fire_class_to_ordinal, axis=1)
    print("Done")

    print("getting usa by state features...", end="")
    df = get_info_by_states(df, "datasets/usa_by_state")
    print("Done")

    # Mean-impute the weather columns. Assign the result back instead of using
    # `df[col].fillna(..., inplace=True)`: that chained in-place form does not
    # update `df` under pandas Copy-on-Write, which would silently let the
    # blanket fillna(0) below zero-fill these columns.
    for column in ("max_temp", "min_temp", "prcp", "Pan_evaporation", "avg_temp"):
        df[column] = df[column].fillna(df[column].mean())

    df = df.fillna(0)

    return df

In [None]:
def preprocess(df, encoder, scaler=None):
    """Turn raw fire records into the normalized feature frame used by the models.

    Parameters
    ----------
    df : DataFrame
        Raw fire records. The input is copied, so the caller's frame is left
        untouched.
    encoder :
        Fitted one-hot encoder exposing ``categories_`` and ``transform`` for
        the columns ``STATE``, ``OWNER_DESCR`` and ``NAME`` (in that order).
        Its ``transform`` must return a dense array.
    scaler : optional
        An already fitted scaler exposing ``transform``. When omitted (the
        default, and the original behaviour) a new ``StandardScaler`` is
        fitted on ``df`` itself.
        NOTE(review): fitting on the data being scored means its scale differs
        from the training data's, and a single-row frame is scaled to all
        zeros. Pass the training scaler when one is available.

    Returns
    -------
    DataFrame
        Standardized float features plus the untouched ``STAT_CAUSE_CODE`` and
        ``STAT_CAUSE_DESCR`` columns.
    """
    # Work on a copy so helper columns do not leak into the caller's frame.
    df = df.copy()

    print("working on dates...", end="")
    df['DATE']=df.apply(to_date, axis=1)
    # CONT_DATE is overwritten with the discovery date and dropped further
    # down; reuse DATE instead of running a second row-wise apply.
    df['CONT_DATE'] = df['DATE']

    df['DATE'] = pd.to_datetime(df['DATE'])
    df['DAY'] = df['DATE'].dt.day
    df['MONTH'] = df['DATE'].dt.month
    df['YEAR'] = df['DATE'].dt.year

    df['JULY4']=df.apply(july4, axis=1)

    # Order matters below: each helper reads columns produced by earlier ones.
    df['C_DATE'] = df.apply(to_cont_date, axis=1)
    df['C_DATE_EXISTS'] = df.apply(is_cont_date, axis=1)
    df['C_DATE'] = pd.to_datetime(df['C_DATE'])
    df['EXISTS_TIMES']=df.apply(exists_times, axis=1)
    df['HOUR']=df.apply(add_hour, axis=1)
    df['DATE']=df.apply(add_hour_do_dis_date, axis=1)
    df['C_HOUR']=df.apply(add_cont_hour, axis=1)
    df['C_DATE']=df.apply(add_hour_to_cont_date, axis=1)
    df['DURATION']=df.apply(duration, axis=1)
    df['DURATION_DAYS']=df.apply(duration_days, axis=1)
    df['DURATION_HOURS']=df.apply(duration_hours, axis=1)
    df['DUR_30M']=df.apply(duration_30m, axis=1)
    df['DUR_1H']=df.apply(duration_1h, axis=1)
    df['DUR_1D']=df.apply(duration_same_d, axis=1)
    print("Done")

    df = df.drop(columns=["FPA_ID", "SOURCE_SYSTEM_TYPE", "SOURCE_SYSTEM", "NWCG_REPORTING_AGENCY",
                         "NWCG_REPORTING_UNIT_ID", "NWCG_REPORTING_UNIT_NAME", "SOURCE_REPORTING_UNIT",
                         "SOURCE_REPORTING_UNIT_NAME", "Shape", "OBJECTID", "LOCAL_FIRE_REPORT_ID",
                         "LOCAL_INCIDENT_ID", "FIRE_CODE", "FIRE_NAME", "ICS_209_INCIDENT_NUMBER",
                         "ICS_209_NAME", "MTBS_ID", "MTBS_FIRE_NAME", "COMPLEX_NAME", "FIPS_CODE", "FIPS_NAME",
                         "OWNER_CODE", "DATE", "FIRE_YEAR", "DISCOVERY_DATE", "DISCOVERY_TIME",
                         "CONT_DATE", "CONT_DOY", "CONT_TIME", "C_DATE", "DURATION"])

    df = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.LONGITUDE, df.LATITUDE), crs="EPSG:4326")

    df = get_features(df)

    df = df.rename(columns={"z": "population density"})
    df = df.drop(columns=["FOD_ID", "geometry"])

    print("One hot encoding...", end="")
    COLUMNS_NAME_ONE_HOT = list(encoder.categories_[0]) + list(encoder.categories_[1]) + list(encoder.categories_[2])
    one_hots = pd.DataFrame(encoder.transform(df[["STATE", "OWNER_DESCR", "NAME"]]),
                            index=df.index, columns=COLUMNS_NAME_ONE_HOT)
    print("Done")

    df = pd.concat([df, one_hots], axis=1)

    df = df.drop(columns=["COUNTY", "STATE", "FIRE_SIZE_CLASS", "OWNER_DESCR", "NAME", "Unnamed: 0"])

    features = [i for i in df.columns if i not in ['STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR']]
    df[features] = df[features].astype("float")

    print("Interaction of smokers and population size...", end="")
    df["Smoke everyday number"] = df["Smoke everyday"] * df["Total"]
    df["Smoke some days number"] = df["Smoke some days"] * df["Total"]
    df["Former smoker number"] = df["Former smoker"] * df["Total"]
    df["Never smoked number"] = df["Never smoked"] * df["Total"]
    print("Done")

    # Recompute: the interaction columns above are features too.
    features = [i for i in df.columns if i not in ['STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR']]

    print("Normalizing data...", end="")
    if scaler is None:
        scaled_values = StandardScaler().fit_transform(df[features])
    else:
        scaled_values = scaler.transform(df[features])
    # Assign positionally as a plain array. Wrapping the result in a new
    # DataFrame (fresh RangeIndex) and assigning it aligns on the index, which
    # misplaces rows / injects NaN whenever df's index has gaps after the
    # earlier drop_duplicates.
    df[features] = np.asarray(scaled_values)
    print("Done")

    return df

In [None]:
class ConfusionMatrixMixtureOfExperts:
    """Two-stage classifier: a general model plus experts for confusable classes.

    ``fit`` trains the initial model, measures how often classes are confused
    with each other, clusters the classes on that confusion and trains one
    expert classifier per multi-class cluster. ``predict`` lets the initial
    model pick a class and, if that class belongs to a multi-class cluster,
    defers the final decision to the cluster's expert.

    Parameters
    ----------
    initial_model : callable
        Factory/class of the first-stage estimator (needs ``fit``/``predict``).
    initial_model_params : dict
        Keyword arguments passed to ``initial_model``.
    distance_threshold : float
        Linkage distance above which class clusters are not merged. Distances
        are ``1 - symmetrised confusion rate`` and therefore lie in [0, 1].
    """

    def __init__(self, initial_model, initial_model_params, distance_threshold):
        self.initial_model = initial_model(**initial_model_params)
        self.clusters = None
        # class index -> expert estimator, or None for single-class clusters
        self.class2expert_classifier = {}
        self.distance_threshold = distance_threshold
        self.label2idx = {}
        self.idx2label = {}
        # class2cluster[i] is the cluster id of class index i
        self.class2cluster = None
        # cluster id -> list of class indices in that cluster
        self.cluster2classes = {}

    def get_labels(self, y_train):
        """Build the label <-> integer index mappings from the training labels."""
        self.label2idx = {}
        self.idx2label = {}
        unique_labels = set(y_train)
        try:
            # Sort so indices are reproducible; set order varies between runs.
            ordered_labels = sorted(unique_labels)
        except TypeError:
            # Labels that cannot be ordered keep the set's iteration order.
            ordered_labels = list(unique_labels)
        for idx, label in enumerate(ordered_labels):
            self.idx2label[idx] = label
            self.label2idx[label] = idx

    def choose_clf(self, X, y):
        """Return the random forest (over several depths) with the best weighted F1.

        The candidates are scored on a random hold-out split of ``X``/``y``.
        """
        X_train, X_test, y_train, y_test = train_test_split(X, y)
        best_clf = None
        # Start below any attainable score so a candidate is always returned,
        # even if every F1 is 0.
        best_f1 = -1.0
        # random forest:
        max_depth = [10, 20, 30, 40, 50]
        for depth in max_depth:
            rf = RandomForestClassifier(max_depth=depth, n_jobs=-1)
            rf.fit(X_train, y_train)
            f1 = f1_score(y_test, rf.predict(X_test), average='weighted')
            if f1 > best_f1:
                best_clf = rf
                best_f1 = f1
        return best_clf

    def _cluster_classes(self, dists):
        """Cluster class indices by average linkage on a precomputed distance matrix."""
        if len(dists) < 2:
            # Nothing to cluster: every class (if any) is its own cluster.
            return np.zeros(len(dists), dtype=int)
        options = dict(n_clusters=None, linkage='average',
                       distance_threshold=self.distance_threshold)
        try:
            clusterer = AgglomerativeClustering(metric='precomputed', **options)
        except TypeError:
            # Older scikit-learn releases call this parameter `affinity`.
            clusterer = AgglomerativeClustering(affinity='precomputed', **options)
        return clusterer.fit_predict(dists)

    def fit(self, X, y):
        """Fit the initial model, cluster confusable classes and train the experts.

        Returns ``self``.
        """
        # Positional row selection is used throughout, so work on an array.
        X = np.asarray(X)
        self.get_labels(y)
        y = np.array([self.label2idx[label] for label in y])
        # Reset state from any previous fit.
        self.class2expert_classifier = {}
        self.cluster2classes = {}

        self.initial_model.fit(X, y)
        y_pred = self.initial_model.predict(X)

        n_classes = len(self.label2idx)
        cm = confusion_matrix(y, y_pred, labels=np.arange(n_classes)).astype(float)
        # Turn counts into per-true-class rates so that 1 - rate is a distance
        # in [0, 1] and comparable with distance_threshold.
        row_totals = cm.sum(axis=1, keepdims=True)
        rates = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals > 0)
        # Symmetrise BEFORE deriving distances; a distance matrix must be symmetric.
        similarity = (rates + rates.transpose()) / 2
        dists = 1 - similarity
        np.fill_diagonal(dists, 0)

        self.class2cluster = self._cluster_classes(dists)
        # self.class2cluster[i] is cluster of class i
        for class_, cluster in enumerate(self.class2cluster):
            self.cluster2classes.setdefault(int(cluster), []).append(class_)
        # self.cluster2classes[c] is classes belonging to cluster c
        print(self.cluster2classes)
        print(f"found {len(set(self.class2cluster))} cluster")
        for cluster, classes in self.cluster2classes.items():
            if len(classes) > 1:
                # selecting examples whose class belongs to the cluster
                in_cluster = np.isin(y, classes)
                X_cluster = X[in_cluster]
                y_cluster = y[in_cluster]
                # training an expert for classes
                expert = self.choose_clf(X_cluster, y_cluster)
                print(f"fitting expert for cluster {cluster}")
                # Refit the selected configuration on all of the cluster's data.
                expert.fit(X_cluster, y_cluster)
                for y_ in classes:
                    self.class2expert_classifier[y_] = expert
            else:
                # the cluster contains a single class, no need for expert
                y_ = classes[0]
                self.class2expert_classifier[y_] = None

        return self

    def predict(self, X):
        """Predict original labels for ``X`` (array-like of shape (n_samples, n_features))."""
        X = np.asarray(X)
        y_pred_initial = np.asarray(self.initial_model.predict(X))
        y_pred_final = y_pred_initial.copy()

        # Group class indices by the expert that serves them, so each expert
        # is called once on all of its rows instead of once per row.
        routed = {}
        for class_idx, expert in self.class2expert_classifier.items():
            if expert is not None:
                routed.setdefault(id(expert), (expert, []))[1].append(class_idx)

        for expert, class_indices in routed.values():
            rows = np.flatnonzero(np.isin(y_pred_initial, class_indices))
            if rows.size:
                y_pred_final[rows] = expert.predict(X[rows])

        y_pred_final = np.array(list(map(self.idx2label.get, y_pred_final)))

        return y_pred_final

In [None]:
def test_model_on_test_set(test_df):
    """Train both chosen models and report their performance on ``test_df``.

    The raw ``test_df`` is preprocessed with the pickled encoder in
    ``encoder.p``. A random forest and a ConfusionMatrixMixtureOfExperts are
    then trained on the randomly over-sampled
    ``datasets/preprocessed_final.csv`` and a classification report is printed
    for each model.

    Returns
    -------
    tuple
        ``(X_test, y_test, preds_conf, preds_rf)``: the preprocessed test
        features and labels as arrays, then the mixture-of-experts and
        random-forest predictions.
    """
    print("Preprocessing test df.")
    # NOTE(review): unpickling runs arbitrary code; only load an encoder.p
    # that comes from a trusted source.
    with open("encoder.p", "rb") as encoder_file:
        encoder = pickle.load(encoder_file)
    test_df = preprocess(test_df, encoder)

    X_test = test_df.drop(columns=['STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR'])
    y_test = test_df['STAT_CAUSE_DESCR']

    # NOTE(review): the test features are passed positionally, so their column
    # order is assumed to match preprocessed_final.csv -- confirm.
    X_test = np.array(X_test)
    y_test = np.array(y_test)


    print("Training our chosen models:")
    df = pd.read_csv('datasets/preprocessed_final.csv')

    X = df.drop(columns=['STAT_CAUSE_CODE', 'STAT_CAUSE_DESCR'])
    y = df['STAT_CAUSE_DESCR']

    print("Using random over sampler...", end="")
    ros = RandomOverSampler(random_state=0)
    X, y = ros.fit_resample(X, y)
    print("Done.")

    print("Training Random forest:")
    rf = RandomForestClassifier(random_state= 666, n_estimators= 400, min_samples_split= 2, min_samples_leaf= 2,
                                max_features= 'sqrt', max_depth= 55, bootstrap= True, n_jobs=-1)

    rf.fit(X,y)
    print("Predicting labels of test set using random forest...", end="")
    preds_rf = rf.predict(X_test)
    print("Done.")

    print(classification_report(y_test, preds_rf))

    print("Training Confusion Matrix Mixture of Experts:")
    model = ConfusionMatrixMixtureOfExperts(RandomForestClassifier, {"n_jobs":-1}, distance_threshold=0.9)
    model.fit(X, y)

    print("Predicting labels of test set...", end="")
    preds_conf = model.predict(X_test)
    print("Done.")

    print(classification_report(y_test, preds_conf))

    return X_test, y_test, preds_conf, preds_rf

The cell below runs the models on the `test_df` provided above. It returns the preprocessed X and y from the test DataFrame together with the predictions of both models.
The Confusion Matrix Mixture of Experts may take a while to finish predicting, so we first present the results of the RandomForestClassifier, whose parameters were found using random search CV.

In [None]:
# Requires `test_df` to be a DataFrame; prints one classification report per model.
X_test, y_test, preds_conf, preds_rf = test_model_on_test_set(test_df)

Preprocessing test df.
working on dates...Done
getting pop feature...Done
getting rails feature...Done
getting powerline feature...Done
getting landfill feature...Done
getting home_parks_distance feature...Done
getting public school distance feature...Done
getting city and distance feature...Done
getting temperature features...Done
getting ordinal fire class...Done
getting usa by state features...Done
One hot encoding...Done
Interaction of smokers and population size...Done
Normalizing data...Done
Training our chosen models:
Using random over sampler...Done.
Training Random forest:
Predicting labels of test set using random forest...Done.
                   precision    recall  f1-score   support

Missing/Undefined       1.00      1.00      1.00         1

         accuracy                           1.00         1
        macro avg       1.00      1.00      1.00         1
     weighted avg       1.00      1.00      1.00         1

Training Confusion Matrix Mixture of Experts:
{1: [0], 