In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import classification_report, confusion_matrix, multilabel_confusion_matrix
from sklearn.metrics import mean_squared_error, accuracy_score, precision_score, recall_score

In [5]:
filename = "Updated Fire Data - First 10k.csv"
dataset = pd.read_csv(filename)

targetFeatures = ["FIRE_SIZE_CLASS"]
inputFeatures = ["FIRE_YEAR", "DISCOVERY_DOY", "LATITUDE", "LONGITUDE", "DURATION_HOURS", "Precipitation_In_Month", "Avg_Temp_In_Month"]

# Drop NaN values from columns in targetFeatures and inputFeatures
dataset.dropna(subset=targetFeatures + inputFeatures, inplace=True)

X = dataset[inputFeatures]
y = dataset[targetFeatures]

# normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = X.columns)

categories = [['A', 'B', 'C', 'D', 'E', 'F', 'G']]
encoder = OneHotEncoder(categories=categories, sparse=False)
y = encoder.fit_transform(y.values.reshape(-1,1))

print("Pre-processed data :")
print(X)

print("Pre-processed class :")
print(y)

KeyError: ['Latitude', 'Longitude']

In [None]:
data_train, data_test, class_train, class_test = train_test_split(X, y, test_size=0.3)

mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.2, batch_size = 10, hidden_layer_sizes = (10, 10), max_iter = 500)

mlp.fit(data_train, class_train)

pred = mlp.predict(data_test)
pred

In [None]:
print("Accuracy : ", accuracy_score(class_test, pred))
print("Mean Square Error : ", mean_squared_error(class_test, pred))

print("Confusion Matrix for each label : ")
#print(multilabel_confusion_matrix(class_test, pred))

classNum = 0
for matrix in multilabel_confusion_matrix(class_test, pred):
  print(f"Class: {categories[0][classNum]}")
  classNum += 1

  tn, fp, fn, tp = matrix.ravel()
  print(f"         Actual Positive | Actual Negative")
  print(f"-------------------|-----|----------------")
  print(f"Predicted Positive | {str(tp).rjust(3)} | {str(fp).rjust(3)}")
  print(f"-------------------|-----|----------------")
  print(f"Predicted Negative | {str(fn).rjust(3)} | {str(tn).rjust(3)}")
  print(f"TP: {tp}, FP: {fp}, FN: {fn}, TN: {tn}\n")

print("Classification Report : ")
print(classification_report(class_test, pred))

# Original Model

In [168]:
filename = "Updated Fire Data Sample - First 100 Only - Sheet1.csv"
dataset = pd.read_csv(filename)

targetFeatures = ["FIRE_SIZE_CLASS"]
inputFeatures = ["FIRE_YEAR", "DISCOVERY_DOY", "DISCOVERY_TIME", "Precipitation_In_Month", "Avg_Temp_In_Month"]

# Drop NaN values from columns in targetFeatures and inputFeatures
dataset.dropna(subset=targetFeatures + inputFeatures, inplace=True)

X = dataset[inputFeatures]
y = dataset[targetFeatures]

# normalize data
scaler = MinMaxScaler(feature_range=(0, 1))
X_rescaled = scaler.fit_transform(X)
X = pd.DataFrame(data = X_rescaled, columns = X.columns)

categories = [['A', 'B', 'C', 'D', 'G']]
encoder = OneHotEncoder(categories=categories, sparse=False)
y = encoder.fit_transform(y.values.reshape(-1,1))

print("Pre-processed data :")
print(X)

print("Pre-processed class :")
print(y)

Pre-processed data :
    FIRE_YEAR  DISCOVERY_DOY  DISCOVERY_TIME  Precipitation_In_Month  \
0         1.0       0.095975        0.588101                0.666055   
1         0.0       0.405573        0.379863                0.003670   
2         0.0       0.464396        0.872311                0.003670   
3         0.0       0.551084        0.725400                0.000000   
4         0.0       0.551084        0.725400                0.000000   
..        ...            ...             ...                     ...   
95        1.0       0.383901        0.517162                0.111927   
96        1.0       0.578947        0.000000                0.042202   
97        1.0       0.495356        0.863158                0.168807   
98        1.0       0.529412        0.826087                0.168807   
99        1.0       0.560372        0.588101                0.216514   

    Avg_Temp_In_Month  
0            0.416961  
1            0.674912  
2            0.674912  
3            0.791



In [169]:
data_train, data_test, class_train, class_test = train_test_split(X, y, test_size=0.3)

mlp = MLPClassifier(solver = 'sgd', random_state = 42, activation = 'logistic', learning_rate_init = 0.2, batch_size = 10, hidden_layer_sizes = (10, 10), max_iter = 500)

mlp.fit(data_train, class_train)

pred = mlp.predict(data_test)
pred

array([[0, 0, 1, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 1, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0],
       [1, 0, 0, 0, 0]])

In [170]:
print("Accuracy : ", accuracy_score(class_test, pred))
print("Mean Square Error : ", mean_squared_error(class_test, pred))

print("Confusion Matrix for each label : ")
#print(multilabel_confusion_matrix(class_test, pred))

classNum = 0
for matrix in multilabel_confusion_matrix(class_test, pred):
  print(f"Class: {categories[0][classNum]}")
  classNum += 1

  tn, fp, fn, tp = matrix.ravel()
  print(f"         Actual Positive | Actual Negative")
  print(f"-------------------|-----|----------------")
  print(f"Predicted Positive | {str(tp).rjust(3)} | {str(fp).rjust(3)}")
  print(f"-------------------|-----|----------------")
  print(f"Predicted Negative | {str(fn).rjust(3)} | {str(tn).rjust(3)}")
  print(f"TP: {tp}, FP: {fp}, FN: {fn}, TN: {tn}\n")

print("Classification Report : ")
print(classification_report(class_test, pred))

Accuracy :  0.7
Mean Square Error :  0.11333333333333333
Confusion Matrix for each label : 
Class: A
         Actual Positive | Actual Negative
-------------------|-----|----------------
Predicted Positive |  22 |   7
-------------------|-----|----------------
Predicted Negative |   1 |   0
TP: 22, FP: 7, FN: 1, TN: 0

Class: B
         Actual Positive | Actual Negative
-------------------|-----|----------------
Predicted Positive |   0 |   1
-------------------|-----|----------------
Predicted Negative |   6 |  23
TP: 0, FP: 1, FN: 6, TN: 23

Class: C
         Actual Positive | Actual Negative
-------------------|-----|----------------
Predicted Positive |   0 |   1
-------------------|-----|----------------
Predicted Negative |   1 |  28
TP: 0, FP: 1, FN: 1, TN: 28

Class: D
         Actual Positive | Actual Negative
-------------------|-----|----------------
Predicted Positive |   0 |   0
-------------------|-----|----------------
Predicted Negative |   0 |  30
TP: 0, FP: 0, FN: 0, 

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
