In [1]:
import pandas as pd
import urllib.request
import datetime as dt
import requests
from bs4 import BeautifulSoup
from datetime import datetime, timedelta
from haversine import haversine, Unit
from shapely.geometry import Point
import geopandas as gpd
from geopandas import GeoDataFrame
import pytz
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, precision_score
import os
import glob

## 1) Baseline and Hard Vote Ensemble Accuracies

In [2]:
# Load every hard-voting result file found in the current working directory
# into a single frame, tagging each row with the file it came from.
path = os.getcwd()

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of master_df reproducible between runs and machines.
csv_files = sorted(glob.glob(os.path.join(path, "*_all_hard_voting_35.csv")))
if not csv_files:
    # Fail early with a clear message instead of an obscure KeyError further down.
    raise FileNotFoundError(
        "No '*_all_hard_voting_35.csv' files found in " + path
    )

# Collect the per-file frames and concatenate once: calling pd.concat inside
# the loop re-copies all previously loaded rows on every iteration.
frames = []
for f in csv_files:
    df = pd.read_csv(f)
    df["file"] = f
    frames.append(df)

# ignore_index=True yields a fresh 0..n-1 index, replacing the former
# reset_index() + drop("index") pair.
master_df = pd.concat(frames, ignore_index=True)

# "Unnamed: 0" is the index column that was written into the CSVs; tolerate
# files that were saved without it.
master_df = master_df.drop(columns=["Unnamed: 0"], errors="ignore")
master_df

Unnamed: 0,timestamp,image_gt,image_pred,goes16_pred,goes17_pred,final_pred,type,file
0,2020-08-29 17:13:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
1,2020-08-29 17:14:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
2,2020-08-29 17:15:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
3,2020-08-29 17:16:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
4,2020-08-29 17:17:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
...,...,...,...,...,...,...,...,...
8513,2020-08-28 16:16:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8514,2020-08-28 16:17:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8515,2020-08-28 16:18:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8516,2020-08-28 16:19:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...


In [3]:
# Rename the generic prediction columns so each one names the model that
# produced it.
master_df = master_df.rename(
    columns={
        "image_pred": "smokeynet_pred",
        "final_pred": "hard_vote_pred",
    }
)
master_df

Unnamed: 0,timestamp,image_gt,smokeynet_pred,goes16_pred,goes17_pred,hard_vote_pred,type,file
0,2020-08-29 17:13:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
1,2020-08-29 17:14:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
2,2020-08-29 17:15:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
3,2020-08-29 17:16:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
4,2020-08-29 17:17:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
...,...,...,...,...,...,...,...,...
8513,2020-08-28 16:16:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8514,2020-08-28 16:17:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8515,2020-08-28 16:18:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...
8516,2020-08-28 16:19:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...


In [4]:
# Keep only the rows of the test split; .copy() gives an independent frame so
# columns can be added later without touching master_df.
is_test_row = master_df["type"].eq("test")
master_copy_df = master_df.loc[is_test_row].copy()

In [5]:
# Baseline accuracy of each individual model and of the hard-vote ensemble,
# measured only on rows that carry a ground-truth image label.
labeled_rows = master_copy_df.loc[master_copy_df["image_gt"].notna()]

image_labels = labeled_rows["image_gt"]
smokeynet_preds = labeled_rows["smokeynet_pred"]
goes16_preds = labeled_rows["goes16_pred"]
goes17_preds = labeled_rows["goes17_pred"]
hard_vote_ensemble_preds = labeled_rows["hard_vote_pred"]

smokeynet_baseline_score = accuracy_score(image_labels, smokeynet_preds)
goes16_baseline_score = accuracy_score(image_labels, goes16_preds)
goes17_baseline_score = accuracy_score(image_labels, goes17_preds)
hard_vote_ensemble_score = accuracy_score(image_labels, hard_vote_ensemble_preds)

# Report the four accuracies, one per line.
for label, score in (
    ("Baseline score SmokeyNet:", smokeynet_baseline_score),
    ("Baseline score GOES-16:", goes16_baseline_score),
    ("Baseline score GOES-17:", goes17_baseline_score),
    ("Hard Vote Ensemble score:", hard_vote_ensemble_score),
):
    print(label, score)

Baseline score SmokeyNet: 0.7857481060606061
Baseline score GOES-16: 0.5165719696969697
Baseline score GOES-17: 0.5717329545454546
Hard Vote Ensemble score: 0.5674715909090909


In [6]:
# SmokeyNet baseline: (precision, recall, F1, support) for the positive class.
precision_recall_fscore_support(
    y_true=image_labels,
    y_pred=smokeynet_preds,
    average="binary",
)

(0.8540145985401459, 0.7022160664819944, 0.7707119331137572, None)

In [7]:
# GOES-16 baseline: (precision, recall, F1, support) for the positive class.
precision_recall_fscore_support(
    y_true=image_labels,
    y_pred=goes16_preds,
    average="binary",
)

(0.9920634920634921, 0.05771006463527239, 0.10907504363001745, None)

In [8]:
# GOES-17 baseline: (precision, recall, F1, support) for the positive class.
precision_recall_fscore_support(
    y_true=image_labels,
    y_pred=goes17_preds,
    average="binary",
)

(0.9863760217983651, 0.16712834718374883, 0.28582708251085664, None)

In [9]:
# Hard-voting ensemble: (precision, recall, F1, support) for the positive class.
precision_recall_fscore_support(
    y_true=image_labels,
    y_pred=hard_vote_ensemble_preds,
    average="binary",
)

(0.9970674486803519, 0.1569713758079409, 0.27124052652572794, None)

## 2) Weighted Voting Ensemble Model Accuracies

In [10]:
# Weighted-average voting: combine the three models' 0/1 predictions into a
# single score in [0, 1], then binarize that score at several thresholds.

# Each model's vote is weighted by its baseline accuracy. The weights are read
# from the scores computed earlier in this notebook rather than pasted in as
# literals, so they cannot drift out of sync if the input files change.
# NOTE(review): these accuracies are measured on the same test rows that the
# weighted ensemble is evaluated on below -- confirm this is intended.
smokeynet_weight = smokeynet_baseline_score
goes16_weight = goes16_baseline_score
goes17_weight = goes17_baseline_score

# The prediction series hold only rows with a ground-truth label; assignment
# aligns on the index, so any other row of master_copy_df receives NaN here
# (and therefore 0 in every threshold column below).
master_copy_df["weighted_avg"] = (
    (smokeynet_weight * smokeynet_preds)
    + (goes16_weight * goes16_preds)
    + (goes17_weight * goes17_preds)
) / (smokeynet_weight + goes16_weight + goes17_weight)

# Output column -> voting threshold. A row votes "smoke" (1) when its weighted
# average is at or above the threshold, otherwise 0.
vote_thresholds = {
    "thresh2": 0.2,
    "thresh3": 0.3,
    "thresh4": 0.4,
    "thresh5": 0.5,
    "thresh6": 0.6,
    "thresh7": 0.7,
}
for column, threshold in vote_thresholds.items():
    master_copy_df[column] = (master_copy_df["weighted_avg"] >= threshold).astype(int)

master_copy_df

Unnamed: 0,timestamp,image_gt,smokeynet_pred,goes16_pred,goes17_pred,hard_vote_pred,type,file,weighted_avg,thresh2,thresh3,thresh4,thresh5,thresh6,thresh7
0,2020-08-29 17:13:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.000000,0,0,0,0,0,0
1,2020-08-29 17:14:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.000000,0,0,0,0,0,0
2,2020-08-29 17:15:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.000000,0,0,0,0,0,0
3,2020-08-29 17:16:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.000000,0,0,0,0,0,0
4,2020-08-29 17:17:00+00:00,0.0,0.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.000000,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8513,2020-08-28 16:16:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.419277,1,1,1,0,0,0
8514,2020-08-28 16:17:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.419277,1,1,1,0,0,0
8515,2020-08-28 16:18:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.419277,1,1,1,0,0,0
8516,2020-08-28 16:19:00+00:00,1.0,1.0,0.0,0.0,0.0,test,/home/ezen/Documents/DSE/2020-ezen/DSE260/smok...,0.419277,1,1,1,0,0,0


### 0.2 voting threshold accuracy, precision, recall, F1

In [11]:
# Accuracy at the 0.2 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh2"])

0.7964015151515151

In [12]:
# Precision / recall / F1 at the 0.2 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh2"],
    average="binary",
)

(0.8556644880174292, 0.7253000923361034, 0.7851074462768616, None)

### 0.3 voting threshold accuracy, precision, recall, F1

In [13]:
# Accuracy at the 0.3 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh3"])

0.7959280303030303

In [14]:
# Precision / recall / F1 at the 0.3 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh3"],
    average="binary",
)

(0.8555070883315158, 0.724376731301939, 0.7845, None)

### 0.4 voting threshold accuracy, precision, recall, F1

In [15]:
# Accuracy at the 0.4 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh4"])

0.7878787878787878

In [16]:
# Precision / recall / F1 at the 0.4 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh4"],
    average="binary",
)

(0.8543526785714286, 0.7068328716528163, 0.773623041940374, None)

### 0.5 voting threshold accuracy, precision, recall, F1

In [17]:
# Accuracy at the 0.5 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh5"])

0.5674715909090909

In [18]:
# Precision / recall / F1 at the 0.5 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh5"],
    average="binary",
)

(0.9970674486803519, 0.1569713758079409, 0.27124052652572794, None)

### 0.6 voting threshold accuracy, precision, recall, F1

In [19]:
# Accuracy at the 0.6 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh6"])

0.5653409090909091

In [20]:
# Precision / recall / F1 at the 0.6 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh6"],
    average="binary",
)

(1.0, 0.1523545706371191, 0.26442307692307687, None)

### 0.7 voting threshold accuracy, precision, recall, F1

In [21]:
# Accuracy at the 0.7 threshold. image_labels holds only rows with a
# ground-truth label, so pick the matching prediction rows by index to keep
# both inputs the same length.
accuracy_score(image_labels, master_copy_df.loc[image_labels.index, "thresh7"])

0.5615530303030303

In [22]:
# Precision / recall / F1 at the 0.7 threshold, restricted to the rows that
# image_labels covers so both inputs have the same length.
precision_recall_fscore_support(
    image_labels,
    master_copy_df.loc[image_labels.index, "thresh7"],
    average="binary",
)

(1.0, 0.14496768236380425, 0.2532258064516129, None)