In [1]:
import sys
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR
import shap

## Instance 1 - Case Info

In [58]:
df = pd.read_csv("../datasets/TN_processed/TN_case_info.csv")
df.dtypes

date                              object
testing_facilities                 int64
government_testing_facilities      int64
private_testing_facilities         int64
active_cases_yesterday             int64
positive_tested_cases              int64
discharged_patients                int64
deaths_today                       int64
active_cases_today                 int64
Rt_mean                          float64
dtype: object

In [61]:
df = pd.read_csv("../datasets/TN_processed/TN_airport_surveillance.csv")
df.dtypes

date                object
flights_arrived      int64
passengers           int64
positive_cases       int64
Rt_mean            float64
dtype: object

### Check for format and Test-train split

In [62]:
columns = []
if "Rt_mean" in df.columns:
    columns = [col for col in df.columns]
    columns.remove("date")
    columns.remove("Rt_mean")
else:
    raise Exception("Rt_mean not found")
    sys.exit()
X = df[columns]
Y = df["Rt_mean"]
X_train, X_test, Y_train, Y_test = train_test_split(X, Y)

### Regressor Model Fitting

In [63]:
model1 = RandomForestRegressor()
model1.fit(X_train, Y_train)
pred1 = model1.predict(X_test)
errors1 = abs(pred1 - Y_test)
print("With RandomForestRegressor")
print("Mean Absolute Error: ", round(np.mean(errors1), 2), " degrees")
MAPE = 100 * (errors1/Y_test)
accuracy = 100 - np.mean(MAPE)
print("Accuracy: ", round(accuracy, 2))

With RandomForestRegressor
Mean Absolute Error:  0.01  degrees
Accuracy:  98.88


In [64]:
model2 = ExtraTreesRegressor()
model2.fit(X_train, Y_train)
pred2 = model2.predict(X_test)
errors2 = abs(pred2 - Y_test)
print("With ExtraTreesRegressor")
print("Mean Absolute Error: ", round(np.mean(errors2), 2), " degrees")
MAPE = 100 * (errors2/Y_test)
accuracy = 100 - np.mean(MAPE)
print("Accuracy: ", round(accuracy, 2))

With ExtraTreesRegressor
Mean Absolute Error:  0.01  degrees
Accuracy:  99.34


In [65]:
model3 = DecisionTreeRegressor()
model3.fit(X_train, Y_train)
pred3 = model3.predict(X_test)
errors3 = abs(pred3 - Y_test)
print("With DecisionTreeRegressor")
print("Mean Absolute Error: ", round(np.mean(errors3), 2), " degrees")
MAPE = 100 * (errors3/Y_test)
accuracy = 100 - np.mean(MAPE)
print("Accuracy: ", round(accuracy, 2))

With DecisionTreeRegressor
Mean Absolute Error:  0.02  degrees
Accuracy:  98.47


In [32]:
class ModelInstance:
    def __init__(self, csv_file):
        self.csv_file = csv_file
        self.df = pd.read_csv(self.csv_file)
        self.df = self.df.fillna(0)
        print(self.df.dtypes)
        self.columns = []
        self.X_train = []
        self.X_test = []
        self.Y_train = []
        self.Y_test = []
        self.random_forest = dict()
        self.extra_trees = dict()
        self.decision_tree = dict()
        self.train_test_split_data()
        
    def train_test_split_data(self):
        if "Rt_mean" in self.df.columns:
            self.columns = [col for col in self.df.columns]
            self.columns.remove("date")
            self.columns.remove("Rt_mean")
        else:
            raise Exception("Rt_mean not found")
            sys.exit()
        X = self.df[self.columns]
        Y = self.df["Rt_mean"]
        self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(X, Y)
        
    def regress_random_forest(self):
        model = RandomForestRegressor()
        model.fit(self.X_train, self.Y_train)
        pred = model.predict(self.X_test)
        errors = abs(pred - self.Y_test)
        print("\n\nWith RandomForestRegressor")
        print("Mean Absolute Error: ", round(np.mean(errors), 2), " degrees")
        MAPE = 100 * (errors/self.Y_test)
        accuracy = 100 - np.mean(MAPE)
        print("Accuracy: ", round(accuracy, 2))
        self.random_forest = {
            "mape": MAPE,
            "accuracy": accuracy
        }
        
    def regress_decision_tree(self):
        model = DecisionTreeRegressor()
        model.fit(self.X_train, self.Y_train)
        pred = model.predict(self.X_test)
        errors = abs(pred - self.Y_test)
        print("\n\nWith RandomForestRegressor")
        print("Mean Absolute Error: ", round(np.mean(errors), 2), " degrees")
        MAPE = 100 * (errors/self.Y_test)
        accuracy = 100 - np.mean(MAPE)
        print("Accuracy: ", round(accuracy, 2))
        self.decision_tree = {
            "mape": MAPE,
            "accuracy": accuracy
        }

    def regress_extra_trees(self):
        model = ExtraTreesRegressor()
        model.fit(self.X_train, self.Y_train)
        pred = model.predict(self.X_test)
        errors = abs(pred - self.Y_test)
        print("\n\nWith RandomForestRegressor")
        print("Mean Absolute Error: ", round(np.mean(errors), 2), " degrees")
        MAPE = 100 * (errors/self.Y_test)
        accuracy = 100 - np.mean(MAPE)
        print("Accuracy: ", round(accuracy, 2))
        self.extra_trees = {
            "mape": MAPE,
            "accuracy": accuracy
        }

In [33]:
model1 = ModelInstance(csv_file="../datasets/TN_processed/TN_airport_surveillance.csv")
model1.regress_random_forest()
model1.regress_decision_tree()
model1.regress_extra_trees()

date                object
flights_arrived      int64
passengers           int64
positive_cases       int64
Rt_mean            float64
dtype: object


With RandomForestRegressor
Mean Absolute Error:  0.01  degrees
Accuracy:  98.9


With RandomForestRegressor
Mean Absolute Error:  0.02  degrees
Accuracy:  98.53


With RandomForestRegressor
Mean Absolute Error:  0.01  degrees
Accuracy:  99.25


In [34]:
model2 = ModelInstance(csv_file="../datasets/TN_processed/TN_comorbidities_deaths.csv")
model2.regress_random_forest()
model2.regress_decision_tree()
model2.regress_extra_trees()

date                                               object
comorbidities_government_dme                      float64
comorbidities_government_dms                      float64
comorbidities_private                             float64
comorbidities_other_government_institutions       float64
comorbidities_total                               float64
no_comorbidities_government_dme                     int64
no_comorbidities_government_dms                     int64
no_comorbidities_private                            int64
no_comorbidities_other_government_institutions    float64
no_comorbidities_total                              int64
Rt_mean                                           float64
dtype: object


With RandomForestRegressor
Mean Absolute Error:  0.18  degrees
Accuracy:  85.58


With RandomForestRegressor
Mean Absolute Error:  0.2  degrees
Accuracy:  84.44


With RandomForestRegressor
Mean Absolute Error:  0.19  degrees
Accuracy:  85.34
