In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.sampler import SubsetRandomSampler, SequentialSampler
import warnings
warnings.simplefilter(action='ignore', category=UserWarning)
from sklearn.metrics import f1_score, accuracy_score


In [2]:
import os 
import glob
import pandas as pd
from torch.utils.data import Dataset
import numpy as np
import torch
from glob import glob

class TimeSeriesDataset(Dataset):
    def __init__(self, folder_nolayoff, folder_layoff, src_col):
        
        # csvs_layoffs = glob.glob(folder_layoff + "/*.csv")
        # csvs_nolayoffs = glob.glob(folder_nolayoff + "/*.csv")
        X_train = []
        Y_train = []
        all_csv_files = []
        EXT = "*.csv"  # Define the variable EXT
        for path, subdir, files in os.walk(folder_layoff):
            
            for file in glob(os.path.join(path, EXT)):
                # print(file)
                df = pd.read_csv(file, index_col=0, parse_dates=True).sort_index()
                ## If length of dataset is over 90, then cut it to 90
                if len(df) > 90:
                    print("OVER LENGTH", file)
                    df = df[-90:]
                ## If length of dataset is less than 90, then skip it
                if len(df) < 90:
                    continue
                X_train.append(df[src_col].values)
                Y_train.append(float(1))
                # all_csv_files.append(file)

        for path, subdir, files in os.walk(folder_nolayoff):
            for file in glob(os.path.join(path, EXT)):
                df = pd.read_csv(file, index_col=0, parse_dates=True).sort_index()
                ## If length of dataset is over 90, then cut it to 90
                if len(df) > 90:
                    print("OVER LENGTH", file)
                    df = df[-90:]

                ## If length of dataset is less than 90, then skip it
                if len(df) < 90:
                    continue
                X_train.append(df[src_col].values)
                Y_train.append(float(0))
        X_train = torch.from_numpy(np.array(X_train))
        Y_train = torch.from_numpy(np.array(Y_train))
        self.X_train = X_train
        self.Y_train = Y_train
        self.num_channels = X_train.shape[1]
        self.len = X_train.shape[0]
        
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.X_train[idx].float(), self.Y_train[idx]



In [3]:
class TimeSeriesDatasetEval(Dataset):
    def __init__(self, folder, src_col):
        
        # csvs_layoffs = glob.glob(folder_layoff + "/*.csv")
        # csvs_nolayoffs = glob.glob(folder_nolayoff + "/*.csv")
        X_train = []
        Y_train = []
        all_csv_files = []
        EXT = "*.csv"  # Define the variable EXT
        for path, subdir, files in os.walk(folder):
            
            for file in glob(os.path.join(path, EXT)):
                # print(file)
                df = pd.read_csv(file, index_col=0, parse_dates=True).sort_index()
                ## If length of dataset is over 90, then cut it to 90
                if len(df) > 90:
                    print("OVER LENGTH", file)
                    df = df[-90:]
                ## If length of dataset is less than 90, then skip it
                if len(df) < 90:
                    continue
                X_train.append(df[src_col].values)
                if "layoff" in folder:
                    Y_train.append(float(1))
                else:
                    Y_train.append(float(0))
                # all_csv_files.append(file)

        X_train = torch.from_numpy(np.array(X_train))
        Y_train = torch.from_numpy(np.array(Y_train))
        self.X_train = X_train
        self.Y_train = Y_train
        self.num_channels = X_train.shape[1]
        self.len = X_train.shape[0]
        
    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.X_train[idx].float(), self.Y_train[idx]

## Eval Datasets...

In [4]:
dataloader_real_layoffs_minmax = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/real", src_col = "Scaled_Price_MinMax")
dataloader_real_layoffs_oipa = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/real", src_col = "open_inproportion_to_average")
dataloader_real_layoffs_on = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/real", src_col = "open_normalized")
dataloader_real_nolayoffs_on =  TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/real", src_col = "open_normalized")
dataloader_real_nolayoffs_minmax = TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/real", src_col = "Scaled_Price_MinMax")
dataloader_real_nolayoffs_oipa = TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/real", src_col = "open_inproportion_to_average")
dataloader_chronos_layoffs_minmax = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/chronos", src_col = "Scaled_Price_MinMax")
dataloader_chronos_layoffs_oipa = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/chronos", src_col = "median_inproportion_to_average")
dataloader_chronos_layoffs_on = TimeSeriesDatasetEval(folder = "chronos_stocks_layoffs/chronos", src_col = "median_normalized")
dataloader_chronos_nolayoffs_minmax = TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/chronos", src_col = "Scaled_Price_MinMax")
dataloader_chronos_nolayoffs_oipa = TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/chronos", src_col = "median_inproportion_to_average")
dataloader_chronos_nolayoffs_on = TimeSeriesDatasetEval(folder = "chronos_stocks_nolayoffs/chronos", src_col = "median_normalized")



In [5]:
dataloader_chronos_layoffs_minmax = torch.utils.data.DataLoader(dataloader_chronos_layoffs_minmax, batch_size=1, shuffle=False)
dataloader_chronos_layoffs_oipa = torch.utils.data.DataLoader(dataloader_chronos_layoffs_oipa, batch_size=1, shuffle=False)
dataloader_chronos_layoffs_on = torch.utils.data.DataLoader(dataloader_chronos_layoffs_on, batch_size=1, shuffle=False)
dataloader_chronos_nolayoffs_minmax = torch.utils.data.DataLoader(dataloader_chronos_nolayoffs_minmax, batch_size=1, shuffle=False)
dataloader_chronos_nolayoffs_oipa = torch.utils.data.DataLoader(dataloader_chronos_nolayoffs_oipa, batch_size=1, shuffle=False)
dataloader_chronos_nolayoffs_on = torch.utils.data.DataLoader(dataloader_chronos_nolayoffs_on, batch_size=1, shuffle=False)
dataloader_real_layoffs_minmax = torch.utils.data.DataLoader(dataloader_real_layoffs_minmax, batch_size=1, shuffle=False)
dataloader_real_layoffs_oipa = torch.utils.data.DataLoader(dataloader_real_layoffs_oipa, batch_size=1, shuffle=False)
dataloader_real_layoffs_on = torch.utils.data.DataLoader(dataloader_real_layoffs_on, batch_size=1, shuffle=False)
dataloader_real_nolayoffs_on = torch.utils.data.DataLoader(dataloader_real_nolayoffs_on, batch_size=1, shuffle=False)
dataloader_real_nolayoffs_minmax = torch.utils.data.DataLoader(dataloader_real_nolayoffs_minmax, batch_size=1, shuffle=False)
dataloader_real_nolayoffs_oipa = torch.utils.data.DataLoader(dataloader_real_nolayoffs_oipa, batch_size=1, shuffle=False)


In [6]:
print(dataloader_real_nolayoffs_minmax.dataset.X_train.shape)

torch.Size([228, 90])


In [7]:
print(dataloader_chronos_nolayoffs_minmax.dataset.X_train.shape)

torch.Size([228, 90])


In [8]:
dataloader_chronos_layoffs_minmax.dataset.X_train.shape

torch.Size([260, 90])

In [9]:
dataloader_real_layoffs_minmax.dataset.X_train.shape

torch.Size([260, 90])

In [10]:
df = pd.read_csv("stocks_layoffs/Energy/ENPH0.csv")

In [11]:
rolling_min = list(df['Open'].rolling(window=90).min())[-1]
rolling_max = list(df['Open'].rolling(window=90).max())[-1]

# Calculate the scaled prices
df['Scaled_Price_MinMax'] = (df["Open"] - rolling_min) / (rolling_max - rolling_min)

In [12]:
# avg = list(df['Open'].rolling(window=90).mean())[-1]
# df['Normalized_Price_minmax'] = df['Open'] / avg

In [13]:
# df["Normalized_Price_minmax"]

In [14]:
# dataset_minmax = TimeSeriesDataset("stocks_no_layoffs", "stocks_layoffs", "Scaled_Price_MinMax")
# dataset_oipa = TimeSeriesDataset("stocks_no_layoffs", "stocks_layoffs", "open_inproportion_to_average")
# dataset_on = TimeSeriesDataset("stocks_no_layoffs", "stocks_layoffs", "open_normalized")
# batch_size = 1
# validation_split = .2
# shuffle_dataset = True
# random_seed= 42

# # Creating data indices for training and validation splits:
# dataset_size = len(dataset_minmax)
# indices = list(range(dataset_size))
# split = int(np.floor(validation_split * dataset_size))
# if shuffle_dataset :
#     np.random.seed(random_seed)
#     np.random.shuffle(indices)
# train_indices, val_indices = indices[split:], indices[:split]

# # Creating PT data samplers and loaders:
# train_sampler = SubsetRandomSampler(train_indices)
# valid_sampler = SequentialSampler(val_indices)

# train_loader_minmax = torch.utils.data.DataLoader(dataset_minmax, batch_size=batch_size, 
#                                            sampler=train_sampler)
# train_loader_oipa = torch.utils.data.DataLoader(dataset_oipa, batch_size=batch_size, 
#                                            sampler=train_sampler)
# train_loader_on = torch.utils.data.DataLoader(dataset_on, batch_size=batch_size, 
#                                            sampler=train_sampler)

# validation_loader_minmax = torch.utils.data.DataLoader(dataset_minmax, batch_size=batch_size, sampler=valid_sampler)
# validation_loader_oipa = torch.utils.data.DataLoader(dataset_oipa, batch_size=batch_size, sampler=valid_sampler)
# validation_loader_on = torch.utils.data.DataLoader(dataset_on, batch_size=batch_size,   sampler=valid_sampler)

In [15]:
from torch.utils.data import DataLoader

In [16]:
# train_loader_minmax.dataset.Y_train

In [17]:
# Define the model architecture
class BinaryClassifier(nn.Module):
    def __init__(self):
        super(BinaryClassifier, self).__init__()
        self.fc1 = nn.Linear(90, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.sigmoid(self.fc3(x))
        return x

# Create an instance of the model
model_minmax = BinaryClassifier()
model_open_inproportion_to_average = BinaryClassifier()
model_open_normalized = BinaryClassifier()
#Load in the saved models 
model_minmax.load_state_dict(torch.load("classifier_models/minmax_model.pth"))
model_open_inproportion_to_average.load_state_dict(torch.load("classifier_models/open_inproportion_to_average_model.pth"))
model_open_normalized.load_state_dict(torch.load("classifier_models/open_normalized_model.pth"))





<All keys matched successfully>

In [18]:
# Single model eval
# is_correct = []
# for i, (X_train, y_train) in enumerate(validation_loader_minmax):
    

#         # print(X_train)
#         # print(y_train)
#         # Forward pass
#     outputs = model(X_train)
#     out = outputs.squeeze(1)
#     output_bin = (out >= 0.5).int().item()
#     correct = output_bin == int(0.0)
#     is_correct.append(correct)
# # Count ratio of trues and falses in is_correct
# true_count = sum(is_correct)
# false_count = len(is_correct) - true_count
# print(f"True count: {true_count}")
# print(f"False count: {false_count}")
# print(f"True ratio: {true_count / len(is_correct)}")


## Eval on nolayoff data...

In [38]:
# Majority Voting Ensemble Eval
is_correct_confidence_chronos = []
f1_score_confidence_chronos = []
is_correct_vote_chronos = []
is_correct_minmax_chronos = []
is_correct_oipa_chronos = []
is_correct_on_chronos = []

is_correct_confidence_real = []

is_correct_vote_real = []
is_correct_minmax_real = []
is_correct_oipa_real = []
is_correct_on_real = []
is_matching_vote = []
is_matching_confidence = []
is_matching_minmax = []
is_matching_oipa = []
is_matching_on = []

# is_correct_vote = []
# is_correct_minmax = []
# is_correct_oipa = []
# is_correct_on = []

for i, (X_train, y_train) in enumerate(dataloader_chronos_nolayoffs_minmax):
        # print(X_train)
        # print(y_train)
        # Forward pass
    
    X_train_minmax_chronos = dataloader_chronos_nolayoffs_minmax.dataset.X_train[i].unsqueeze(0).float()
    X_train_oipa_chronos = dataloader_chronos_nolayoffs_oipa.dataset.X_train[i].unsqueeze(0).float()
    X_train_on_chronos = dataloader_chronos_nolayoffs_on.dataset.X_train[i].unsqueeze(0).float()
    X_train_minmax_real = dataloader_real_nolayoffs_minmax.dataset.X_train[i].unsqueeze(0).float()
    X_train_oipa_real = dataloader_real_nolayoffs_oipa.dataset.X_train[i].unsqueeze(0).float()
    X_train_on_real = dataloader_real_nolayoffs_on.dataset.X_train[i].unsqueeze(0).float()



    
    # print(outputs_minmax)
    outputs_minmax_chronos = model_minmax(X_train_minmax_chronos).squeeze(1)
    outputs_oipa_chronos = model_open_inproportion_to_average(X_train_oipa_chronos).squeeze(1)
    outputs_on_chronos = model_open_normalized(X_train_on_chronos).squeeze(1)
    outputs_minmax_real = model_minmax(X_train_minmax_real).squeeze(1)
    outputs_oipa_real = model_open_inproportion_to_average(X_train_oipa_real).squeeze(1)
    outputs_on_real = model_open_normalized(X_train_on_real).squeeze(1)

    

    ## Ensemble confidence based on average of outputs
    confidence_avg_out_chronos = ( outputs_minmax_chronos+ outputs_oipa_chronos) / 2
    confidence_output_bin_chronos = (confidence_avg_out_chronos >= 0.5).int().item()
    confidence_avg_out_real = (outputs_minmax_real + outputs_oipa_real) / 2
    confidence_output_bin_real = (confidence_avg_out_real >= 0.5).int().item()
    
    ## Ensemble voting based on majority voting
    vote_minmax_chronos = (outputs_minmax_chronos >= 0.5).int().item()
    is_correct_minmax_chronos.append(vote_minmax_chronos == int(0.0))
    vote_oipa_chronos = (outputs_oipa_chronos >= 0.5).int().item()
    is_correct_oipa_chronos.append(vote_oipa_chronos == int(0.0))
    vote_on_chronos = (outputs_on_chronos >= 0.5).int().item()
    is_correct_on_chronos.append(vote_on_chronos == int(0.0))
    
    vote_minmax_real = (outputs_minmax_real >= 0.5).int().item()
    is_correct_minmax_real.append(vote_minmax_real == int(0.0))
    vote_oipa_real = (outputs_oipa_real >= 0.5).int().item()
    is_correct_oipa_real.append(vote_oipa_real == int(0.0))
    vote_on_real = (outputs_on_real >= 0.5).int().item()
    is_correct_on_real.append(vote_on_real == int(0.0))

    ## Ensemble voting based on majority voting
    vote_chronos = (vote_minmax_chronos+vote_oipa_chronos + vote_on_chronos) >= 2
    correct_confidence = confidence_output_bin_chronos == int(0.0)
    correct_vote = vote_chronos == int(0.0)
    is_correct_confidence_chronos.append(correct_confidence)
    is_correct_vote_chronos.append(correct_vote)

    vote_real = (vote_minmax_real + vote_oipa_real + vote_on_real) >= 2
    correct_confidence_real = confidence_output_bin_real == int(0.0)
    correct_vote_real = vote_real == int(0.0)
    is_correct_confidence_real.append(correct_confidence_real)
    is_correct_vote_real.append(correct_vote_real)

    is_matching_vote.append(vote_chronos == vote_real)
    is_matching_confidence.append(confidence_output_bin_chronos == confidence_output_bin_real)
    is_matching_minmax.append(vote_minmax_chronos == vote_minmax_real)
    is_matching_oipa.append(vote_oipa_chronos == vote_oipa_real)
    is_matching_on.append(vote_on_chronos == vote_on_real)

    # out = outputs.squeeze(1)
    # output_bin = (out >= 0.5).int().item()
    # correct = output_bin == int(y_train.item())
    # is_correct.append(correct)
# Count ratio of trues and falses in is_correct
with open("outputs_nolayoffs_5.txt", "w") as f:
    chronos_f1_prepro_nolayoff = [0 if i == False else 1 for i in is_correct_confidence_chronos]
    chronos_f1_true_nolayoff = [0 for i in is_correct_confidence_chronos]
    # f1 = f1_score(chronos_f1_prepro, chronos_f1_true, pos_label=0, average="binary")
    # print(f"F1 Score (Confidence) (Chronos): {f1}")
    # f.write(f"F1 Score (Confidence) (Chronos): {f1}\n")
    real_f1_prepro_nolayoffs = [0 if i == False else 1 for i in is_correct_confidence_real]
    real_f1_true_nolayoffs = [0 for i in is_correct_confidence_real]
    # f1 = f1_score(real_f1_prepro, real_f1_true, pos_label=0, average="binary")
    # print(f"F1 Score (Confidence) (Real): {f1}")
    # f.write(f"F1 Score (Confidence) (Real): {f1}\n")
    true_count_confidence_chronos = sum(is_correct_confidence_chronos)
    false_count_confidence_chronos = len(is_correct_confidence_chronos) - true_count_confidence_chronos
    # print(f"True count (Confidence) (Chronos): {true_count_confidence_chronos}")
    # print(f"False count (Confidence) (Chronos): {false_count_confidence_chronos}")
    print(f"True ratio (Confidence) (Chronos): {true_count_confidence_chronos / len(is_correct_confidence_chronos)}")
    f.write(f"True ratio (Confidence) (Chronos): {true_count_confidence_chronos / len(is_correct_confidence_chronos)}\n")
    true_count_vote_chronos = sum(is_correct_vote_chronos)
    false_count_vote_chronos = len(is_correct_vote_chronos) - true_count_vote_chronos
    # print(f"True count (Vote) (Chronos): {true_count_vote_chronos}")
    # print(f"False count (Vote) (Chronos): {false_count_vote_chronos}")
    print(f"True ratio (Vote) (Chronos): {true_count_vote_chronos / len(is_correct_vote_chronos)}")
    f.write(f"True ratio (Vote) (Chronos): {true_count_vote_chronos / len(is_correct_vote_chronos)}\n")
    true_count_minmax_chronos = sum(is_correct_minmax_chronos)
    false_count_minmax_chronos = len(is_correct_minmax_chronos) - true_count_minmax_chronos
    # print(f"True count (minmax) (Chronos): {true_count_minmax_chronos}")
    # print(f"False count (minmax) (Chronos): {false_count_minmax_chronos}")
    print(f"True ratio (minmax) (Chronos): {true_count_minmax_chronos / len(is_correct_minmax_chronos)}")
    f.write(f"True ratio (minmax) (Chronos): {true_count_minmax_chronos / len(is_correct_minmax_chronos)}\n")
    true_count_oipa_chronos = sum(is_correct_oipa_chronos)
    false_count_oipa_chronos = len(is_correct_oipa_chronos) - true_count_oipa_chronos

    print(f"True ratio (OIPA) (Chronos): {true_count_oipa_chronos / len(is_correct_oipa_chronos)}")
    f.write(f"True ratio (OIPA) (Chronos): {true_count_oipa_chronos / len(is_correct_oipa_chronos)}\n")
    true_count_on_chronos = sum(is_correct_on_chronos)
    false_count_on_chronos = len(is_correct_on_chronos) - true_count_on_chronos
    print(f"True ratio (ON) (Chronos): {true_count_on_chronos / len(is_correct_on_chronos)}")
    f.write(f"True ratio (ON) (Chronos): {true_count_on_chronos / len(is_correct_on_chronos)}\n")


    true_count_confidence_real = sum(is_correct_confidence_real)
    false_count_confidence_real = len(is_correct_confidence_real) - true_count_confidence_real
    # print(f"True count (Confidence) (Real): {true_count_confidence_real}")
    # print(f"False count (Confidence) (Real): {false_count_confidence_real}")
    print(f"True ratio (Confidence) (Real): {true_count_confidence_real / len(is_correct_confidence_real)}")
    f.write(f"True ratio (Confidence) (Real): {true_count_confidence_real / len(is_correct_confidence_real)}\n")
    true_count_vote_real = sum(is_correct_vote_real)
    false_count_vote_real = len(is_correct_vote_real) - true_count_vote_real
    # print(f"True count (Vote) (Real): {true_count_vote_real}")
    # print(f"False count (Vote) (Real): {false_count_vote_real}")
    print(f"True ratio (Vote) (Real): {true_count_vote_real / len(is_correct_vote_real)}")
    f.write(f"True ratio (Vote) (Real): {true_count_vote_real / len(is_correct_vote_real)}\n")
    true_count_minmax_real = sum(is_correct_minmax_real)
    false_count_minmax_real = len(is_correct_minmax_real) - true_count_minmax_real
    # print(f"True count (minmax) (Real): {true_count_minmax_real}")
    # print(f"False count (minmax) (Real): {false_count_minmax_real}")
    print(f"True ratio (minmax) (Real): {true_count_minmax_real / len(is_correct_minmax_real)}")
    f.write(f"True ratio (minmax) (Real): {true_count_minmax_real / len(is_correct_minmax_real)}\n")
    true_count_oipa_real = sum(is_correct_oipa_real)
    false_count_oipa_real = len(is_correct_oipa_real) - true_count_oipa_real
    # print(f"True count (OIPA) (Real): {true_count_oipa_real}")
    # print(f"False count (OIPA) (Real): {false_count_oipa_real}")
    print(f"True ratio (OIPA) (Real): {true_count_oipa_real / len(is_correct_oipa_real)}")
    f.write(f"True ratio (OIPA) (Real): {true_count_oipa_real / len(is_correct_oipa_real)}\n")
    true_count_on_real = sum(is_correct_on_real)
    false_count_on_real = len(is_correct_on_real) - true_count_on_real
    # print(f"True count (ON) (Real): {true_count_on_real}")
    # print(f"False count (ON) (Real): {false_count_on_real}")
    print(f"True ratio (ON) (Real): {true_count_on_real / len(is_correct_on_real)}")
    f.write(f"True ratio (ON) (Real): {true_count_on_real / len(is_correct_on_real)}\n")

    true_count_matching_vote = sum(is_matching_vote)
    false_count_matching_vote = len(is_matching_vote) - true_count_matching_vote
    print(f"True ratio (Matching Vote): {true_count_matching_vote / len(is_matching_vote)}")
    f.write(f"True ratio (Matching Vote): {true_count_matching_vote / len(is_matching_vote)}\n")
    true_count_matching_confidence = sum(is_matching_confidence)
    false_count_matching_confidence = len(is_matching_confidence) - true_count_matching_confidence
    print(f"True ratio (Matching Confidence): {true_count_matching_confidence / len(is_matching_confidence)}")
    f.write(f"True ratio (Matching Confidence): {true_count_matching_confidence / len(is_matching_confidence)}\n")
    true_count_matching_minmax = sum(is_matching_minmax)
    false_count_matching_minmax = len(is_matching_minmax) - true_count_matching_minmax
    print(f"True ratio (Matching Minmax): {true_count_matching_minmax / len(is_matching_minmax)}")
    f.write(f"True ratio (Matching Minmax): {true_count_matching_minmax / len(is_matching_minmax)}\n")
    true_count_matching_oipa = sum(is_matching_oipa)
    false_count_matching_oipa = len(is_matching_oipa) - true_count_matching_oipa
    print(f"True ratio (Matching OIPA): {true_count_matching_oipa / len(is_matching_oipa)}")
    f.write(f"True ratio (Matching OIPA): {true_count_matching_oipa / len(is_matching_oipa)}\n")
    true_count_matching_on = sum(is_matching_on)
    false_count_matching_on = len(is_matching_on) - true_count_matching_on
    print(f"True ratio (Matching ON): {true_count_matching_on / len(is_matching_on)}")
    f.write(f"True ratio (Matching ON): {true_count_matching_on / len(is_matching_on)}\n")

f.close()
    # print(f"True count (OIPA) (Chronos): {true_count_oipa_chronos}")
    # print(f"False count (OIPA) (Chronos): {false_count_oipa_chronos}")





# true_count_confidence = sum(is_correct_confidence)
# false_count_confidence = len(is_correct_confidence) - true_count_confidence
# print(f"True count (Confidence): {true_count_confidence}")
# print(f"False count (Confidence): {false_count_confidence}")
# print(f"True ratio (Confidence): {true_count_confidence / len(is_correct_confidence)}")
# true_count_vote = sum(is_correct_vote)
# false_count_vote = len(is_correct_vote) - true_count_vote
# print(f"True count (Vote): {true_count_vote}")
# print(f"False count (Vote): {false_count_vote}")
# print(f"True ratio (Vote): {true_count_vote / len(is_correct_vote)}")
# true_count_minmax = sum(is_correct_minmax)
# false_count_minmax = len(is_correct_minmax) - true_count_minmax
# print(f"True count (minmax): {true_count_minmax}")
# print(f"False count (minmax): {false_count_minmax}")
# print(f"True ratio (minmax): {true_count_minmax / len(is_correct_minmax)}")
# true_count_oipa = sum(is_correct_oipa)
# false_count_oipa = len(is_correct_oipa) - true_count_oipa
# print(f"True count (OIPA): {true_count_oipa}")
# print(f"False count (OIPA): {false_count_oipa}")
# print(f"True ratio (OIPA): {true_count_oipa / len(is_correct_oipa)}")
# true_count_on = sum(is_correct_on)
# false_count_on = len(is_correct_on) - true_count_on
# print(f"True count (ON): {true_count_on}")
# print(f"False count (ON): {false_count_on}")
# print(f"True ratio (ON): {true_count_on / len(is_correct_on)}")


True ratio (Confidence) (Chronos): 0.5263157894736842
True ratio (Vote) (Chronos): 0.4868421052631579
True ratio (minmax) (Chronos): 0.5307017543859649
True ratio (OIPA) (Chronos): 0.0
True ratio (ON) (Chronos): 0.9035087719298246
True ratio (Confidence) (Real): 0.7543859649122807
True ratio (Vote) (Real): 0.7412280701754386
True ratio (minmax) (Real): 0.7543859649122807
True ratio (OIPA) (Real): 0.05263157894736842
True ratio (ON) (Real): 0.9254385964912281
True ratio (Matching Vote): 0.49122807017543857
True ratio (Matching Confidence): 0.5350877192982456
True ratio (Matching Minmax): 0.5394736842105263
True ratio (Matching OIPA): 0.9473684210526315
True ratio (Matching ON): 0.8289473684210527


## Eval on Layoffs

In [39]:
# Majority Voting Ensemble Eval
is_correct_confidence_chronos = []
is_correct_vote_chronos = []
is_correct_minmax_chronos = []
is_correct_oipa_chronos = []
is_correct_on_chronos = []
is_correct_confidence_real = []
is_correct_vote_real = []
is_correct_minmax_real = []
is_correct_oipa_real = []
is_correct_on_real = []
is_matching_vote = []
is_matching_confidence = []
is_matching_minmax = []
is_matching_oipa = []
is_matching_on = []

# is_correct_vote = []
# is_correct_minmax = []
# is_correct_oipa = []
# is_correct_on = []

for i, (X_train, y_train) in enumerate(dataloader_chronos_layoffs_minmax):
        # print(X_train)
        # print(y_train)
        # Forward pass
    
    X_train_minmax_chronos = dataloader_chronos_layoffs_minmax.dataset.X_train[i].unsqueeze(0).float()
    X_train_oipa_chronos = dataloader_chronos_layoffs_oipa.dataset.X_train[i].unsqueeze(0).float()
    X_train_on_chronos = dataloader_chronos_layoffs_on.dataset.X_train[i].unsqueeze(0).float()
    X_train_minmax_real = dataloader_real_layoffs_minmax.dataset.X_train[i].unsqueeze(0).float()
    X_train_oipa_real = dataloader_real_layoffs_oipa.dataset.X_train[i].unsqueeze(0).float()
    X_train_on_real = dataloader_real_layoffs_on.dataset.X_train[i].unsqueeze(0).float()



    
    # print(outputs_minmax)
    outputs_minmax_chronos = model_minmax(X_train_minmax_chronos).squeeze(1)
    outputs_oipa_chronos = model_open_inproportion_to_average(X_train_oipa_chronos).squeeze(1)
    outputs_on_chronos = model_open_normalized(X_train_on_chronos).squeeze(1)
    outputs_minmax_real = model_minmax(X_train_minmax_real).squeeze(1)
    outputs_oipa_real = model_open_inproportion_to_average(X_train_oipa_real).squeeze(1)
    outputs_on_real = model_open_normalized(X_train_on_real).squeeze(1)

    

    ## Ensemble confidence based on average of outputs
    confidence_avg_out_chronos = ( outputs_minmax_chronos+ outputs_oipa_chronos) / 2
    confidence_output_bin_chronos = (confidence_avg_out_chronos >= 0.5).int().item()
    confidence_avg_out_real = ( outputs_minmax_real + outputs_oipa_real) / 2
    confidence_output_bin_real = (confidence_avg_out_real >= 0.5).int().item()
    
    ## Ensemble voting based on majority voting
    vote_minmax_chronos = (outputs_minmax_chronos >= 0.5).int().item()
    is_correct_minmax_chronos.append(vote_minmax_chronos == int(1.0))
    vote_oipa_chronos = (outputs_oipa_chronos >= 0.5).int().item()
    is_correct_oipa_chronos.append(vote_oipa_chronos == int(1.0))
    vote_on_chronos = (outputs_on_chronos >= 0.5).int().item()
    is_correct_on_chronos.append(vote_on_chronos == int(1.0))
    
    vote_minmax_real = (outputs_minmax_real >= 0.5).int().item()
    is_correct_minmax_real.append(vote_minmax_real == int(1.0))
    vote_oipa_real = (outputs_oipa_real >= 0.5).int().item()
    is_correct_oipa_real.append(vote_oipa_real == int(1.0))
    vote_on_real = (outputs_on_real >= 0.5).int().item()
    is_correct_on_real.append(vote_on_real == int(1.0))

    ## Ensemble voting based on majority voting
    vote_chronos = (vote_oipa_chronos + vote_on_chronos) >= 2
    correct_confidence = confidence_output_bin_chronos == int(1.0)
    correct_vote = vote_chronos == int(1.0)
    is_correct_confidence_chronos.append(correct_confidence)
    is_correct_vote_chronos.append(correct_vote)

    vote_real = (vote_minmax_real + vote_oipa_real + vote_on_real) >= 2
    correct_confidence_real = confidence_output_bin_real == int(1.0)
    correct_vote_real = vote_real == int(1.0)
    is_correct_confidence_real.append(correct_confidence_real)
    is_correct_vote_real.append(correct_vote_real)

    is_matching_vote.append(vote_chronos == vote_real)
    is_matching_confidence.append(confidence_output_bin_chronos == confidence_output_bin_real)
    is_matching_minmax.append(vote_minmax_chronos == vote_minmax_real)
    is_matching_oipa.append(vote_oipa_chronos == vote_oipa_real)
    is_matching_on.append(vote_on_chronos == vote_on_real)

    # out = outputs.squeeze(1)
    # output_bin = (out >= 0.5).int().item()
    # correct = output_bin == int(y_train.item())
    # is_correct.append(correct)
# Count ratio of trues and falses in is_correct
with open("outputs_layoffs_5.txt", "w") as f:
    chronos_f1_prepro_layoff = [0 if i == False else 1 for i in is_correct_confidence_chronos]
    chronos_f1_true_layoff = [1 for i in is_correct_confidence_chronos]
    # f1 = f1_score(chronos_f1_prepro, chronos_f1_true, pos_label=1, average="binary")
    # print(f"F1 Score (Confidence) (Chronos): {f1}")
    # f.write(f"F1 Score (Confidence) (Chronos): {f1}\n")
    print("is correct real", is_correct_confidence_real)
    real_f1_prepro_layoff = [0 if i == False else 1 for i in is_correct_confidence_real]
    real_f1_true_layoff = [1 for i in is_correct_confidence_real]
    # f1 = f1_score(real_f1_prepro, real_f1_true, pos_label=1, average="binary")
    # print(f"F1 Score (Confidence) (Real): {f1}")
    # f.write(f"F1 Score (Confidence) (Real): {f1}\n")
    true_count_confidence_chronos = sum(is_correct_confidence_chronos)
    false_count_confidence_chronos = len(is_correct_confidence_chronos) - true_count_confidence_chronos
    # print(f"True count (Confidence) (Chronos): {true_count_confidence_chronos}")
    # print(f"False count (Confidence) (Chronos): {false_count_confidence_chronos}")
    print(f"True ratio (Confidence) (Chronos): {true_count_confidence_chronos / len(is_correct_confidence_chronos)}")
    f.write(f"True ratio (Confidence) (Chronos): {true_count_confidence_chronos / len(is_correct_confidence_chronos)}\n")
    true_count_vote_chronos = sum(is_correct_vote_chronos)
    false_count_vote_chronos = len(is_correct_vote_chronos) - true_count_vote_chronos
    # print(f"True count (Vote) (Chronos): {true_count_vote_chronos}")
    # print(f"False count (Vote) (Chronos): {false_count_vote_chronos}")
    print(f"True ratio (Vote) (Chronos): {true_count_vote_chronos / len(is_correct_vote_chronos)}")
    f.write(f"True ratio (Vote) (Chronos): {true_count_vote_chronos / len(is_correct_vote_chronos)}\n")
    true_count_minmax_chronos = sum(is_correct_minmax_chronos)
    false_count_minmax_chronos = len(is_correct_minmax_chronos) - true_count_minmax_chronos
    # print(f"True count (minmax) (Chronos): {true_count_minmax_chronos}")
    # print(f"False count (minmax) (Chronos): {false_count_minmax_chronos}")
    print(f"True ratio (minmax) (Chronos): {true_count_minmax_chronos / len(is_correct_minmax_chronos)}")
    f.write(f"True ratio (minmax) (Chronos): {true_count_minmax_chronos / len(is_correct_minmax_chronos)}\n")
    true_count_oipa_chronos = sum(is_correct_oipa_chronos)
    false_count_oipa_chronos = len(is_correct_oipa_chronos) - true_count_oipa_chronos

    print(f"True ratio (OIPA) (Chronos): {true_count_oipa_chronos / len(is_correct_oipa_chronos)}")
    f.write(f"True ratio (OIPA) (Chronos): {true_count_oipa_chronos / len(is_correct_oipa_chronos)}\n")
    true_count_on_chronos = sum(is_correct_on_chronos)
    false_count_on_chronos = len(is_correct_on_chronos) - true_count_on_chronos
    print(f"True ratio (ON) (Chronos): {true_count_on_chronos / len(is_correct_on_chronos)}")
    f.write(f"True ratio (ON) (Chronos): {true_count_on_chronos / len(is_correct_on_chronos)}\n")


    true_count_confidence_real = sum(is_correct_confidence_real)
    false_count_confidence_real = len(is_correct_confidence_real) - true_count_confidence_real
    # print(f"True count (Confidence) (Real): {true_count_confidence_real}")
    # print(f"False count (Confidence) (Real): {false_count_confidence_real}")
    print(f"True ratio (Confidence) (Real): {true_count_confidence_real / len(is_correct_confidence_real)}")
    f.write(f"True ratio (Confidence) (Real): {true_count_confidence_real / len(is_correct_confidence_real)}\n")
    true_count_vote_real = sum(is_correct_vote_real)
    false_count_vote_real = len(is_correct_vote_real) - true_count_vote_real
    # print(f"True count (Vote) (Real): {true_count_vote_real}")
    # print(f"False count (Vote) (Real): {false_count_vote_real}")
    print(f"True ratio (Vote) (Real): {true_count_vote_real / len(is_correct_vote_real)}")
    f.write(f"True ratio (Vote) (Real): {true_count_vote_real / len(is_correct_vote_real)}\n")
    true_count_minmax_real = sum(is_correct_minmax_real)
    false_count_minmax_real = len(is_correct_minmax_real) - true_count_minmax_real
    # print(f"True count (minmax) (Real): {true_count_minmax_real}")
    # print(f"False count (minmax) (Real): {false_count_minmax_real}")
    print(f"True ratio (minmax) (Real): {true_count_minmax_real / len(is_correct_minmax_real)}")
    f.write(f"True ratio (minmax) (Real): {true_count_minmax_real / len(is_correct_minmax_real)}\n")
    true_count_oipa_real = sum(is_correct_oipa_real)
    false_count_oipa_real = len(is_correct_oipa_real) - true_count_oipa_real
    # print(f"True count (OIPA) (Real): {true_count_oipa_real}")
    # print(f"False count (OIPA) (Real): {false_count_oipa_real}")
    print(f"True ratio (OIPA) (Real): {true_count_oipa_real / len(is_correct_oipa_real)}")
    f.write(f"True ratio (OIPA) (Real): {true_count_oipa_real / len(is_correct_oipa_real)}\n")
    true_count_on_real = sum(is_correct_on_real)
    false_count_on_real = len(is_correct_on_real) - true_count_on_real
    # print(f"True count (ON) (Real): {true_count_on_real}")
    # print(f"False count (ON) (Real): {false_count_on_real}")
    print(f"True ratio (ON) (Real): {true_count_on_real / len(is_correct_on_real)}")
    f.write(f"True ratio (ON) (Real): {true_count_on_real / len(is_correct_on_real)}\n")

    true_count_matching_vote = sum(is_matching_vote)
    false_count_matching_vote = len(is_matching_vote) - true_count_matching_vote
    print(f"True ratio (Matching Vote): {true_count_matching_vote / len(is_matching_vote)}")
    f.write(f"True ratio (Matching Vote): {true_count_matching_vote / len(is_matching_vote)}\n")
    true_count_matching_confidence = sum(is_matching_confidence)
    false_count_matching_confidence = len(is_matching_confidence) - true_count_matching_confidence
    print(f"True ratio (Matching Confidence): {true_count_matching_confidence / len(is_matching_confidence)}")
    f.write(f"True ratio (Matching Confidence): {true_count_matching_confidence / len(is_matching_confidence)}\n")
    true_count_matching_minmax = sum(is_matching_minmax)
    false_count_matching_minmax = len(is_matching_minmax) - true_count_matching_minmax
    print(f"True ratio (Matching Minmax): {true_count_matching_minmax / len(is_matching_minmax)}")
    f.write(f"True ratio (Matching Minmax): {true_count_matching_minmax / len(is_matching_minmax)}\n")
    true_count_matching_oipa = sum(is_matching_oipa)
    false_count_matching_oipa = len(is_matching_oipa) - true_count_matching_oipa
    print(f"True ratio (Matching OIPA): {true_count_matching_oipa / len(is_matching_oipa)}")
    f.write(f"True ratio (Matching OIPA): {true_count_matching_oipa / len(is_matching_oipa)}\n")
    true_count_matching_on = sum(is_matching_on)
    false_count_matching_on = len(is_matching_on) - true_count_matching_on
    print(f"True ratio (Matching ON): {true_count_matching_on / len(is_matching_on)}")
    f.write(f"True ratio (Matching ON): {true_count_matching_on / len(is_matching_on)}\n")

f.close()
    # print(f"True count (OIPA) (Chronos): {true_count_oipa_chronos}")
    # print(f"False count (OIPA) (Chronos): {false_count_oipa_chronos}")





# true_count_confidence = sum(is_correct_confidence)
# false_count_confidence = len(is_correct_confidence) - true_count_confidence
# print(f"True count (Confidence): {true_count_confidence}")
# print(f"False count (Confidence): {false_count_confidence}")
# print(f"True ratio (Confidence): {true_count_confidence / len(is_correct_confidence)}")
# true_count_vote = sum(is_correct_vote)
# false_count_vote = len(is_correct_vote) - true_count_vote
# print(f"True count (Vote): {true_count_vote}")
# print(f"False count (Vote): {false_count_vote}")
# print(f"True ratio (Vote): {true_count_vote / len(is_correct_vote)}")
# true_count_minmax = sum(is_correct_minmax)
# false_count_minmax = len(is_correct_minmax) - true_count_minmax
# print(f"True count (minmax): {true_count_minmax}")
# print(f"False count (minmax): {false_count_minmax}")
# print(f"True ratio (minmax): {true_count_minmax / len(is_correct_minmax)}")
# true_count_oipa = sum(is_correct_oipa)
# false_count_oipa = len(is_correct_oipa) - true_count_oipa
# print(f"True count (OIPA): {true_count_oipa}")
# print(f"False count (OIPA): {false_count_oipa}")
# print(f"True ratio (OIPA): {true_count_oipa / len(is_correct_oipa)}")
# true_count_on = sum(is_correct_on)
# false_count_on = len(is_correct_on) - true_count_on
# print(f"True count (ON): {true_count_on}")
# print(f"False count (ON): {false_count_on}")
# print(f"True ratio (ON): {true_count_on / len(is_correct_on)}")


is correct real [True, True, False, True, True, False, False, True, True, False, True, False, False, True, True, True, True, True, False, False, True, True, True, True, True, False, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, True, True, False, False, False, False, True, True, True, True, True, True, True, True, True, True, False, False, True, True, True, True, True, False, True, True, True, True, True, False, True, True, True, True, True, True, False, True, True, True, True, True, True, False, True, True, True, True, True, True, True, True, False, True, False, True, True, True, True, False, False, True, False, True, True, True, True, False, True, True, True, True, True, True, True, False, False, False, True, False, True, True, True, False, False, True, False, True, True, True, True, True, True, False, False, False, True, True, True, True, True, True, True, False, False, True, Tr

In [21]:
f1_chronos = f1_score(chronos_f1_prepro_nolayoff+chronos_f1_prepro_layoff, chronos_f1_true_nolayoff+chronos_f1_true_layoff, pos_label=1, average="binary")
f1_real = f1_score(real_f1_prepro_nolayoffs+real_f1_prepro_layoff, real_f1_true_nolayoffs+real_f1_true_layoff, pos_label=1, average="binary")

In [22]:
f1_chronos

0.4426877470355731

In [23]:
f1_real

0.6676096181046676

In [24]:
# # Count ratio of trues and falses in is_correct
# true_count = sum(is_correct)
# false_count = len(is_correct) - true_count
# print(f"True count: {true_count}")
# print(f"False count: {false_count}")
# print(f"True ratio: {true_count / len(is_correct)}")

In [25]:
# Save each model
directory = "classifier_models/"
if not os.path.exists(directory):
    os.makedirs(directory)
torch.save(model_minmax.state_dict(), directory +"minmax_model.pth")
torch.save(model_open_inproportion_to_average.state_dict(), directory +"open_inproportion_to_average_model.pth")
torch.save(model_open_normalized.state_dict(), directory +"open_normalized_model.pth")


In [26]:
# Load in saved models
model_minmax = BinaryClassifier()
model_open_inproportion_to_average = BinaryClassifier()
model_open_normalized = BinaryClassifier()
model_minmax.load_state_dict(torch.load(directory +"minmax_model.pth"))
model_open_inproportion_to_average.load_state_dict(torch.load(directory +"open_inproportion_to_average_model_2.pth"))
model_open_normalized.load_state_dict(torch.load(directory +"open_normalized_model_2.pth"))


FileNotFoundError: [Errno 2] No such file or directory: 'classifier_models/open_inproportion_to_average_model_2.pth'

#