In [1]:
import breizhcrops as bzh
from breizhcrops import BreizhCrops
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tqdm
from tqdm import tqdm
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn
from torch.optim import Adam
import os
from sklearn.metrics import accuracy_score, cohen_kappa_score, f1_score, recall_score, precision_score
import sklearn

In [5]:
frh01 = BreizhCrops("frh01")
frh02 = BreizhCrops("frh02")
frh03 = BreizhCrops("frh03")
frh04 = BreizhCrops("frh04")
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

In [6]:
frh01.domain = 1
frh02.domain = 2
frh03.domain = 3
frh04.domain = 4

In [7]:
def split_dataset(dataset, X=0.8):
    
    total_samples = len(dataset)

    num_test_samples = int(X * total_samples)

    np.random.seed(42)

    test_indices = np.random.choice(total_samples, num_test_samples, replace=False)

    # Find complementary indices
    train_indices = np.setdiff1d(np.arange(total_samples), test_indices)

    test = torch.utils.data.Subset(dataset, test_indices)
    train = torch.utils.data.Subset(dataset, train_indices)
    return train, test

In [8]:
def get_dataloader(batchsize=32, X=0.8, num_workers=0, upperPerformance=False):

    frh01_train, frh01_test = split_dataset(frh01, X=X)

    if upperPerformance:
        traindatasets = frh01_train
    else:
        traindatasets = torch.utils.data.ConcatDataset([frh02, frh03, frh04])

    testdataset = frh01_test
    
  
    

    traindataloader = DataLoader(traindatasets, batch_size=batchsize, shuffle=True, num_workers=num_workers)
    testdataloader = DataLoader(testdataset, batch_size=batchsize, shuffle=False, num_workers=num_workers)

    meta = dict(
        ndims=13,
        num_classes=frh02.classes,
        sequencelength=45
    )

    return traindataloader, testdataloader, meta

In [23]:
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.2, upperPerformance=False)

In [24]:
len(train_dataloader), len(test_dataloader)

(13809, 1040)

In [9]:
class LSTM_Model(torch.nn.Module):
    def __init__(self, input_size=13, hidden_size=128, num_layers=4, output_size=9, bidirectional=False, dropout=0.2, c_norm=True):
        super(LSTM_Model, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.output_size = output_size
        self.c_norm = c_norm
        
        if num_layers > 1 and dropout > 0:
            self.dropout = dropout
        else:
            self.dropout = 0.0 
            
        self.clayernorm = nn.LayerNorm((hidden_size + hidden_size * bidirectional) * num_layers)
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers,
                            bias=False, batch_first=True, dropout=dropout, bidirectional=bidirectional)   
        
        if bidirectional:
            hidden_size = hidden_size * 2
             
        self.fc = nn.Linear(hidden_size * num_layers, output_size, bias=True)
        
    def forward(self, x):
        
        outputs, last_state_list = self.lstm.forward(x)

        h, c = last_state_list

        nlayers, batchsize, n_hidden = c.shape
        
        if self.c_norm:
            x = self.clayernorm(c.transpose(0, 1).contiguous().view(batchsize, nlayers * n_hidden))
        else:
            x = c.transpose(0, 1).contiguous().view(batchsize, nlayers * n_hidden)
        x = self.fc.forward(x)
        return x
        
    def save(self, path="model.pth", **kwargs):
        print("\nsaving model to " + path)
        model_state = self.state_dict()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        torch.save(dict(model_state=model_state, **kwargs), path)
        
model = LSTM_Model(input_size=13, hidden_size=128, num_layers=4, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)


In [4]:
def metrics(y_true, y_pred):
    accuracy = accuracy_score(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)
    f1_micro = f1_score(y_true, y_pred, average="micro")
    f1_macro = f1_score(y_true, y_pred, average="macro")
    f1_weighted = f1_score(y_true, y_pred, average="weighted")
    recall_micro = recall_score(y_true, y_pred, average="micro")
    recall_macro = recall_score(y_true, y_pred, average="macro")
    recall_weighted = recall_score(y_true, y_pred, average="weighted")
    precision_micro = precision_score(y_true, y_pred, average="micro")
    precision_macro = precision_score(y_true, y_pred, average="macro")
    precision_weighted = precision_score(y_true, y_pred, average="weighted")

    return dict(
        accuracy=accuracy,
        kappa=kappa,
        f1_micro=f1_micro,
        f1_macro=f1_macro,
        f1_weighted=f1_weighted,
        recall_micro=recall_micro,
        recall_macro=recall_macro,
        recall_weighted=recall_weighted,
        precision_micro=precision_micro,
        precision_macro=precision_macro,
        precision_weighted=precision_weighted,
    )


def train_epoch(model, optimizer, criterion, dataloader, device):
    model.train()
    losses = list()
    with tqdm(enumerate(dataloader), total=len(dataloader), leave=True) as iterator:
        for idx, batch in iterator:
            optimizer.zero_grad()
            x, y_true, _ , _= batch
            loss = criterion(model.forward(x.to(device)), y_true.to(device))
            loss.backward()
            optimizer.step()
            iterator.set_description(f"train loss={loss:.2f}")
            losses.append(loss)
    return torch.stack(losses)


def test_epoch(model, criterion, dataloader, device):
    model.eval()
    with torch.no_grad():
        losses = list()
        y_true_list = list()
        y_pred_list = list()
        field_ids_list = list()
        with tqdm(enumerate(dataloader), total=len(dataloader), leave=True) as iterator:
            for idx, batch in iterator:
                x, y_true, field_id, _ = batch
                logits = model.forward(x.to(device))
                loss = criterion(logits, y_true.to(device))
                iterator.set_description(f"test loss={loss:.2f}")
                losses.append(loss)
                y_true_list.append(y_true)
                y_pred_list.append(logits.argmax(-1))
                field_ids_list.append(field_id)
        return torch.stack(losses), torch.cat(y_true_list), torch.cat(y_pred_list), torch.cat(field_ids_list)


# Lower Baseline

## X=0.8

In [27]:
len(train_dataloader.dataset), len(test_dataloader.dataset)
len_train = len(train_dataloader)
n_epochs = 20

In [28]:
idx = len_train; epoch = 20
p = float(idx + epoch * len_train) / n_epochs / len_train
alpha = 2. / (1. + np.exp(-10 * p)) - 1
alpha

0.9999449286177706

In [29]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.8, upperPerformance=False)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Lower_08.csv")

  0%|          | 0/15177 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

## X = 0.6

In [None]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.6, upperPerformance=False)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Lower_06.csv")

## X = 0.4

In [None]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.4, upperPerformance=False)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Lower_04.csv")

## X = 0.2

In [21]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.2, upperPerformance=False)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Lower_02.csv")

  0%|          | 0/13427 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>

# Upper Baseline

## X = 0.8

In [None]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.8, upperPerformance=True)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Upper_08.csv")

# X = 0.6

In [None]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.6, upperPerformance=True)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Upper_06.csv")

## X = 0.4

In [38]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.4, upperPerformance=True)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Upper_04.csv")

TypeError: get_dataloader() missing 1 required positional argument: 'mode'

## X = 0.2

In [11]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.2, upperPerformance=True)
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Upper_02.csv")

  0%|          | 0/4466 [00:00<?, ?it/s]

train loss=0.80: 100%|██████████| 4466/4466 [04:25<00:00, 16.80it/s]
test loss=1.38: 100%|██████████| 1117/1117 [01:14<00:00, 15.06it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 0: trainloss 2.37, testloss 0.47 accuracy=0.74, kappa=0.66, f1_micro=0.74, f1_macro=0.47, f1_weighted=0.71, recall_micro=0.74, recall_macro=0.48, recall_weighted=0.74, precision_micro=0.74, precision_macro=0.49, precision_weighted=0.73


train loss=0.68: 100%|██████████| 4466/4466 [04:39<00:00, 15.95it/s]
test loss=0.79: 100%|██████████| 1117/1117 [00:50<00:00, 22.06it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 1: trainloss 0.56, testloss 0.38 accuracy=0.77, kappa=0.71, f1_micro=0.77, f1_macro=0.52, f1_weighted=0.76, recall_micro=0.77, recall_macro=0.52, recall_weighted=0.77, precision_micro=0.77, precision_macro=0.52, precision_weighted=0.76


train loss=0.46: 100%|██████████| 4466/4466 [04:27<00:00, 16.68it/s]
test loss=0.77: 100%|██████████| 1117/1117 [01:05<00:00, 17.00it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 2: trainloss 0.45, testloss 0.38 accuracy=0.78, kappa=0.71, f1_micro=0.78, f1_macro=0.54, f1_weighted=0.77, recall_micro=0.78, recall_macro=0.53, recall_weighted=0.78, precision_micro=0.78, precision_macro=0.54, precision_weighted=0.77


train loss=0.43: 100%|██████████| 4466/4466 [05:31<00:00, 13.48it/s]
test loss=0.92: 100%|██████████| 1117/1117 [01:25<00:00, 13.00it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 3: trainloss 0.52, testloss 0.43 accuracy=0.79, kappa=0.73, f1_micro=0.79, f1_macro=0.54, f1_weighted=0.79, recall_micro=0.79, recall_macro=0.54, recall_weighted=0.79, precision_micro=0.79, precision_macro=0.55, precision_weighted=0.79


train loss=0.46: 100%|██████████| 4466/4466 [06:23<00:00, 11.64it/s]
test loss=0.68: 100%|██████████| 1117/1117 [01:30<00:00, 12.40it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 4: trainloss 0.26, testloss 0.32 accuracy=0.80, kappa=0.74, f1_micro=0.80, f1_macro=0.55, f1_weighted=0.79, recall_micro=0.80, recall_macro=0.54, recall_weighted=0.80, precision_micro=0.80, precision_macro=0.55, precision_weighted=0.79


train loss=0.28: 100%|██████████| 4466/4466 [06:09<00:00, 12.08it/s]
test loss=0.79: 100%|██████████| 1117/1117 [01:27<00:00, 12.74it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 5: trainloss 0.43, testloss 0.31 accuracy=0.79, kappa=0.73, f1_micro=0.79, f1_macro=0.53, f1_weighted=0.76, recall_micro=0.79, recall_macro=0.54, recall_weighted=0.79, precision_micro=0.79, precision_macro=0.55, precision_weighted=0.79


train loss=0.64: 100%|██████████| 4466/4466 [06:19<00:00, 11.76it/s]
test loss=1.12: 100%|██████████| 1117/1117 [01:38<00:00, 11.36it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 6: trainloss 0.68, testloss 0.45 accuracy=0.80, kappa=0.74, f1_micro=0.80, f1_macro=0.54, f1_weighted=0.79, recall_micro=0.80, recall_macro=0.54, recall_weighted=0.80, precision_micro=0.80, precision_macro=0.55, precision_weighted=0.79


train loss=0.49: 100%|██████████| 4466/4466 [06:40<00:00, 11.16it/s]
test loss=0.71: 100%|██████████| 1117/1117 [01:27<00:00, 12.74it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 7: trainloss 0.31, testloss 0.36 accuracy=0.79, kappa=0.74, f1_micro=0.79, f1_macro=0.55, f1_weighted=0.80, recall_micro=0.79, recall_macro=0.55, recall_weighted=0.79, precision_micro=0.79, precision_macro=0.55, precision_weighted=0.80


train loss=1.04: 100%|██████████| 4466/4466 [05:51<00:00, 12.71it/s]
test loss=0.98: 100%|██████████| 1117/1117 [01:17<00:00, 14.49it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 8: trainloss 0.43, testloss 0.40 accuracy=0.80, kappa=0.74, f1_micro=0.80, f1_macro=0.55, f1_weighted=0.79, recall_micro=0.80, recall_macro=0.54, recall_weighted=0.80, precision_micro=0.80, precision_macro=0.56, precision_weighted=0.79


train loss=0.51: 100%|██████████| 4466/4466 [05:55<00:00, 12.55it/s]
test loss=0.84: 100%|██████████| 1117/1117 [01:19<00:00, 14.02it/s]
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


epoch 9: trainloss 0.45, testloss 0.29 accuracy=0.81, kappa=0.76, f1_micro=0.81, f1_macro=0.56, f1_weighted=0.81, recall_micro=0.81, recall_macro=0.56, recall_weighted=0.81, precision_micro=0.81, precision_macro=0.56, precision_weighted=0.80


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [36]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = LSTM_Model(input_size=13, hidden_size=64, num_layers=2, output_size=9, bidirectional=False, dropout=0.2, c_norm=True).to(device)
train_dataloader, test_dataloader, meta = get_dataloader(batchsize=32, num_workers=0, X=0.2, upperPerformance=False, mode='124')
criterion = torch.nn.CrossEntropyLoss(reduction="mean")
optimizer = Adam(model.parameters(), 0.005, weight_decay=1e-6)
log = list()
for epoch in range(10):
    train_loss = train_epoch(model, optimizer, criterion, train_dataloader, device)
    test_loss, y_true, y_pred, *_ = test_epoch(model, criterion, test_dataloader, device)
    scores1 = metrics(y_true.cpu(), y_pred.cpu())
    scores_msg = ", ".join([f"{k}={v:.2f}" for (k, v) in scores1.items()])
    test_loss = test_loss.cpu().detach().numpy()[0]
    train_loss = train_loss.cpu().detach().numpy()[0]
    print(f"epoch {epoch}: trainloss {train_loss:.2f}, testloss {test_loss:.2f} " + scores_msg)

    scores1["epoch"] = epoch
    scores1["trainloss"] = train_loss
    scores1["testloss"] = test_loss
    log.append(scores1)

    log_df = pd.DataFrame(log).set_index("epoch")
    log_df.to_csv("Upper_02.csv")
    

  0%|          | 0/13809 [00:00<?, ?it/s]


TypeError: default_collate: batch must contain tensors, numpy arrays, numbers, dicts or lists; found <class 'NoneType'>