In [1]:
import sys
import time
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn, optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import DataLoader
from Pytorch_dataset import DimReduction_dataset
from Discharge_model import DimReduction_2
from early_stopping import EarlyStopping
from torch.utils.tensorboard import SummaryWriter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
batch_size = 32

# Select the compute device. `device` must always be defined because later
# cells call `.to(device)` unconditionally, so fall back to the CPU when no
# GPU is present instead of leaving the name unbound.
if torch.cuda.is_available():
    device = torch.device('cuda')
    print('cuda is available')
else:
    device = torch.device('cpu')
    print('cuda is not available, using cpu')

# load dataset
# NOTE(review): the meaning of `pred_target` / `norm` is defined in
# Pytorch_dataset.DimReduction_dataset, which is not visible here.
ct_train_dataset = DimReduction_dataset(train=True, pred_target='chargetime', norm=True)
ct_test_dataset = DimReduction_dataset(train=False, pred_target='chargetime', norm=True)

# Only the training loader is shuffled; the test loader keeps dataset order.
ct_train_loader = DataLoader(ct_train_dataset, batch_size=batch_size, shuffle=True)
ct_test_loader = DataLoader(ct_test_dataset, batch_size=batch_size, shuffle=False)

cuda is available


In [3]:
# Dedicated TensorBoard run directory that only receives the model graph.
writer = SummaryWriter(log_dir='runs/chargetime_0')
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Take one batch from the training loader to use as the tracing input.
example = iter(ct_train_loader)
d, t = next(example)
print(d.shape, t.shape)

# Record the graph of the model for that batch, then flush and release the writer.
writer.add_graph(model_ct, input_to_model=d.to(device))
writer.close()

torch.Size([32, 4, 500]) torch.Size([32])


In [3]:
def train_model(model, writer,
                num_epochs=500,
                lr=1e-3,
                weight_decay=1e-3,
                cosine_period=20,
                min_lr=1e-6,
                delta_huber=1,
                patience=20,
                train_loader=None,
                valid_loader=None):
    """Train ``model`` with AdamW, cosine LR annealing, Huber loss and early stopping.

    Args:
        model: module to optimise; expected to already live on the notebook-level
            ``device`` (batches are moved to that device before the forward pass).
        writer: TensorBoard ``SummaryWriter`` receiving the per-epoch
            'train loss' and 'valid loss' scalars. It is not closed here.
        num_epochs: maximum number of epochs.
        lr: initial learning rate for AdamW.
        weight_decay: AdamW weight-decay coefficient.
        cosine_period: ``T_max`` passed to ``CosineAnnealingLR`` (in epochs, since
            the scheduler is stepped once per epoch).
        min_lr: ``eta_min`` passed to ``CosineAnnealingLR``.
        delta_huber: ``delta`` passed to ``nn.HuberLoss``.
        patience: patience forwarded to ``EarlyStopping``.
        train_loader: iterable of ``(inputs, targets)`` training batches. Defaults
            to the notebook-level ``ct_train_loader``.
        valid_loader: iterable of ``(inputs, targets)`` validation batches. Defaults
            to the notebook-level ``ct_test_loader``.

    Returns:
        ``(model, avg_train_losses, avg_valid_losses)`` where the two lists hold
        one mean batch loss per completed epoch and ``model`` has had the state
        dict from 'Checkpoints/checkpoint.pt' loaded into it.
    """
    # Fall back to the notebook-level loaders so existing calls keep working.
    if train_loader is None:
        train_loader = ct_train_loader
    if valid_loader is None:
        # NOTE(review): the default validation data is the *test* loader, so early
        # stopping / checkpoint selection sees the test set — confirm this is intended.
        valid_loader = ct_test_loader

    optimizer = optim.AdamW(model.parameters(), lr=lr, amsgrad=True, weight_decay=weight_decay) # Adam with weight decay
    scheduler = CosineAnnealingLR(optimizer, T_max=cosine_period, eta_min=min_lr) # decay the lr along a cosine cycle
    criterion = nn.HuberLoss(delta=delta_huber) # combines advantages of both L1Loss and MSELoss

    start = time.time()

    avg_train_losses = []
    avg_valid_losses = []
    n_total_steps = len(train_loader)

    # initialize the early_stopping object
    early_stopping = EarlyStopping(patience=patience, verbose=True)

    for epoch in range(num_epochs):
        # per-batch losses of the current epoch only
        train_losses = []
        valid_losses = []

        ##### Training loop #####
        model.train() # prep model for training
        for inputs, targets in train_loader:
            inputs = inputs.to(device)
            # reshape targets to a column so they line up with the model output
            targets = targets.view(-1, 1).to(device)

            # forward
            outputs = model(inputs)
            loss = criterion(outputs, targets)

            # backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            train_losses.append(loss.item())

        # update lr (once per epoch)
        scheduler.step()

        ##### Validation loop #####
        model.eval() # prep model for evaluation
        # No gradients are needed for evaluation; skipping autograd bookkeeping
        # saves memory and time without changing the computed losses.
        with torch.no_grad():
            for inputs, targets in valid_loader:
                inputs = inputs.to(device)
                targets = targets.view(-1, 1).to(device)

                outputs = model(inputs)
                loss = criterion(outputs, targets)
                valid_losses.append(loss.item())

        # calculate average loss over an epoch
        train_loss = np.average(train_losses)
        valid_loss = np.average(valid_losses)
        avg_train_losses.append(train_loss)
        avg_valid_losses.append(valid_loss)

        print(f'[Epoch {epoch+1}/{num_epochs}] train_loss: {train_loss:.2f}, valid_loss: {valid_loss:.2f}')

        # tensorboard: index of the last training batch seen so far. This is the
        # value the leaked loop index used to yield, but it no longer depends on
        # a loop variable that is undefined when the loader is empty.
        global_step = (epoch + 1) * n_total_steps - 1
        writer.add_scalar('train loss', train_loss, global_step)
        writer.add_scalar('valid loss', valid_loss, global_step)

        # early_stopping needs the validation loss to check if it has decreased,
        # and if it has, it will make a checkpoint of the current model
        early_stopping(valid_loss, model)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    # load the last checkpoint with the best model
    # (presumably the file written by EarlyStopping — verify against early_stopping.py)
    model.load_state_dict(torch.load('Checkpoints/checkpoint.pt'))

    end = time.time()
    print(f'Training is end. Total trainig time: {(end-start)/60:.1f} minutes')

    return  model, avg_train_losses, avg_valid_losses

# New experiments (TensorBoard runs `ct_new_*`)

In [5]:
# TensorBoard
writer = SummaryWriter('runs/ct_new_1')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-4, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

# Save the whole trained module object to disk.
file = 'Model/ct_model_new_1.pth'
torch.save(model, file)
print(file)

[Epoch 1/500] train_loss: 0.94, valid_loss: 0.59
Validation loss decreased (inf --> 0.589608).  Saving model ...
[Epoch 2/500] train_loss: 0.40, valid_loss: 0.84
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.39, valid_loss: 0.28
Validation loss decreased (0.589608 --> 0.284183).  Saving model ...
[Epoch 4/500] train_loss: 0.35, valid_loss: 0.45
EarlyStopping counter: 1 out of 50
[Epoch 5/500] train_loss: 0.33, valid_loss: 0.56
EarlyStopping counter: 2 out of 50
[Epoch 6/500] train_loss: 0.29, valid_loss: 0.25
Validation loss decreased (0.284183 --> 0.254720).  Saving model ...
[Epoch 7/500] train_loss: 0.27, valid_loss: 0.39
EarlyStopping counter: 1 out of 50
[Epoch 8/500] train_loss: 0.25, valid_loss: 0.35
EarlyStopping counter: 2 out of 50
[Epoch 9/500] train_loss: 0.23, valid_loss: 0.38
EarlyStopping counter: 3 out of 50
[Epoch 10/500] train_loss: 0.22, valid_loss: 0.32
EarlyStopping counter: 4 out of 50
[Epoch 11/500] train_loss: 0.21, valid_loss: 0.31
EarlyStoppin

## norm=True

In [4]:
# TensorBoard
writer = SummaryWriter('runs/ct_new_2')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-4, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

# Save the whole trained module object to disk.
file = 'Model/ct_model_new_2.pth'
torch.save(model, file)
print(file)

[Epoch 1/500] train_loss: 0.58, valid_loss: 0.19
Validation loss decreased (inf --> 0.192373).  Saving model ...
[Epoch 2/500] train_loss: 0.14, valid_loss: 0.25
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.12, valid_loss: 0.22
EarlyStopping counter: 2 out of 50
[Epoch 4/500] train_loss: 0.11, valid_loss: 0.20
EarlyStopping counter: 3 out of 50
[Epoch 5/500] train_loss: 0.11, valid_loss: 0.23
EarlyStopping counter: 4 out of 50
[Epoch 6/500] train_loss: 0.10, valid_loss: 0.20
EarlyStopping counter: 5 out of 50
[Epoch 7/500] train_loss: 0.09, valid_loss: 0.23
EarlyStopping counter: 6 out of 50
[Epoch 8/500] train_loss: 0.08, valid_loss: 0.14
Validation loss decreased (0.192373 --> 0.135923).  Saving model ...
[Epoch 9/500] train_loss: 0.08, valid_loss: 0.15
EarlyStopping counter: 1 out of 50
[Epoch 10/500] train_loss: 0.07, valid_loss: 0.11
Validation loss decreased (0.135923 --> 0.112225).  Saving model ...
[Epoch 11/500] train_loss: 0.07, valid_loss: 0.14
EarlyStoppin

In [5]:
# TensorBoard
writer = SummaryWriter('runs/ct_new_3')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=1e-3, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

# Save the whole trained module object to disk.
file = 'Model/ct_model_new_3.pth'
torch.save(model, file)
print(file)

[Epoch 1/500] train_loss: 0.45, valid_loss: 0.25
Validation loss decreased (inf --> 0.252850).  Saving model ...
[Epoch 2/500] train_loss: 0.13, valid_loss: 0.14
Validation loss decreased (0.252850 --> 0.135213).  Saving model ...
[Epoch 3/500] train_loss: 0.11, valid_loss: 0.14
EarlyStopping counter: 1 out of 50
[Epoch 4/500] train_loss: 0.10, valid_loss: 0.12
Validation loss decreased (0.135213 --> 0.115064).  Saving model ...
[Epoch 5/500] train_loss: 0.10, valid_loss: 0.16
EarlyStopping counter: 1 out of 50
[Epoch 6/500] train_loss: 0.09, valid_loss: 0.09
Validation loss decreased (0.115064 --> 0.086479).  Saving model ...
[Epoch 7/500] train_loss: 0.08, valid_loss: 0.09
EarlyStopping counter: 1 out of 50
[Epoch 8/500] train_loss: 0.07, valid_loss: 0.10
EarlyStopping counter: 2 out of 50
[Epoch 9/500] train_loss: 0.06, valid_loss: 0.11
EarlyStopping counter: 3 out of 50
[Epoch 10/500] train_loss: 0.06, valid_loss: 0.10
EarlyStopping counter: 4 out of 50
[Epoch 11/500] train_loss: 0

# Old experiments (TensorBoard runs `ct_*`)

In [8]:
# TensorBoard
writer = SummaryWriter('runs/ct_1')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-2, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

[Epoch 1/500] train_loss: 0.91, valid_loss: 0.27
Validation loss decreased (inf --> 0.269869).  Saving model ...
[Epoch 2/500] train_loss: 0.44, valid_loss: 0.91
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.41, valid_loss: 0.26
Validation loss decreased (0.269869 --> 0.257181).  Saving model ...
[Epoch 4/500] train_loss: 0.41, valid_loss: 0.38
EarlyStopping counter: 1 out of 50
[Epoch 5/500] train_loss: 0.38, valid_loss: 0.36
EarlyStopping counter: 2 out of 50
[Epoch 6/500] train_loss: 0.34, valid_loss: 0.28
EarlyStopping counter: 3 out of 50
[Epoch 7/500] train_loss: 0.32, valid_loss: 0.62
EarlyStopping counter: 4 out of 50
[Epoch 8/500] train_loss: 0.31, valid_loss: 0.30
EarlyStopping counter: 5 out of 50
[Epoch 9/500] train_loss: 0.29, valid_loss: 0.42
EarlyStopping counter: 6 out of 50
[Epoch 10/500] train_loss: 0.26, valid_loss: 0.36
EarlyStopping counter: 7 out of 50
[Epoch 11/500] train_loss: 0.26, valid_loss: 0.34
EarlyStopping counter: 8 out of 50
[Epoch 12/5

In [9]:
# Serialize the trained module returned by the previous training cell.
file = 'Model/ct_model1.pth'
torch.save(obj=model, f=file)
print(file)

Model/ct_model1.pth


In [10]:
# TensorBoard
writer = SummaryWriter('runs/ct_2')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=5*1e-3, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-2, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

[Epoch 1/500] train_loss: 0.76, valid_loss: 0.42
Validation loss decreased (inf --> 0.423404).  Saving model ...
[Epoch 2/500] train_loss: 0.40, valid_loss: 0.43
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.37, valid_loss: 0.31
Validation loss decreased (0.423404 --> 0.305915).  Saving model ...
[Epoch 4/500] train_loss: 0.34, valid_loss: 0.38
EarlyStopping counter: 1 out of 50
[Epoch 5/500] train_loss: 0.32, valid_loss: 0.34
EarlyStopping counter: 2 out of 50
[Epoch 6/500] train_loss: 0.29, valid_loss: 0.38
EarlyStopping counter: 3 out of 50
[Epoch 7/500] train_loss: 0.28, valid_loss: 0.45
EarlyStopping counter: 4 out of 50
[Epoch 8/500] train_loss: 0.25, valid_loss: 0.47
EarlyStopping counter: 5 out of 50
[Epoch 9/500] train_loss: 0.24, valid_loss: 0.28
Validation loss decreased (0.305915 --> 0.282719).  Saving model ...
[Epoch 10/500] train_loss: 0.22, valid_loss: 0.37
EarlyStopping counter: 1 out of 50
[Epoch 11/500] train_loss: 0.22, valid_loss: 0.42
EarlyStoppin

In [11]:
# Serialize the trained module returned by the previous training cell.
file = 'Model/ct_model2.pth'
torch.save(obj=model, f=file)
print(file)

Model/ct_model2.pth


In [12]:
# TensorBoard
writer = SummaryWriter('runs/ct_3')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-2, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

[Epoch 1/500] train_loss: 0.81, valid_loss: 0.24
Validation loss decreased (inf --> 0.242477).  Saving model ...
[Epoch 2/500] train_loss: 0.44, valid_loss: 0.32
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.40, valid_loss: 0.69
EarlyStopping counter: 2 out of 50
[Epoch 4/500] train_loss: 0.41, valid_loss: 0.30
EarlyStopping counter: 3 out of 50
[Epoch 5/500] train_loss: 0.38, valid_loss: 0.49
EarlyStopping counter: 4 out of 50
[Epoch 6/500] train_loss: 0.36, valid_loss: 0.75
EarlyStopping counter: 5 out of 50
[Epoch 7/500] train_loss: 0.32, valid_loss: 0.44
EarlyStopping counter: 6 out of 50
[Epoch 8/500] train_loss: 0.29, valid_loss: 0.57
EarlyStopping counter: 7 out of 50
[Epoch 9/500] train_loss: 0.27, valid_loss: 0.55
EarlyStopping counter: 8 out of 50
[Epoch 10/500] train_loss: 0.26, valid_loss: 0.46
EarlyStopping counter: 9 out of 50
[Epoch 11/500] train_loss: 0.25, valid_loss: 0.41
EarlyStopping counter: 10 out of 50
[Epoch 12/500] train_loss: 0.25, valid_loss:

In [13]:
# Serialize the trained module returned by the previous training cell.
file = 'Model/ct_model3.pth'
torch.save(obj=model, f=file)
print(file)

Model/ct_model3.pth


In [14]:
# TensorBoard
writer = SummaryWriter('runs/ct_4')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=1e-3, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

[Epoch 1/500] train_loss: 0.89, valid_loss: 0.33
Validation loss decreased (inf --> 0.332301).  Saving model ...
[Epoch 2/500] train_loss: 0.44, valid_loss: 0.53
EarlyStopping counter: 1 out of 50
[Epoch 3/500] train_loss: 0.37, valid_loss: 0.27
Validation loss decreased (0.332301 --> 0.271904).  Saving model ...
[Epoch 4/500] train_loss: 0.35, valid_loss: 0.25
Validation loss decreased (0.271904 --> 0.253230).  Saving model ...
[Epoch 5/500] train_loss: 0.31, valid_loss: 0.46
EarlyStopping counter: 1 out of 50
[Epoch 6/500] train_loss: 0.29, valid_loss: 0.29
EarlyStopping counter: 2 out of 50
[Epoch 7/500] train_loss: 0.26, valid_loss: 0.18
Validation loss decreased (0.253230 --> 0.184455).  Saving model ...
[Epoch 8/500] train_loss: 0.25, valid_loss: 0.20
EarlyStopping counter: 1 out of 50
[Epoch 9/500] train_loss: 0.23, valid_loss: 0.33
EarlyStopping counter: 2 out of 50
[Epoch 10/500] train_loss: 0.22, valid_loss: 0.29
EarlyStopping counter: 3 out of 50
[Epoch 11/500] train_loss: 0

In [15]:
# Serialize the trained module returned by the previous training cell.
file = 'Model/ct_model4.pth'
torch.save(obj=model, f=file)
print(file)

Model/ct_model4.pth


In [16]:
# TensorBoard
writer = SummaryWriter('runs/ct_5')
# load model
model_ct = DimReduction_2(in_ch=4, out_ch=1).to(device)

# Train with early stopping; train_loss / valid_loss are per-epoch averages.
model, train_loss, valid_loss = train_model(
    model_ct, writer,
    patience=50, lr=1e-2, cosine_period=10, min_lr=1e-5,
    weight_decay=5*1e-4, delta_huber=0.3,
)
# Flush pending scalars and release the event file, as the graph-logging cell does.
writer.close()

[Epoch 1/500] train_loss: 0.84, valid_loss: 0.26
Validation loss decreased (inf --> 0.261431).  Saving model ...
[Epoch 2/500] train_loss: 0.44, valid_loss: 0.23
Validation loss decreased (0.261431 --> 0.233614).  Saving model ...
[Epoch 3/500] train_loss: 0.40, valid_loss: 0.58
EarlyStopping counter: 1 out of 50
[Epoch 4/500] train_loss: 0.40, valid_loss: 0.42
EarlyStopping counter: 2 out of 50
[Epoch 5/500] train_loss: 0.35, valid_loss: 0.42
EarlyStopping counter: 3 out of 50
[Epoch 6/500] train_loss: 0.31, valid_loss: 0.34
EarlyStopping counter: 4 out of 50
[Epoch 7/500] train_loss: 0.30, valid_loss: 0.35
EarlyStopping counter: 5 out of 50
[Epoch 8/500] train_loss: 0.28, valid_loss: 0.27
EarlyStopping counter: 6 out of 50
[Epoch 9/500] train_loss: 0.27, valid_loss: 0.23
Validation loss decreased (0.233614 --> 0.225337).  Saving model ...
[Epoch 10/500] train_loss: 0.26, valid_loss: 0.26
EarlyStopping counter: 1 out of 50
[Epoch 11/500] train_loss: 0.26, valid_loss: 0.29
EarlyStoppin

In [17]:
# Serialize the trained module returned by the previous training cell.
file = 'Model/ct_model5.pth'
torch.save(obj=model, f=file)
print(file)

Model/ct_model5.pth
