## Experiment Class Definition
- Helper class that defines the functions common to all model training(time series)

In [7]:
import pandas as pd
from models.cnn import SimpleCNN
from sklearn.preprocessing import MinMaxScaler

---
## Data Load

In [3]:
solar_power = pd.read_csv('./data/solarpanelspower/PV_Elec_Gas2.csv').rename(columns={'Unnamed: 0':'date'}).set_index('date')

train_set = solar_power[:'2018-10-31']
valid_set = solar_power['2018-11-01':'2019-11-18']
test_set = solar_power['2019-11-18':]
print('Proportion of train_set : {:.2f}%'.format(len(train_set)/len(solar_power)))
print('Proportion of valid_set : {:.2f}%'.format(len(valid_set)/len(solar_power)))
# print('Proportion of test_set : {:.2f}%'.format(len(test_set)/len(solar_power)))

Proportion of train_set : 0.87%
Proportion of valid_set : 0.13%


In [25]:
%%writefile experiment.py
import numpy as np
import time
import torch
from torch import nn
from utils.timeserie import split_sequence
from torch.utils.data import DataLoader
#
from utils.ml import SimpleDataset

class Experiment():

    def __init__(self, config: dict):
        # Set experiment config
        expected_vars = ['model','input_len','feature_dim','frequency',
                         'device','scaler','verbose']
        for v in expected_vars:
            assert v in config.keys(), f'Key "{v}" is missing on params dict'
            vars(self)[v] = config[v]
        self.config = config
        #
        # Pre-configuration (to produce same result in inference/predict)
        #
        np.random.seed(7); torch.manual_seed(7)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(7)
        #
        #
        #
        self.model = self.model.to(self.device)



    def split_chunks(self, linear_serie, expand_dim=True):
        x, y = split_sequence(linear_serie, self.input_len)
        x, y = torch.tensor(x, dtype=torch.float32), torch.tensor(y, dtype=torch.float32)
        if expand_dim:
            x, y = x.unsqueeze(-1), y.unsqueeze(-1)
        return x, y

    def set_dataset(self, linear_serie, train=False, validation=False):
        if self.scaler is not None:
            if train: # FIT Scaler
                if self.verbose: print('Scaler FIT')
                linear_serie = self.scaler.fit_transform(linear_serie.reshape(-1,1)).reshape(-1)
            if validation:
                linear_serie = self.scaler.transform(linear_serie.reshape(-1,1)).reshape(-1)

            
        x, y = self.split_chunks(linear_serie)
        x, y = x.to(self.device), y.to(self.device)
        data = SimpleDataset(x, y)
        # Save
        if train:
            self.train_dataset = data
        if validation:
            self.validation_dataset = data
        
        return data
    

    def train(self, train_conf):
        expected_vars = ['epochs','lr','batch_size']
        for v in expected_vars:
            assert v in train_conf.keys(), f'Key "{v}" is missing on params dict'
        #
        epochs = train_conf['epochs']
        verbose = train_conf['verbose']
        #
        optimizer = torch.optim.Adam(self.model.parameters(), lr=train_conf['lr'])
        loss_fn = nn.MSELoss()
        train_loader = DataLoader(self.train_dataset, batch_size=train_conf['batch_size'], shuffle=False)
        
        loss_history = []
        for epoch_i in range(epochs):
            timr = time.time()
            epoch_loss = .0
            for batch_x, batch_y in train_loader:
                optimizer.zero_grad() # current batch zero-out the loss
                pred_y = self.model(batch_x)
                loss = loss_fn(pred_y, batch_y)
                loss.backward()
                optimizer.step()
                epoch_loss += loss
            # end epoch
            epoch_loss = epoch_loss/len(train_loader)
            loss_history.append(epoch_loss.to('cpu').detach().numpy())
            timr = time.time() - timr
            if verbose: print(f'Epoch {epoch_i+1}/{epochs} [{timr:.3f}secs] -> Train loss: {epoch_loss:.5f}')
    
    def predict(self, linear_serie):
        if self.scaler is not None:
            linear_serie = self.scaler.transform(linear_serie.reshape(-1,1)).reshape(-1)

Overwriting experiment.py


---
## Experiment Test

In [27]:
exp = Experiment(
    {
        # Model
        'model': SimpleCNN(3, 64),
        'input_len':3,
        'feature_dim':1,
        # Data
        'frequency':'daily',
        'scaler':MinMaxScaler(),
        # Others
        'device':'cuda',
        'verbose':True,
    })

exp.set_dataset(linear_serie=train_set.Elec_kW.values, train=True)
exp.set_dataset(linear_serie=valid_set.Elec_kW.values, validation=True)

exp.train({
    'epochs':2,
    'lr':1e-5,
    'batch_size':2,
    'verbose':True,
})

In [8]:
MinMaxScaler

sklearn.preprocessing._data.MinMaxScaler

In [None]:
exp.validate()

In [None]:
train_losses = []
valid_losses = []
def Train():
    
    running_loss = .0
    
    model.train()
    
    for idx, (inputs,labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        preds = model(inputs)
        loss = criterion(preds,labels)
        loss.backward()
        optimizer.step()
        running_loss += loss
        
    train_loss = running_loss/len(train_loader)
    train_losses.append(train_loss.to('cpu').detach().numpy())
    
    print(f'train_loss {train_loss}')
    
def Valid():
    running_loss = .0
    
    model.eval()
    
    with torch.no_grad():
        for idx, (inputs, labels) in enumerate(valid_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            preds = model(inputs.float())
            loss = criterion(preds,labels)
            running_loss += loss
            
        valid_loss = running_loss/len(valid_loader)
        valid_losses.append(valid_loss.to('cpu').detach().numpy())
        print(f'valid_loss {valid_loss}')

In [None]:
epochs = 200
for epoch in range(epochs):
    print('epochs {}/{}'.format(epoch+1,epochs))
    Train()
    Valid()
    gc.collect()

In [None]:
import matplotlib.pyplot as plt
plt.plot(train_losses,label='train_loss')
plt.plot(valid_losses,label='valid_loss')
plt.title('MSE Loss')
plt.ylim(0, 100)
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0.)

In [None]:
target_x , target_y = split_sequence(train_set.Elec_kW.values,n_steps)
inputs = target_x.reshape(target_x.shape[0],target_x.shape[1],1)
inputs = torch.tensor(inputs, dtype=torch.float32).to('cpu')

model.eval()
model = model.to('cpu')
prediction = []
batch_size = 2
iterations =  int(inputs.shape[0]/2)

for i in range(iterations):
    preds = model(torch.tensor(inputs[batch_size*i:batch_size*(i+1)]))
    prediction.append(preds.detach().numpy())
fig, ax = plt.subplots(1, 2,figsize=(11,4))
ax[0].set_title('predicted one')
ax[0].plot(prediction)
ax[1].set_title('real one')
ax[1].plot(target_y)
plt.show()
