# Training Model-3

## Importing Data and Packages

In [5]:
from pathlib import Path
import plotly.graph_objects as go 
import numpy as np 
import pandas as pd 
import json

In [6]:
# Locations inside the NAB directory that sits in the current working directory.
nab = Path.cwd().joinpath('NAB')
data_path = nab.joinpath('data')
labels_filepath = nab.joinpath('labels', 'combined_labels.json')

# CSV files used for training and validation, given relative to data_path.
training_filename = 'realAWSCloudwatch/rds_cpu_utilization_cc0c53.csv'
valid_filename = 'realAWSCloudwatch/rds_cpu_utilization_e47b3b.csv'

In [7]:
# Parse the label file into Python objects; its structure is not validated here.
# Decode as UTF-8 explicitly so the result does not depend on the platform's
# default text encoding.
with open(labels_filepath, 'r', encoding='utf-8') as f:
    anomalies_timestamps = json.load(f)

In [8]:
# Load the training and validation series from their CSV files under data_path.
train = pd.read_csv(data_path.joinpath(training_filename))
valid = pd.read_csv(data_path.joinpath(valid_filename))

In [None]:
## Model-3 LSTM model

In [3]:
class CPUDataset(Dataset):
    """Dataset of fixed-length, non-overlapping chunks of the ``stand_value`` column.

    The column is converted to a 1-D float32 tensor and cut into consecutive
    windows of ``size`` samples (window length and stride are both ``size``).
    Trailing samples that do not fill a whole window are dropped.
    """

    def __init__(self, data: pd.DataFrame, size: int):
        """Build the chunk tensor.

        Args:
            data: Frame with a numeric ``stand_value`` column.
            size: Number of samples per chunk (also the stride between chunks).
        """
        # Take the values positionally through NumPy instead of handing the
        # Series to the tensor constructor, so the conversion does not depend
        # on how torch iterates a pandas Series (label-based lookups can break
        # on a frame whose index is not the default 0..n-1 range).
        values = data['stand_value'].to_numpy(dtype='float32')
        # torch.tensor copies, so the chunks never alias the DataFrame's memory.
        self.chunks = torch.tensor(values).unfold(0, size, size)

    def __len__(self) -> int:
        """Return the number of complete chunks."""
        return self.chunks.size(0)

    def __getitem__(self, i):
        """Return chunk ``i`` as a float32 tensor of ``size`` samples."""
        return self.chunks[i]

# Wrap each split in a CPUDataset that serves 64-sample chunks.
train_ds = CPUDataset(data=train, size=64)
valid_ds = CPUDataset(data=valid, size=64)

NameError: name 'Dataset' is not defined

In [10]:
class LSTMModel(nn.Module):
    """LSTM followed by a linear layer that keeps its recurrent state between calls.

    The ``(h, c)`` state lives on the instance: every forward pass starts from
    it and replaces it with the detached state that pass produced. Call
    ``init_hidden`` to zero it again.
    """

    def __init__(self, in_size, hidden_size, out_size, device):
        """Create the layers and a zeroed initial state.

        Args:
            in_size: Input feature count handed to ``nn.LSTM``.
            hidden_size: Size of the LSTM hidden and cell state.
            out_size: Output feature count of the linear layer.
            device: Device the state tensors are moved to.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(in_size, hidden_size)
        self.linear = nn.Linear(hidden_size, out_size)
        self.device = device
        self.init_hidden()

    def forward(self, x):
        """Process ``x`` as one sequence of ``len(x)`` steps with batch size 1.

        Returns a tensor with ``len(x)`` rows produced by the linear layer.
        """
        steps = len(x)
        lstm_out, new_state = self.lstm(x.view(steps, 1, -1), self.hidden_state)
        # Keep the values but cut the graph, so a later backward pass does not
        # reach back into this call.
        self.hidden_state = tuple(h.detach() for h in new_state)
        return self.linear(lstm_out.view(steps, -1))

    def init_hidden(self):
        """Reset the stored ``(h, c)`` state to zeros of shape ``(1, 1, hidden_size)``."""
        state_shape = (1, 1, self.hidden_size)
        self.hidden_state = tuple(
            torch.zeros(state_shape).to(self.device) for _ in range(2)
        )

In [None]:
def train_model(model: LSTMModel, dataloaders: dict, optimizer: opt.Optimizer,
                scheduler, criterion, device: torch.device, epochs: int):
    """Train ``model`` and evaluate it once per epoch, returning the loss history.

    Each batch is flattened to a column of scalar values, passed through the
    model, and scored with ``criterion`` against that same flattened batch
    (the target is the input itself).

    Args:
        model: Network to optimise; moved to ``device`` before training.
        dataloaders: Mapping with ``'train'`` and ``'valid'`` iterables of tensors.
        optimizer: Optimizer stepped after every training batch.
        scheduler: LR scheduler, also stepped after every training batch.
        criterion: Callable ``criterion(prediction, target)`` returning a scalar loss.
        device: Device the model and batches are moved to.
        epochs: Number of passes over both loaders.

    Returns:
        ``{'train': [...], 'valid': [...]}`` with one sample-weighted mean loss
        per epoch; ``nan`` is recorded for a phase whose loader yielded nothing.
    """
    losses_data = {'train': [], 'valid': []}
    model.to(device)
    for epoch in tqdm(range(epochs)):
        print(f'Epoch {epoch}/{epochs-1}')
        for phase in ['train', 'valid']:
            is_training = phase == 'train'
            if is_training:
                model.train()
            else:
                model.eval()

            # The model carries its LSTM state from call to call. Start every
            # pass over a loader from zeros so state built on one series (or in
            # the previous epoch) does not leak into the next one.
            model.init_hidden()

            running_loss = 0.
            running_total = 0.

            for sequence in dataloaders[phase]:
                value = sequence.to(device)

                if is_training:
                    optimizer.zero_grad()

                with torch.set_grad_enabled(is_training):
                    # One scalar per time step; the model is scored on
                    # reproducing the values it was given.
                    out = model(value.view(-1, 1))
                    loss = criterion(out.view(-1), value.view(-1))

                    if is_training:
                        loss.backward()
                        optimizer.step()
                        # Stepped per batch, not per epoch.
                        scheduler.step()

                # Weight by the number of outputs so the epoch figure is a
                # per-sample mean even if batches differ in length.
                running_loss += loss.item() * out.size(0)
                running_total += out.size(0)

            # An empty loader would otherwise raise ZeroDivisionError here.
            if running_total:
                epoch_loss = running_loss / running_total
            else:
                epoch_loss = float('nan')
            print(f'{phase.capitalize()} Loss: {epoch_loss}')
            losses_data[phase].append(epoch_loss)
    return losses_data



In [None]:
# Pick the device first: the model constructor below needs it to allocate its
# initial LSTM state.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

total_epoch_count = 50
model = LSTMModel(1, 128, 1, device)
# batch_size=1: each batch is a single chunk from the dataset.
dataloaders = {
    'train': DataLoader(train_ds, batch_size=1),
    'valid': DataLoader(valid_ds, batch_size=1)
}
optim = opt.Adam(params=model.parameters(), lr=1e-3)
# The schedule length is steps_per_epoch * epochs, so training must run for
# exactly total_epoch_count epochs with one scheduler step per training batch.
sched = opt.lr_scheduler.OneCycleLR(
  optim, max_lr=1e-3, steps_per_epoch=len(dataloaders['train']), epochs=total_epoch_count
)
criterion = nn.MSELoss()

In [None]:
# Train for the same number of epochs the OneCycleLR schedule was built for.
losses = train_model(model, dataloaders, optim, sched, criterion, device, total_epoch_count)

In [None]:
# Per-epoch training and validation loss curves on shared axes.
layout = {'xaxis': {'title': 'Epoch'}, 'yaxis': {'title': 'Loss'}}
fig = go.Figure(layout=layout)
# add_trace returns the figure, so the chained expression is still the cell's
# final value and the notebook renders it.
(
    fig
    .add_trace(go.Scatter(y=losses['train'], mode='lines', name='Train Loss'))
    .add_trace(go.Scatter(y=losses['valid'], mode='lines', name='Valid Loss'))
)

In [None]:
# 1-D float32 arrays of the stand_value column for each split.
train_values = np.ravel(train['stand_value'].values.astype(np.float32))
valid_values = np.ravel(valid['stand_value'].values.astype(np.float32))

In [None]:
# Run the model over each full series without tracking gradients and bring the
# results back to the CPU.
model.eval()
with torch.no_grad():
    # The model keeps its LSTM state between calls; zero it before each series
    # so the predictions do not depend on whatever sequence was processed last.
    model.init_hidden()
    res_train = model(torch.tensor(train_values).to(device)).cpu()
    model.init_hidden()
    res_valid = model(torch.tensor(valid_values).to(device)).cpu()

### Model-3 Graph-1

In [None]:
# Model output for the training series as a DataFrame; column 0 is plotted.
train1 = pd.DataFrame(res_train.numpy())
layout = {'xaxis': {'title': 'Timestamp'}, 'yaxis': {'title': 'CPU Utilization'}}

fig = go.Figure(layout=layout)

# Ground truth in blue and predictions in orange, both against the timestamp
# column. add_trace returns the figure, so the chained expression remains the
# cell's final value and the notebook renders it.
(
    fig
    .add_trace(go.Scatter(x=train['timestamp'], y=train['stand_value'],
                          mode='markers', name='Ground Truth',
                          marker={'color': 'blue'}))
    .add_trace(go.Scatter(x=train['timestamp'], y=train1[0],
                          mode='markers', name='Predicted Value',
                          marker={'color': 'orange'}))
)

### Model-3 Graph-2

In [None]:
# Model output for the validation series as a DataFrame; column 0 is plotted.
valid1 = pd.DataFrame(res_valid.numpy())
layout = {'xaxis': {'title': 'Timestamp'}, 'yaxis': {'title': 'CPU Utilization'}}

fig = go.Figure(layout=layout)

# Ground truth in blue and predictions in orange, both against the timestamp
# column. add_trace returns the figure, so the chained expression remains the
# cell's final value and the notebook renders it.
(
    fig
    .add_trace(go.Scatter(x=valid['timestamp'], y=valid['stand_value'],
                          mode='markers', name='Ground Truth',
                          marker={'color': 'blue'}))
    .add_trace(go.Scatter(x=valid['timestamp'], y=valid1[0],
                          mode='markers', name='Predicted Value',
                          marker={'color': 'orange'}))
)