# Feature engineering
## Import libraries

In [1]:
%config Completer.use_jedi = False
import pandas as pd
import torch
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import numpy as np
from torch import nn
from torch import optim
from torch.nn import functional as F
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt
from multiprocessing import cpu_count
from pathlib import Path
import tsfresh
from tsfresh.feature_extraction import EfficientFCParameters, MinimalFCParameters, ComprehensiveFCParameters
from tsfresh.utilities.dataframe_functions import roll_time_series
from tqdm import tqdm, tqdm_notebook
from sklearn.metrics import f1_score
pd.set_option('display.max_rows', 10000)
pd.set_option('display.max_columns', 500)
import collections

In [2]:
TRAIN_DATA_DIR = '../FIT5149_A2_data/train_data_withlabels.csv'
TEST_DATA_DIR = "../FIT5149_A2_data/train_data_withlabels.csv"
MODEL_DIR = "./bestf1.pth"

In [3]:
# https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html
# Compute the one-dimensional discrete Fourier Transform for real input.
# This function computes the one-dimensional n-point discrete Fourier Transform (DFT)
# of a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT).

def absfft(x):
    return np.abs(np.fft.rfft(x))

# Helper function, used in building training and validating datasets
def build_datasets(data, target, train_size, valid_pct = 0.2, seed = None):
    x, x_fft = data # split the data in to raw and FFT data
    idx = np.arange(train_size) # Create a list of indexes
#     train_idx, val_idx = train_test_split(idx, test_size = valid_pct, random_state = seed)
    # Start spliting the data into training and validating parts using the validate percentage (valid_pct) value
    train_idx, val_idx = idx[round(train_size * valid_pct):], idx[:round(train_size * valid_pct)]
    # Build the train data, which include raw x, FFT x, and target y 
    train_ds = TensorDataset(torch.tensor(x[:train_size][train_idx]).float(),
                            torch.tensor(x_fft[:train_size][train_idx]).float(),
                            torch.tensor(target[:train_size][train_idx]).long())
    print("There are",len(set(target[:train_size][train_idx])),"class in training data")
    # Build the validating data, which include raw x, FFT x, and target y 
    val_ds = TensorDataset(torch.tensor(x[:train_size][val_idx]).float(),
                            torch.tensor(x_fft[:train_size][val_idx]).float(),
                            torch.tensor(target[:train_size][val_idx]).long())
    return train_ds, val_ds 

# Helper function, 
def build_loaders(data, batch_size = 128, jobs = 8):
    train_ds, valid_ds = data
    # Build a train dataloader
    train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle = False, num_workers = jobs)
    # Build a test dataloader
    valid_dl = DataLoader(valid_ds, batch_size = batch_size, shuffle = False, num_workers = jobs)
    return train_dl, valid_dl 

# https://www.kaggle.com/purplejester/pytorch-deep-time-series-classification
class _SepConv1d(nn.Module):
    """A simple separable convolution implementation.
    
    The separable convlution is a method to reduce number of the parameters 
    in the deep learning network for slight decrease in predictions quality.
    """
    def __init__(self, ni, no, kernel, stride, pad):
        super().__init__()
        self.depthwise = nn.Conv1d(ni, ni, kernel, stride, padding=pad, groups=ni)
        self.pointwise = nn.Conv1d(ni, no, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))
    
class SepConv1d(nn.Module):
    """Implementes a 1-d convolution with 'batteries included'.
    
    The module adds (optionally) activation function and dropout 
    layers right after a separable convolution layer.
    """
    def __init__(self, ni, no, kernel, stride, pad, 
                 drop=None, bn=False,
                 activ=lambda: nn.ReLU()):
    
        super().__init__()
        # Check the drop out rate
        assert drop is None or (0.0 < drop < 1.0)
        # Build a separable convolution layer, using channel_in, channel_out, kernel size, stride, and padding size 
        layers = [_SepConv1d(ni, no, kernel, stride, pad)]
        # Add an activation function
        if activ:
            layers.append(activ())
        # Apply batch normalization if required
        if bn:
            layers.append(nn.BatchNorm1d(no))
        # Apply drop out rate to prevent the model from overfitting
        if drop is not None:
            layers.append(nn.Dropout(drop))
        # chain all of the layers into one object
        self.layers = nn.Sequential(*layers)
    
    # Define forward function
    def forward(self, x): 
        return self.layers(x)


# Helper function, used to build flatten layers
class Flatten(nn.Module):
    """Converts N-dimensional tensor into 'flat' one."""

    def __init__(self, keep_batch_dim=True):
        super().__init__()
        self.keep_batch_dim = keep_batch_dim
    # Define forward function, which flatten the input into 1 dimension, or 2 dimension which is [batchsize, -1]
    def forward(self, x):
        if self.keep_batch_dim:
            return x.view(x.size(0), -1)
        return x.view(-1)
    
# Helper function to print the shape of a layer's output, this function is useful when building and debugging 
# models
class PrintSize(nn.Module):
    def __init__(self):
        super(PrintSize, self).__init__()
        
    def forward(self, x):
        print(x.shape)
        return x
    
# model building, which includes 2 branches, one brach for raw input with 30 time steps, one branch for 
# FFT input with 30/2 + 1 = 16 timesteps.
class Classifier(nn.Module):
    def __init__(self, raw_ni, fft_ni, no, drop=.5):
        super().__init__()
        #PKS [[4,8,2],[1,3,1],[3,8,2],[1,3,1],[5,8,2],[1,3,1],[2,8,2]]
        self.raw = nn.Sequential( 
            #         (in ,out ,kernel, stride, pad)
            SepConv1d(raw_ni,  32, 8, 2, 4, drop=drop),
            SepConv1d(    32,  64, 8, 2, 3, drop=drop),
            SepConv1d(    64, 128, 8, 2, 5, drop=drop),
            SepConv1d(   128, 256, 8, 2, 2, drop=drop),
            Flatten(),
#             PrintSize(),
            nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(),
            nn.Dropout(drop), nn.Linear(256, 64), nn.ReLU())
        #PKS [[3,8,2],[1,3,1],[5,8,2],[1,3,1],[4,8,2],[1,3,1],[3,8,2]]
        self.fft = nn.Sequential(
            SepConv1d(fft_ni,  32, 8, 2, 3, drop=drop),
            SepConv1d(    32,  64, 8, 2, 5, drop=drop),
            SepConv1d(    64, 128, 8, 2, 4, drop=drop),
            SepConv1d(   128, 128, 8, 2, 5, drop=drop),
            SepConv1d(   128, 256, 8, 2, 3, drop=drop),
            Flatten(),
#             PrintSize(),
            nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(),
            nn.Dropout(drop), nn.Linear(256, 64), nn.ReLU())
        
        self.out = nn.Sequential(
            nn.Linear(128, 64), nn.ReLU(inplace=True), nn.Linear(64, no))
        self.init_weights(nn.init.kaiming_normal_)
        

    def init_weights(self, init_fn):
        def init(m): 
            for child in m.children():
                if isinstance(child, nn.Conv1d):
                    # Fills the input Tensor with values according to the method described in Delving deep into 
                    # rectifiers: Surpassing human-level performance on 
                    # ImageNet classification - He, K. et al. (2015), using a normal distribution
                    init_fn(child.weights)
        init(self)
        
    # Define a forward function for the model
    def forward(self, t_raw, t_fft):
        # raw x branch
        raw_out = self.raw(t_raw)
        # fft x branch
        fft_out = self.fft(t_fft)
        # concat 2 branches into one
        t_in = torch.cat([raw_out, fft_out], dim=1)
        # push it through fully connected layers to get the output
        out = self.out(t_in)
        return out
    
# https://github.com/gokulprasadthekkel/pytorch-multi-class-focal-loss/blob/master/focal_loss.py
# Helper function, implementation of focal loss in order to solve class imbalance problem
class FocalLoss(nn.modules.loss._WeightedLoss):
    def __init__(self, weight=None, gamma=2,reduction='mean'):
        super(FocalLoss, self).__init__(weight,reduction=reduction)
        self.gamma = gamma
        self.weight = weight #weight parameter will act as the alpha parameter to balance class weights

    def forward(self, input, target):

        ce_loss = F.cross_entropy(input, target,reduction=self.reduction,weight=self.weight)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
        return focal_loss

In [18]:
# Load the testing data
x_test = pd.read_csv(TEST_DATA_DIR)
x_test.rename(columns={'Unnamed: 0': 'id'}, inplace= 1) # Rename 
x_test['id'] = x_test['id'] + 28 # Increase the ID column by 28 for later rolling
x_test

Unnamed: 0,id,load,ac,ev,oven,wash,dryer,hourofday,dayofweek,dif,absdif,max,var,entropy,nonlinear,hurst
0,105569,2.245,0,0,0,0,0,0,Sun,0.987,0.987,6.215,3.074549,0.678886,0.052903,0.994071
1,105570,2.259,0,0,0,0,0,0,Sun,0.014,0.014,6.215,3.172867,0.667450,0.054829,0.994154
2,105571,2.269,0,0,0,0,0,0,Sun,0.010,0.010,6.215,3.270112,0.647777,0.056991,0.994220
3,105572,2.268,0,0,0,0,0,0,Sun,-0.001,0.001,6.215,3.303763,0.629227,0.057606,0.994150
4,105573,2.270,0,0,0,0,0,0,Sun,0.002,0.002,6.215,3.302744,0.621295,0.082640,0.994041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417715,523284,2.543,0,0,0,0,0,21,Tue,-0.003,0.003,0.000,0.000000,0.000000,0.000000,0.000000
417716,523285,2.417,0,0,0,0,0,21,Tue,-0.126,0.126,0.000,0.000000,0.000000,0.000000,0.000000
417717,523286,0.999,0,0,0,0,0,21,Tue,-1.418,1.418,0.000,0.000000,0.000000,0.000000,0.000000
417718,523287,0.966,0,0,0,0,0,21,Tue,-0.033,0.033,0.000,0.000000,0.000000,0.000000,0.000000


In [19]:
# Build the Label encoder and Standard scaler from training data
data = pd.read_csv(TRAIN_DATA_DIR)
data.rename(columns={"Unnamed: 0":"id"}, inplace= True)
x_train = data.loc[:,data.columns.difference(['ac', 'ev', 'oven', 'wash', 'dryer'])]
y_train = data[['ac', 'ev', 'oven', 'wash', 'dryer']]

In [20]:
le = preprocessing.LabelEncoder() # Create a label encoder for variable "dayofweeek"
le.fit(x_train['dayofweek']) # Fit the values
x_train['dayofweek'] = le.transform(x_train['dayofweek']) # Perform transformation
x_test['dayofweek'] = le.transform(x_test['dayofweek'])

le.classes_ # have a look at the classes

array(['Fri', 'Mon', 'Sat', 'Sun', 'Thu', 'Tue', 'Wed'], dtype=object)

In [21]:
# Standardize features by removing the mean and scaling to unit variance
scale_list = ['absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst', 'load', 'max', 'nonlinear', 'var']
scaler = preprocessing.StandardScaler().fit(x_train[scale_list]) # Fit the data values
x_train[scale_list] = scaler.transform(x_train[scale_list]) # Perform scaling
x_test[scale_list] = scaler.transform(x_test[scale_list]) # Perform scaling


#### Get the y_train label encoder for later part of predictions decoding

In [22]:
# concatenate the values of the 5 appliances into a string, for example 0,1,0,0,1 will be 01001
y_train['transformed'] = y_train.apply(lambda x: ''.join(x.astype(str)),axis = 1)
# create a label encoder for y_train
le_y = preprocessing.LabelEncoder()
# fit and transform the values of the concatenated values 
y_train['encoded'] = le_y.fit_transform(y_train['transformed']) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_train['transformed'] = y_train.apply(lambda x: ''.join(x.astype(str)),axis = 1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  y_train['encoded'] = le_y.fit_transform(y_train['transformed'])


In [23]:
# Since the design choice is to use 30 minutes at each time-step, therefore each of the first 29 observations 
# will not have enough 29 observations before it in order to create a series of 30 observations.
# Thus I will add 29 copies of the first observation to the top of the testing dataframe.
extra_len = pd.DataFrame(x_test.iloc[0:1,:].values.repeat(29, axis = 0), columns=x_test.columns)
extra_len['id'] = list(range(105569-29,105569))

# concatenate the testing and the additional data
x_test = pd.concat([extra_len, x_test])
x_test['dummy_id'] = 1 # Create a dummy ID column for rolling
x_test.reset_index(drop= True, inplace=True) # Reset index
x_test = x_test[['id', 'load', 'absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst', 
                 'max', 'nonlinear', 'var','dummy_id']]

In [24]:
x_test.head(100)

Unnamed: 0,id,load,absdif,dayofweek,dif,entropy,hourofday,hurst,max,nonlinear,var,dummy_id
0,105540,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
1,105541,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
2,105542,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
3,105543,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
4,105544,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
5,105545,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
6,105546,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
7,105547,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
8,105548,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1
9,105549,0.031914,1.633999,-0.006818,1.859011,-0.306037,-1.659524,0.325913,1.050126,-0.542338,0.673127,1


In [None]:
# roll_time_series method creates sub windows of the time series. It rolls the (sorted) data frames for each 
# kind and each id separately in the “time” domain (which is represented by the sort order of the sort column given
# by column_sort).
# For example when applying the roll_time_series to a data [a,b,c,d,e,f,g] with time_shift = 3, the result will be 
# [a,b,c, b,c,d, c,d,e, d,e,f, e,f,g]. The input of the CNN model will have a shape of [30,10] (30 timestep, 10
# variables) and its prediction will be the appliances status at time_step 30.
# Notice: run these 3 lines of code will take hours


x_test_rolled = roll_time_series(x_test, column_id="dummy_id", column_sort="id",
                            max_timeshift = 29, min_timeshift = 29, n_jobs = 16)

In [25]:
x_test_rolled.to_hdf('../x_test_rolled.h5','x_test_rolled')

Rolling: 100%|██████████| 80/80 [1:02:39<00:00, 47.00s/it] 


In [26]:
x_test_rolled.to_hdf('../x_test_rolled.h5','x_test_rolled')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->Index(['id'], dtype='object')]

  pytables.to_hdf(


In [27]:
# check if there're any shape differences between rolled data and original data
print(len(x_test_rolled)/30)
print(len(x_test)-29)

417720.0
417720


In [28]:
# Drop the ID and dummy_id columns
x_test_rolled.drop(['id','dummy_id'], axis=1, inplace=True)
# reshape the rolled x_train to a new shape of [105540,30,10]
x_test_rolled = np.reshape(x_test_rolled.to_numpy(dtype=np.float64), [-1 , 30, int(x_test_rolled.shape[1])])
x_test_rolled.shape

(417720, 30, 10)

In [None]:
# https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html
# Compute the one-dimensional discrete Fourier Transform for real input.
# This function computes the one-dimensional n-point discrete Fourier Transform (DFT)
# of a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT).
x_test_fft = np.copy(x_test_rolled)
x_test_fft = np.apply_along_axis(absfft, 1, x_test_fft)
x_test_fft

In [30]:
print(x_test_rolled.shape)
print(x_test_fft.shape)

(417720, 30, 10)
(417720, 16, 10)


In [31]:
# Transpose the shape x_test and x_test_fft to [observations, features, time_steps]
x_test = x_test_rolled.transpose(0,2,1)
x_test_fft = x_test_fft.transpose(0,2,1)

In [32]:
print(x_test.shape)
print(x_test_fft.shape)

(417720, 10, 30)
(417720, 10, 16)


In [33]:
device_test = torch.device('cpu')

In [34]:
# Get the testing dataset
test_ds = TensorDataset(torch.tensor(x_test).float(), torch.tensor(x_test_fft).float())
# Get the testing dataloader
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)

test_preds = [] # to store prediction

In [35]:
# Build a new classifier model
model = Classifier(10, 10, 20)
# load the state dict of the previously trained models
model.load_state_dict(torch.load("./bestf1.pth"))

<All keys matched successfully>

In [36]:
# Change the model mode to evaluate and send to the device
model.eval()
model.to(device_test)

Classifier(
  (raw): Sequential(
    (0): SepConv1d(
      (layers): Sequential(
        (0): _SepConv1d(
          (depthwise): Conv1d(10, 10, kernel_size=(8,), stride=(2,), padding=(4,), groups=10)
          (pointwise): Conv1d(10, 32, kernel_size=(1,), stride=(1,))
        )
        (1): ReLU()
        (2): Dropout(p=0.5, inplace=False)
      )
    )
    (1): SepConv1d(
      (layers): Sequential(
        (0): _SepConv1d(
          (depthwise): Conv1d(32, 32, kernel_size=(8,), stride=(2,), padding=(3,), groups=32)
          (pointwise): Conv1d(32, 64, kernel_size=(1,), stride=(1,))
        )
        (1): ReLU()
        (2): Dropout(p=0.5, inplace=False)
      )
    )
    (2): SepConv1d(
      (layers): Sequential(
        (0): _SepConv1d(
          (depthwise): Conv1d(64, 64, kernel_size=(8,), stride=(2,), padding=(5,), groups=64)
          (pointwise): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
        )
        (1): ReLU()
        (2): Dropout(p=0.5, inplace=False)
      )
   

In [37]:
# Perform predicting on testing date
for test_batch in tqdm(test_dl): # Iterate through every batch of testing dataloader
    x_raw, x_fft = [t.to(device_test) for t in test_batch] # Send the data to device
    out = model(x_raw, x_fft) # Get the output
    preds = F.log_softmax(out, dim=1).argmax(dim=1) # Get the final prediction
    # decode its value for submisson compatability
    decoded_preds = le_y.inverse_transform([preds.detach().cpu().clone().numpy()]) 
    # Save the prediction
    test_preds.append([i for i in decoded_preds[0]])

  return f(**kwargs)
100%|██████████| 417720/417720 [14:54<00:00, 466.76it/s]


In [38]:
# Convert the prediction list to dataframe
predictions = pd.DataFrame(np.array(test_preds)).reset_index()
predictions.columns = ['id', 'ac', 'ev', 'oven', 'wash', 'dryer'] # Add column names
predictions['id'] = predictions['id'] + 1 # For submission compatability
predictions.set_index('id',inplace=True) # set ID column as index
predictions.head() #have a look to check its format

Unnamed: 0_level_0,ac,ev,oven,wash,dryer
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,0,0,0,0
2,0,0,0,0,0
3,0,0,0,0,0
4,0,0,0,0,0
5,0,0,0,0,0


In [39]:
# Check how many classes are predicted
predictions[['ac', 'ev', 'oven', 'wash', 'dryer']].drop_duplicates()

Unnamed: 0_level_0,ac,ev,oven,wash,dryer
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,1,0,0,0,0
2,0,0,0,0,0


In [40]:
# Export to .csv file for submission
predictions.to_csv('../CNN_predictions.csv')

In [None]:
# Read data
data = pd.read_csv(TEST_DATA_DIR)
data.rename(columns={"Unnamed: 0":"id"}, inplace= True)
data.head(100)

In [None]:
# Have a look at the datatypes, seeing that the variable "dayofweek" has data type "object" which is unfarvorable
# when training deep learning models, will need to encode and change it to float datatype
data.dtypes

In [None]:
# Split the data into x_train and y_train
x_train = data.loc[:,data.columns.difference(['ac', 'ev', 'oven', 'wash', 'dryer'])]
y_train = data[['ac', 'ev', 'oven', 'wash', 'dryer']]

In [None]:
le = preprocessing.LabelEncoder() # Create a label encoder for variable "dayofweeek"
le.fit(x_train['dayofweek']) # Fit the values
x_train['dayofweek'] = le.transform(x_train['dayofweek']) # Perform transformation
le.classes_ # have a look at the classes

In [None]:
# Create a dummy ID columns to make it compatible with tsfresh library
x_train['dummy_id'] = 1 
x_train.columns

In [None]:
# Standardize features by removing the mean and scaling to unit variance
scale_list = ['absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst',
       'load', 'max', 'nonlinear', 'var']
scaler = preprocessing.StandardScaler().fit(x_train[scale_list]) # Fit the data values
x_train[scale_list] = scaler.transform(x_train[scale_list]) # Perform transforming
x_train.head(5)

In [None]:
# Since the design choice is to use 30 minutes at each time-step, therefore each of the first 29 observations 
# will not have enough 29 observations before it in order to make a series of 30 observations.
# Thus I will add 29 copies of the first observation to the top of the dataframe.
extra_len = pd.DataFrame(x_train.iloc[0:1,:].values.repeat(29, axis = 0), columns=x_train.columns)

extra_len['id'] = list(range(105541-29,105541)) # Adjust the indexes
x_train = pd.concat([extra_len, x_train]) # Concatenate the x_train and the extra data
x_train.head(5)

In [None]:
# concatenate the values of the 5 appliances into a string, for example 0,1,0,0,1 will be 01001
y_train['transformed'] = y_train.apply(lambda x: ''.join(x.astype(str)),axis = 1)
# create a label encoder for y_train
le_y = preprocessing.LabelEncoder()
# fit and transform the values of the concatenated values 
y_train['encoded'] = le_y.fit_transform(y_train['transformed'])
# Extract the encoded values to use them as training target 
y_train_transformed = y_train['encoded']
y_train_transformed = np.array(y_train_transformed)
y_train_transformed

### Rolling

In [None]:
# roll_time_series method creates sub windows of the time series. It rolls the (sorted) data frames for each 
# kind and each id separately in the “time” domain (which is represented by the sort order of the sort column given
# by column_sort).
# For example when applying the roll_time_series to a data [a,b,c,d,e,f,g] with time_shift = 3, the result will be 
# [a,b,c, b,c,d, c,d,e, d,e,f, e,f,g]. The input of the CNN model will have a shape of [30,10] (30 timestep, 10
# variables) and its prediction will be the appliances status at time_step 30.
# Notice: run these 3 lines of code will take hours

x_train = x_train[['id', 'load', 'absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst', 
        'max', 'nonlinear', 'var']]
x_train_rolled = roll_time_series(x_train, column_id="dummy_id", column_sort="id",
                            max_timeshift = 29, min_timeshift = 29)
x_train_rolled.to_hdf('../x_train_rolled.h5','x_train_rolled')


### Load rolled data

In [None]:
x_train_rolled = pd.read_hdf("../x_train_rolled.h5","x_train_rolled")
x_train_rolled.head(5)

In [None]:
# The training data now has 12531600 observation, which is 30 times bigger than the original dataset, this is 
# because of the rolling method mentioned above.
print(len(x_train_rolled)/30) 
print(len(x_train)-29)
del x_train
# Let's have a look to see if the amount of data is the same as the original one

In [None]:
# Let's have a look at the data after being encoded, we only use the last columns as y_train.
y_train.drop_duplicates()

In [None]:
# Next, let's go back to the rolled x_train data
x_train_rolled.tail(60)
# It can be seen that for each 30 observations, they all have the same ID, which mean an observation K and
# K-1, K-2, ... K-29 observation will be used as input for the model to predict the y value of observation K.
# There are 12531600 rows in this dataset, which is 30 times bigger than the original one because that each 
# row and its 29 previous rows will be used with 10 columns to create

In [None]:
x_train_rolled.drop('id', axis=1, inplace=True) # drop the ID column
# reshape the rolled x_train to a new shape of [417720,30,10]
# 417720 is the number of observation, 30 is the number of timestep of each input, 10 is number of features
x_train_rolled = np.reshape(x_train_rolled.to_numpy(dtype=np.float64), [-1 , 30, int(x_train_rolled.shape[1])])
x_train_rolled.shape #(417720, 30, 10)

### Extract the Fast Fourier Transform features

In [None]:
# https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html
# Compute the one-dimensional discrete Fourier Transform for real input.
# This function computes the one-dimensional n-point discrete Fourier Transform (DFT)
# of a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT).

def absfft(x):
    return np.abs(np.fft.rfft(x))
x_train_fft = np.copy(x_train_rolled)
x_train_fft = np.apply_along_axis(absfft, 1, x_train_fft)
x_train_fft

There are 16 (n/2+1) Fast Fourier Transform values computed from the 30 (n) rows, We will use these FFT as 
another input for our CNN model.

In [None]:
print(x_train_rolled.shape)
print(x_train_fft.shape)

In [None]:
# Transpose the shape x_train_rolled and x_train_fft to [observations, features, time_steps]
x_train = x_train_rolled.transpose(0,2,1)
x_train_fft = x_train_fft.transpose(0,2,1)

In [None]:
print(x_train.shape)
print(x_train_fft.shape)

In [None]:
# Have a look at the distribution of target values, the data is heavily imbalanced
plt.hist(y_train_transformed)

In [None]:
# Helper function, used in building training and validating datasets
def build_datasets(data, target, train_size, valid_pct = 0.2, seed = None):
    x, x_fft = data # split the data in to raw and FFT data
    idx = np.arange(train_size) # Create a list of indexes
#     train_idx, val_idx = train_test_split(idx, test_size = valid_pct, random_state = seed)
    # Start spliting the data into training and validating parts using the validate percentage (valid_pct) value
    train_idx, val_idx = idx[round(train_size * valid_pct):], idx[:round(train_size * valid_pct)]
    # Build the train data, which include raw x, FFT x, and target y 
    train_ds = TensorDataset(torch.tensor(x[:train_size][train_idx]).float(),
                            torch.tensor(x_fft[:train_size][train_idx]).float(),
                            torch.tensor(target[:train_size][train_idx]).long())
    print("There are",len(set(target[:train_size][train_idx])),"class in training data")
    # Build the validating data, which include raw x, FFT x, and target y 
    val_ds = TensorDataset(torch.tensor(x[:train_size][val_idx]).float(),
                            torch.tensor(x_fft[:train_size][val_idx]).float(),
                            torch.tensor(target[:train_size][val_idx]).long())
    return train_ds, val_ds 

In [None]:
# Helper function, 
def build_loaders(data, batch_size = 128, jobs = 8):
    train_ds, valid_ds = data
    # Build a train dataloader
    train_dl = DataLoader(train_ds, batch_size = batch_size, shuffle = False, num_workers = jobs)
    # Build a test dataloader
    valid_dl = DataLoader(valid_ds, batch_size = batch_size, shuffle = False, num_workers = jobs)
    return train_dl, valid_dl 

In [None]:
# https://www.kaggle.com/purplejester/pytorch-deep-time-series-classification
class _SepConv1d(nn.Module):
    """A simple separable convolution implementation.
    
    The separable convlution is a method to reduce number of the parameters 
    in the deep learning network for slight decrease in predictions quality.
    """
    def __init__(self, ni, no, kernel, stride, pad):
        super().__init__()
        self.depthwise = nn.Conv1d(ni, ni, kernel, stride, padding=pad, groups=ni)
        self.pointwise = nn.Conv1d(ni, no, kernel_size=1)

    def forward(self, x):
        return self.pointwise(self.depthwise(x))

https://www.kaggle.com/purplejester/pytorch-deep-time-series-classification  

To design a classifier, we'll first develop a few of helper classes. There are no specialised SeparableConv layers in the torch framework. However, we can simply reproduce them using the following class. (This was taken from a topic on the PyTorch forum.)

In [None]:
class SepConv1d(nn.Module):
    """Implementes a 1-d convolution with 'batteries included'.
    
    The module adds (optionally) activation function and dropout 
    layers right after a separable convolution layer.
    """
    def __init__(self, ni, no, kernel, stride, pad, 
                 drop=None, bn=False,
                 activ=lambda: nn.ReLU()):
    
        super().__init__()
        # Check the drop out rate
        assert drop is None or (0.0 < drop < 1.0)
        # Build a separable convolution layer, using channel_in, channel_out, kernel size, stride, and padding size 
        layers = [_SepConv1d(ni, no, kernel, stride, pad)]
        # Add an activation function
        if activ:
            layers.append(activ())
        # Apply batch normalization if required
        if bn:
            layers.append(nn.BatchNorm1d(no))
        # Apply drop out rate to prevent the model from overfitting
        if drop is not None:
            layers.append(nn.Dropout(drop))
        # chain all of the layers into one object
        self.layers = nn.Sequential(*layers)
    
    # Define forward function
    def forward(self, x): 
        return self.layers(x)

In [None]:
# Helper function, used to build flatten layers
class Flatten(nn.Module):
    """Converts N-dimensional tensor into 'flat' one."""

    def __init__(self, keep_batch_dim=True):
        super().__init__()
        self.keep_batch_dim = keep_batch_dim
    # Define forward function, which flatten the input into 1 dimension, or 2 dimension which is [batchsize, -1]
    def forward(self, x):
        if self.keep_batch_dim:
            return x.view(x.size(0), -1)
        return x.view(-1)

In [None]:
# Helper function to print the shape of a layer's output, this function is useful when building and debugging 
# models
class PrintSize(nn.Module):
    def __init__(self):
        super(PrintSize, self).__init__()
        
    def forward(self, x):
        print(x.shape)
        return x

In [None]:
# model building, which includes 2 branches, one brach for raw input with 30 time steps, one branch for 
# FFT input with 30/2 + 1 = 16 timesteps.
class Classifier(nn.Module):
    def __init__(self, raw_ni, fft_ni, no, drop=.5):
        super().__init__()
        #PKS [[4,8,2],[1,3,1],[3,8,2],[1,3,1],[5,8,2],[1,3,1],[2,8,2]]
        self.raw = nn.Sequential( 
            #         (in ,out ,kernel, stride, pad)
            SepConv1d(raw_ni,  32, 8, 2, 4, drop=drop),
            SepConv1d(    32,  64, 8, 2, 3, drop=drop),
            SepConv1d(    64, 128, 8, 2, 5, drop=drop),
            SepConv1d(   128, 256, 8, 2, 2, drop=drop),
            Flatten(),
#             PrintSize(),
            nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(),
            nn.Dropout(drop), nn.Linear(256, 64), nn.ReLU())
        #PKS [[3,8,2],[1,3,1],[5,8,2],[1,3,1],[4,8,2],[1,3,1],[3,8,2]]
        self.fft = nn.Sequential(
            SepConv1d(fft_ni,  32, 8, 2, 3, drop=drop),
            SepConv1d(    32,  64, 8, 2, 5, drop=drop),
            SepConv1d(    64, 128, 8, 2, 4, drop=drop),
            SepConv1d(   128, 128, 8, 2, 5, drop=drop),
            SepConv1d(   128, 256, 8, 2, 3, drop=drop),
            Flatten(),
#             PrintSize(),
            nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(),
            nn.Dropout(drop), nn.Linear(256, 64), nn.ReLU())
        
        self.out = nn.Sequential(
            nn.Linear(128, 64), nn.ReLU(inplace=True), nn.Linear(64, no))
        self.init_weights(nn.init.kaiming_normal_)
        

    def init_weights(self, init_fn):
        def init(m): 
            for child in m.children():
                if isinstance(child, nn.Conv1d):
                    # Fills the input Tensor with values according to the method described in Delving deep into 
                    # rectifiers: Surpassing human-level performance on 
                    # ImageNet classification - He, K. et al. (2015), using a normal distribution
                    init_fn(child.weights)
        init(self)
        
    # Define a forward function for the model
    def forward(self, t_raw, t_fft):
        # raw x branch
        raw_out = self.raw(t_raw)
        # fft x branch
        fft_out = self.fft(t_fft)
        # concat 2 branches into one
        t_in = torch.cat([raw_out, fft_out], dim=1)
        # push it through fully connected layers to get the output
        out = self.out(t_in)
        return out

In [None]:
# class Classifier(nn.Module):
#     def __init__(self, raw_ni, fft_ni, no, drop=.5):
#         super().__init__()
#         #PKS [[4,8,2],[3,8,2],[5,8,2],[2,8,2]]
#         self.raw = nn.Sequential( #kernel, stride, pad
#             SepConv1d(raw_ni,  32, 8, 2, 4, drop=drop),
#             SepConv1d(    32,  64, 8, 2, 3, drop=drop),
#             SepConv1d(    64, 128, 8, 2, 5, drop=drop),
#             SepConv1d(   128, 256, 8, 2, 2, drop=drop),
#             Flatten(),
# #             PrintSize(),
#             nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(inplace=True),
#             nn.Dropout(drop), nn.Linear( 256, 64), nn.ReLU(inplace=True))
#         #PKS [3,8,2],[5,8,2],[4,8,2],[5,8,2],[3,8,2]
#         self.fft = nn.Sequential(
#             SepConv1d(fft_ni,  32, 8, 2, 3, drop=drop),
#             SepConv1d(    32,  64, 8, 2, 5, drop=drop),
#             SepConv1d(    64, 128, 8, 2, 4, drop=drop),
#             SepConv1d(   128, 128, 8, 2, 5, drop=drop),
#             SepConv1d(   128, 256, 8, 2, 3, drop=drop),
#             Flatten(),
# #             PrintSize(),
#             nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(inplace=True),
#             nn.Dropout(drop), nn.Linear( 256, 64), nn.ReLU(inplace=True))
        
#         self.out = nn.Sequential(
#             nn.Linear(128, 64), nn.ReLU(inplace=True), nn.Linear(64, no))
        
#     def forward(self, t_raw, t_fft):
#         raw_out = self.raw(t_raw)
#         fft_out = self.fft(t_fft)
#         t_in = torch.cat([raw_out, fft_out], dim=1)
#         out = self.out(t_in)
#         return out

In [None]:
# https://github.com/gokulprasadthekkel/pytorch-multi-class-focal-loss/blob/master/focal_loss.py
# Helper function, implementation of focal loss in order to solve class imbalance problem
class FocalLoss(nn.modules.loss._WeightedLoss):
    def __init__(self, weight=None, gamma=2,reduction='mean'):
        super(FocalLoss, self).__init__(weight,reduction=reduction)
        self.gamma = gamma
        self.weight = weight #weight parameter will act as the alpha parameter to balance class weights

    def forward(self, input, target):

        ce_loss = F.cross_entropy(input, target,reduction=self.reduction,weight=self.weight)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
        return focal_loss

In [None]:
seed = 1
# Since we use the provided data for only training and validating, thus no testing data needed. Thus we set the 
# train size to the full length of x_train.
train_size = len(x_train)
# build data set with validation percentage = 10%
datasets = build_datasets((x_train, x_train_fft), y_train_transformed,
                          train_size = train_size, valid_pct = 0.1, seed=seed)

In [None]:
# Calculate the proportion of different classes in the data
cls_count = dict(collections.Counter(y_train_transformed)) # count the number of appearance per class
cls_weights = np.array([i[1] for i in sorted(cls_count.items())]) # sort the classes by descending order of class 
                                                                  # appearance count
cls_weights = torch.Tensor([cls_weights/np.sum(cls_weights)]) # calculate the proportions.

In [None]:
# Set device for training, this model is trained on a RTX 3080, thus the device will be "cuda"
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# device = torch.device('cpu')

In [None]:
# Get the input number of features of the x_raw branch of the model, which is 30
raw_feat = x_train.shape[1]
# Get the input number of features of the x_fft branch of the model, which is 16
fft_feat = x_train_fft.shape[1]
# Get the train and validate dataloaders
trn_dl, val_dl = build_loaders(datasets, batch_size=512)

lr = 0.0002 # Set learning rate
n_epochs = 400 # Set maximum epoch
iterations_per_epoch = len(trn_dl) # Iteration per epoch
num_classes = 20 # Number of classes
best_acc = 0 # Set best accuracy for later early stopping purpose
best_f1 = 0 # Set best F1_score for later early stopping purpose
patience, trials = 50, 0 # Set patience and a counter for later early stopping purpose
base = 1 # use to efficiently print the loss and accuracy while training 
step = 2 # use to efficiently print the loss and accuracy while training, only print out the accuracy at
         # step 2,4,8,16,32,...
loss_history = [] # For loss storing
acc_history = [] # For accuracy storing
gstep = -1 # Global step - used to initiate an One Cycle Learning Rate Scheduler. gstep = -1 means entirely new model
model = Classifier(raw_feat, fft_feat, num_classes,drop = .5).to(device) # Create the CNN model 
# criterion = nn.CrossEntropyLoss(reduction='sum')
cls_weights = cls_weights.to(device) # Send the class weight to cuda device
criterion = nn.CrossEntropyLoss(reduction='sum') # Create loss function
opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay= 0.0001) # Initiate Adam optimizer
# Create a one cycle learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.OneCycleLR(opt, 0.004, steps_per_epoch = iterations_per_epoch,
                                                   epochs = n_epochs, pct_start = 0.4,base_momentum = 0.85, 
                                                   max_momentum=0.95, div_factor = 10.0, last_epoch = gstep)
print('Start model training')

for epoch in range(1, n_epochs + 1):
    # Set model to traning mode
    model.train()
    epoch_loss = 0
    # Iterate through every batch of the train data loader
    for i, batch in enumerate(trn_dl):
        # Send the input to CUDA device
        x_raw, x_fft, y_batch = [t.to(device) for t in batch]
        opt.zero_grad()
        out = model(x_raw, x_fft) # Get the output from the model
        loss = criterion(out, y_batch) # Calculate losses
        epoch_loss += loss.item() # Record the loss
        loss.backward() # Back-propagation
        opt.step() # Call step() method to update the parameters
        lr_scheduler.step() # Call step() method to update the parameters
    # average the epoch's loss
    epoch_loss /= train_size
    # Record the loss history
    loss_history.append(epoch_loss)
    # Start validating
    model.eval()
    correct, total, epoch_f1 = 0, 0, []
    for batch in val_dl: # Iterate through every batch of the validating data
        # Send the data to CUDA device
        x_raw, x_fft, y_batch = [t.to(device) for t in batch]
        out = model(x_raw, x_fft) # Get the output from the model
        preds = F.log_softmax(out, dim=1).argmax(dim=1) # Generate final prediction
        # Calculate F1_score of the batch
        batch_f1 = f1_score(y_batch.detach().cpu().clone().numpy(), preds.detach().cpu().clone().numpy(), 
                           average="macro")
        epoch_f1.append(batch_f1) # Record the error
        total += y_batch.size(0) 
        correct += (preds == y_batch).sum().item() # calculate the accuracy
    # average the F1_score of all batches to get the validation F1_score
    cur_f1 = np.mean(epoch_f1)
    epoch_f1 = []
    # Get the validating accuracy   
    acc = correct / total
    acc_history.append(acc)

    if epoch % base == 0:
        print(f'Epoch: {epoch:3d}. Loss: {epoch_loss:.4f}. Acc.: {acc:2.2%}')
        base *= step
    # Keep track of the improvement of the model in term of accuracy for early stopping
    if acc > best_acc:
        trials = 0
        best_acc = acc
        # save the model with best accuracy
        torch.save(model.state_dict(), 'bestaccu.pth')
        print(f'Epoch {epoch} best model saved with accuracy: {best_acc:2.2%}')
    # Keep track of the improvement of the model in term of F1_score for early stopping
    if cur_f1 > best_f1:
        trials = 0
        best_f1 = cur_f1
        # save the model with best F1_score
        torch.save(model.state_dict(), 'bestf1.pth')
        print(f'Epoch {epoch} best F1 saved with F1: {best_f1:2.2%}')
    else:
        trials += 1
        if trials >= patience:
            print(f'Early stopping on epoch {epoch}')
            break
            
print('Done!')

In [None]:
torch.save(model.state_dict(), 'latest.pth')

# Inference

In [None]:
# Delete redundant variables (optional)
del x_train,x_train_fft,x_train_rolled

In [None]:
# Build a new classifier model
model = Classifier(10, 10, 20)
# load the state dict of the previously trained models
model.load_state_dict(torch.load("./bestf1.pth"))

In [None]:
# change its mode to evaluating
model.eval()

In [None]:
# Load the testing data
x_test = pd.read_csv("../FIT5149_A2_data/test_data_nolabels.csv")
x_test.rename(columns={'Unnamed: 0': 'id'}, inplace= 1) # Rename 
x_test['id'] = x_test['id'] + 28 # Increase the ID column by 28 for later rolling
x_test

In [None]:
x_test.head(10000)

In [None]:
# Set the list of variables that will be scaled
scale_list = ['absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst', 'load', 'max', 'nonlinear', 'var']
# Use the label encoder of training data to encode the testing one
x_test['dayofweek'] = le.transform(x_test['dayofweek'])
x_test[scale_list] = scaler.transform(x_test[scale_list]) # Perform scaling
x_test

In [None]:
# Since the design choice is to use 30 minutes at each time-step, therefore each of the first 29 observations 
# will not have enough 29 observations before it in order to create a series of 30 observations.
# Thus I will add 29 copies of the first observation to the top of the testing dataframe.
extra_len = pd.DataFrame(x_test.iloc[0:1,:].values.repeat(29, axis = 0), columns=x_test.columns)
extra_len['id'] = list(range(0,29))

# concatenate the testing and the additional data
x_test = pd.concat([extra_len, x_test])
x_test['dummy_id'] = 1 # Create a dummy ID column for rolling
x_test.reset_index(drop= True, inplace=True) # Reset index
x_test.head(100)

In [None]:
# roll_time_series method creates sub windows of the time series. It rolls the (sorted) data frames for each 
# kind and each id separately in the “time” domain (which is represented by the sort order of the sort column given
# by column_sort).
# For example when applying the roll_time_series to a data [a,b,c,d,e,f,g] with time_shift = 3, the result will be 
# [a,b,c, b,c,d, c,d,e, d,e,f, e,f,g]. The input of the CNN model will have a shape of [30,10] (30 timestep, 10
# variables) and its prediction will be the appliances status at time_step 30.
# Notice: run these 3 lines of code will take hours


# x_test_rolled = roll_time_series(x_test, column_id="dummy_id", column_sort="id",
#                             max_timeshift = 29, min_timeshift = 29)
# np.save("../np_x_test_rolled.npy",np.array(x_test_rolled))

In [None]:
x_test_rolled = np.load('../np_x_test_rolled.npy', allow_pickle= True)
x_test_rolled = pd.DataFrame(x_test_rolled, columns=x_test.columns)
x_test_rolled = x_test_rolled[['id', 'load', 'absdif', 'dayofweek', 'dif', 'entropy', 'hourofday', 'hurst', 
        'max', 'nonlinear', 'var','dummy_id']]

In [None]:
x_test_rolled.head(100)

In [None]:
# check if there're any shape differences between rolled data and original data
print(len(x_test_rolled)/30)
print(len(x_test)-29)

In [None]:
# Drop the ID and dummy_id columns
x_test_rolled.drop(['id','dummy_id'], axis=1, inplace=True)
# reshape the rolled x_train to a new shape of [105540,30,10]
x_test_rolled = np.reshape(x_test_rolled.to_numpy(dtype=np.float64), [-1 , 30, int(x_test_rolled.shape[1])])
x_test_rolled.shape

In [None]:
# https://numpy.org/doc/stable/reference/generated/numpy.fft.rfft.html
# Compute the one-dimensional discrete Fourier Transform for real input.
# This function computes the one-dimensional n-point discrete Fourier Transform (DFT)
# of a real-valued array by means of an efficient algorithm called the Fast Fourier Transform (FFT).
x_test_fft = np.copy(x_test_rolled)
x_test_fft = np.apply_along_axis(absfft, 1, x_test_fft)
x_test_fft

In [None]:
print(x_test_rolled.shape)
print(x_test_fft.shape)

In [None]:
# num_vars = x_test_rolled.shape[2]
# for i in range(num_vars):
#     mean_s = np.mean(x_test_rolled[:,:,i])
#     sd_s = np.std(x_test_rolled[:,:,i])
#     x_test_rolled[:,:,i] = (x_test_rolled[:,:,i]-mean_s)/sd_s

# num_vars_fft = x_test_fft.shape[2]
# for i in range(num_vars_fft):
#     mean_s = np.mean(x_test_fft[:,:,i])
#     sd_s = np.std(x_test_fft[:,:,i])
#     x_test_fft[:,:,i] = (x_test_fft[:,:,i]-mean_s)/sd_s

In [None]:
# Transpose the shape x_test and x_test_fft to [observations, features, time_steps]
x_test = x_test_rolled.transpose(0,2,1)
x_test_fft = x_test_fft.transpose(0,2,1)

In [None]:
print(x_test.shape)
print(x_test_fft.shape)

In [None]:
# Set the preferred device for testing to CPU

# device_test = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device_test = torch.device('cpu')

In [None]:
# Get the testing dataset
test_ds = TensorDataset(torch.tensor(x_test).float(), torch.tensor(x_test_fft).float())
# Get the testing dataloader
test_dl = DataLoader(test_ds, batch_size=1, shuffle=False)

test_preds = [] # to store prediction

In [None]:
# Change the model mode to evaluate and send to the device
model.eval()
model.to(device_test)

In [None]:
# Perform predicting on testing date
for test_batch in tqdm(test_dl): # Iterate through every batch of testing dataloader
    x_raw, x_fft = [t.to(device_test) for t in test_batch] # Send the data to device
    out = model(x_raw, x_fft) # Get the output
    preds = F.log_softmax(out, dim=1).argmax(dim=1) # Get the final prediction
    # decode its value for submisson compatability
    decoded_preds = le_y.inverse_transform([preds.detach().cpu().clone().numpy()]) 
    # Save the prediction
    test_preds.append([i for i in decoded_preds[0]])

In [None]:
# Convert the prediction list to dataframe
predictions = pd.DataFrame(np.array(test_preds)).reset_index()
predictions.columns = ['id', 'ac', 'ev', 'oven', 'wash', 'dryer'] # Add column names
predictions['id'] = predictions['id'] + 1 # For submission compatability
predictions.set_index('id',inplace=True) # set ID column as index
predictions.head() #have a look to check its format

In [None]:
# Check how many classes are predicted
predictions[['ac', 'ev', 'oven', 'wash', 'dryer']].drop_duplicates()

In [None]:
# Export to .csv file for submission
predictions.to_csv('../CNN_predictions.csv')

In [None]:
out.detach().cpu()
y_batch.detach().cpu()

In [None]:
y_train_transformed

In [None]:
cls_count = dict(collections.Counter(y_train_transformed))

In [None]:
cls_weights = np.array([i[1] for i in sorted(cls_count.items())])

In [None]:
cls_weights = torch.Tensor([cls_weights/np.sum(cls_weights)])
cls_weights

In [None]:
cls_weights.shape

In [None]:
cls_weights_test = torch.Tensor([cls_weights[0],1-cls_weights[0]])
cls_weights_test

In [None]:
class FocalLoss(nn.modules.loss._WeightedLoss):
    def __init__(self, weight=None, gamma=2,reduction='mean'):
        super(FocalLoss, self).__init__(weight,reduction=reduction)
        self.gamma = gamma
        self.weight = weight #weight parameter will act as the alpha parameter to balance class weights

    def forward(self, input, target):

        ce_loss = F.cross_entropy(input, target,reduction=self.reduction,weight=self.weight)
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma * ce_loss).mean()
        return focal_loss

In [None]:
temp_loss = FocalLoss(gamma=2, reduction='sum', weight=cls_weights)

In [None]:
temp_loss(out, y_batch)

In [None]:
out

In [None]:
y_batch

In [None]:
torch.randn(3, 5, requires_grad=True)

In [None]:
torch.randint(5, (3,), dtype=torch.int64)

In [None]:
input = torch.randn(3, 5, requires_grad=True)
target = torch.randint(5, (3,), dtype=torch.int64)
loss = F.cross_entropy(input, target)
loss

In [None]:
def prepare_loss_weights(labels,
                         pos_cls_weight=1.0,
                         neg_cls_weight=1.0,
                         loss_norm_type='norm_by_num_positives',
                         dtype=torch.float32):
    """get cls_weights and reg_weights from labels.
    """
    cared = labels >= 0
    # cared: [N, num_anchors]
    positives = labels > 0
    negatives = labels == 0
    negative_cls_weights = negatives.type(dtype) * neg_cls_weight
    cls_weights = negative_cls_weights + pos_cls_weight * positives.type(dtype)
    reg_weights = positives.type(dtype)
    pos_normalizer = positives.sum(1, keepdim=True).type(dtype)
    reg_weights /= torch.clamp(pos_normalizer, min=1.0)
    cls_weights /= torch.clamp(pos_normalizer, min=1.0)

    return cls_weights, reg_weights, cared

In [None]:
prepare_loss_weights(torch.Tensor([y_train_transformed]))

In [None]:
test = np.load('./np_x_train_rolled_new.npy', allow_pickle= True)

In [None]:
np.array(test,dtype = np.float64)

In [None]:
x_train_rolled.to_numpy(dtype=np.float64).dtype

In [None]:
np.vstack(x_train_rolled[:, 0:1]).astype(np.float)


In [None]:
x_train_rolled.drop('id', axis=1, inplace=True)
x_train_rolled

In [None]:
y_train.shape

In [None]:
# x_train_rolled = pd.concat([roll_time_series(temp, column_id="dummy_id", column_sort="id",
#             max_timeshift = 29, min_timeshift = 29),x_train_rolled]).reset_index(drop = True)

In [None]:
# np.save("np_x_train_rolled_new.npy",np.array(x_train_rolled))

In [None]:
print(x_train)

In [None]:
# x_train.drop(x_train.index[0:29])
x_train.head(100)

In [None]:
# 

## no need now - additional features calculators 

In [None]:
x_features = extract_features(x_train_rolled.loc[0:299999],column_id='id',column_sort='ids', 
                 default_fc_parameters=EfficientFCParameters())

In [None]:
x_features.shape

In [None]:
len(x_train_rolled.loc[0:299999,'id'].value_counts())

In [None]:
from tsfresh.feature_selection.significance_tests import target_real_feature_real_test
x_features_dropped = x_features.dropna(axis=1, how='any')
keeps = {}
for col in x_features_dropped.columns:
    p_value = target_real_feature_real_test(x_features_dropped[col].reset_index().drop(columns=["level_0","level_1"],
                                                                       axis = 1).squeeze(), 
                                     pd.Series(y_train_transformed[29:(29+60130)]))
    if p_value < 0.01:
        keeps[col] = p_value

In [None]:
best_features = sorted(list(dict(sorted(keeps.items(), key=lambda item: item[1])).keys())[:255])
x_features = x_features[best_features]

In [None]:
x_features['ids'] = x_features.index
x_train_rolled_temp = x_train_rolled.loc[0:299999]

In [None]:
x_train_temp = x_train_rolled_temp.merge(x_features, how='left', left_on="id", right_on="ids")

In [None]:
x_features

In [None]:
x_minimal = extract_features(x_train_rolled.loc[0:299999],column_id='id',column_sort='ids', 
                             default_fc_parameters=MinimalFCParameters())
x_minimal.shape

In [None]:
len(y_train_transformed)

In [None]:
tsfresh.feature_selection.significance_tests.target_real_feature_real_test(x_minimal['load__standard_deviation'].reset_index().drop(columns=["level_0","level_1"], axis = 1).squeeze(), 
                                                                               pd.Series(y_train_transformed[29:(29+60130)]))

In [None]:
x_minimal.head(10000)

In [None]:
pd.set_option('display.max_rows', 10000)
data.head(10000)

In [None]:
pd.reset_option('all')

In [None]:
l = FocalLoss(alpha=0.25, gamma=2.0)

In [None]:
data.columns

In [None]:
y_batch

In [None]:
y_train.encoded.unique()

In [None]:
criterion(out, torch.tensor([1]))

In [None]:
l = 33
pks = [[4,8,1],[2,8,2],[3,8,2],[3,8,2],[3,8,2]]
for i, layer in enumerate(pks):
    p,k,s = layer
    l = (l + 2*p - k)/s + 1
    print(f"length: {l}")

In [None]:
l = 64
pks = [[3,8,2],[3,8,2],[3,8,2],[3,8,2],[3,8,2]]
for i, layer in enumerate(pks):
    p,k,s = layer
    l = (l + 2*p - k)/s + 1
    print(f"length: {l}")

In [None]:
l = 30
pks = [[4,8,2],[1,3,1],[3,8,2],[1,3,1],[5,8,2],[1,3,1],[2,8,2]]
for i, layer in enumerate(pks):
    p,k,s = layer
#     w = (w + 2*p - k)/s + 1
#     h = (h + 2*p - k)/s + 1
    l = (l + 2*p - k)/s + 1
    print(f"length: {l}")

In [None]:
l = 16
pks = [[3,8,2],[1,3,1],[5,8,2],[1,3,1],[4,8,2],[1,3,1],[3,8,2]]
for i, layer in enumerate(pks):
    p,k,s = layer
#     w = (w + 2*p - k)/s + 1
#     h = (h + 2*p - k)/s + 1
    l = (l + 2*p - k)/s + 1
    print(f"length: {l}")

In [None]:
        #PKS [[4,8,2],[3,8,2],[5,8,2],[2,8,2]]
        self.raw = nn.Sequential( #kernel, stride, pad
            SepConv1d(raw_ni,  32, 8, 2, 4, drop=drop),
            SepConv1d(    32,  64, 8, 2, 3, drop=drop),
            SepConv1d(    64, 128, 8, 2, 5, drop=drop),
            SepConv1d(   128, 256, 8, 2, 2, drop=drop),
            Flatten(),
#             PrintSize(),
            nn.Dropout(drop), nn.Linear(512, 256), nn.ReLU(inplace=True),
            nn.Dropout(drop), nn.Linear( 256, 64), nn.ReLU(inplace=True))
        #PKS [3,8,2],[5,8,2],[4,8,2],[5,8,2],[3,8,2]

In [None]:
count_hourly = x_train.loc[:,:].groupby(["dayofweek",'hourofday']).count()
count_hourly.reset_index(inplace=True)

In [None]:
count_hourly

In [None]:
x_train.sort_values(by=["dayofweek",'hourofday']).head(1000)

In [None]:
x_train = x_train.sort_values(by=["dayofweek",'hourofday'])

In [None]:
x_train[(x_train.dayofweek == "Fri") & (x_train.hourofday == 11)].first_valid_index()

In [None]:
min_hourly_obs = min(count_hourly['var'].values) 
drop_idx = []
for _, row in count_hourly.iterrows():
    drop_idx = [*drop_idx,*x_train[(x_train.dayofweek == row[0]) & (x_train.hourofday == row[1])].index.values[0:row[2] - min_hourly_obs]]
drop_idx

In [None]:
x_train.drop(index = drop_idx, inplace = True)
x_train.loc[:,:].groupby(["dayofweek",'hourofday']).count()

In [None]:
x_train

In [None]:
a = [123]
b = list(range(1,4))
a + b

In [None]:
min_hourly_obs

In [None]:
df = pd.DataFrame({
   "id": [1, 1, 1, 1, 2, 2],
   "time": [1, 2, 3, 4, 8, 9],
   "x": [1, 2, 3, 4, 10, 11],
   "y": [5, 6, 7, 8, 12, 13],
})

In [None]:
x_train.shape

In [None]:
from tsfresh.utilities.dataframe_functions import roll_time_series
df_rolled = roll_time_series(x_train[['load','dummy_id','id']], column_id="dummy_id", column_sort="id",
                            max_timeshift = 29, min_timeshift = 29)
df_rolled

In [None]:
from tsfresh.utilities.dataframe_functions import roll_time_series
df_rolled = roll_time_series(df, column_id="id", column_sort="time", max_timeshift=2, min_timeshift=2)
df_rolled

In [None]:
from tsfresh import extract_features
df_features = extract_features(df_rolled, column_id="id", column_sort="time", )


In [None]:
df_features

In [None]:
x_train.loc[0:500, ['load','dummy_id','id']]