In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import Dataset, DataLoader

# Convolutional Neural Network for Time Series

## Univariate CNN 

Univarate time series are datasets contains only one sinle series of observations with a temporal ordering. Model need to learn from past observations to predict next value in sequence.

### Dataset

Monthly sunspot dataset describes a monthly count of number of observed sunspots for just over 230 years from 1749 to 1983. Dataset contains 2820 observations. 

You can download data from: https://datamarket.com/data/set/22ti/zuerich-monthly-sunspot-numbers-1749-1983#!ds=22ti&display=line

In [2]:
uni_df = pd.read_csv('datasets/zuerich-monthly-sunspot-numbers-.csv', 
                     sep=';', names=['month', 'observations'], header=0)
uni_df.dropna(inplace=True)
uni_df.head(5)

Unnamed: 0,month,observations
0,1749-01,58.0
1,1749-02,62.6
2,1749-03,70.0
3,1749-04,55.7
4,1749-05,85.0


In [3]:
uni_df.shape

(2820, 2)

In [4]:
uni_df.dtypes

month            object
observations    float64
dtype: object

### Data preparation

PyTorch provides tool for preparing data. It makes code more readable and reusable. 

In [5]:
class SunspotDataset(Dataset):
    def __init__(self, csv_file, lag, transform=None):
        df = pd.read_csv('datasets/zuerich-monthly-sunspot-numbers-.csv',  
                          sep=';', names=['month', 'observations'], header=0)
        self.len = df.shape[0]
        self.lag = lag
        self.X, self.y = self.prepare(df.observations.values)
        
    def prepare(self, sequence):
        X, y = list(), list()
        for i in range(self.len):
            end_ix = i + self.lag
            if end_ix > self.len - 1:
                break
            seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
            X.append(seq_x)
            y.append(seq_y)
        
        return (X, y)
        
    def __len__(self):
        return int(self.len / self.lag)
    
    def __getitem__(self, idx):
        x = self.X[idx]
        y = self.y[idx]
        return (x, y)
        

 SunspotDataset class can be instantiated and tested.

In [6]:
csv_file = 'datasets/zuerich-monthly-sunspot-numbers-.csv'
sunspot_dataset = SunspotDataset(csv_file, 5, 2820)
len(sunspot_dataset)

564

In [7]:
for i in range(len(sunspot_dataset)):
    sample = sunspot_dataset[i]
    print(sample, type(sample[0]))
    if i == 3:
        break

(array([58. , 62.6, 70. , 55.7, 85. ]), 83.5) <class 'numpy.ndarray'>
(array([62.6, 70. , 55.7, 85. , 83.5]), 94.8) <class 'numpy.ndarray'>
(array([70. , 55.7, 85. , 83.5, 94.8]), 66.3) <class 'numpy.ndarray'>
(array([55.7, 85. , 83.5, 94.8, 66.3]), 75.9) <class 'numpy.ndarray'>


### One-Dimensional Convolutional Neural Network Model (1D CNN)

1D CNN is a model that has convolutional hidden layer that calculates over one dimensional sequence. 

Select device.

In [8]:
device = 'cuda:0'

In [9]:
class OneDCNN(nn.Module):
    def __init__(self, D_in):
        super(OneDCNN, self).__init__()
        self.conv1 = nn.Conv1d(D_in, 4, 1, stride=1, padding=1)
        self.pool1 = nn.MaxPool1d(kernel_size=2)
        self.fc1 = nn.Linear(16, 50)
        self.fc2 = nn.Linear(50, 4)
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.pool1(x)
        x = F.relu(x)
        x = x.view(-1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
        
one_dcnn = OneDCNN(5)
one_dcnn

OneDCNN(
  (conv1): Conv1d(5, 4, kernel_size=(1,), stride=(1,), padding=(1,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=16, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=4, bias=True)
)

Define a loss function and optimizer

In [12]:
criterion = nn.MSELoss()
optimizer = optim.Adam(one_dcnn.parameters(), lr=0.0001)

Prepare dataloader

In [13]:
sunspot_dataloader = DataLoader(sunspot_dataset, batch_size=4, shuffle=False, num_workers=8)

Train network

In [16]:
n_epoch = 10
for epoch in range(n_epoch):
    running_loss = 0.0
    for i, data in enumerate(sunspot_dataloader):
        inputs, labels = data
        inputs = inputs.view(4, 5, 1).float()
        labels = labels.float()
        optimizer.zero_grad()
        outputs = one_dcnn(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 99))
            running_loss = 0.0

[1,   100] loss: 106.268
[2,   100] loss: 106.185
[3,   100] loss: 106.141
[4,   100] loss: 106.101
[5,   100] loss: 106.037
[6,   100] loss: 105.998
[7,   100] loss: 105.955
[8,   100] loss: 105.884
[9,   100] loss: 105.862
[10,   100] loss: 105.823
