In [25]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch import nn
from tqdm.notebook import tqdm
from datetime import datetime
import sys
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader
# from torch.optim.lr_scheduler import _LRScheduler
import torch.optim as optim

In [26]:
sys.path.append('..')
from gen.fishs_config import fishs

fishs = list(map(str.capitalize, fishs))

In [28]:
def one_hot(df, cols):
    """
    @param df pandas DataFrame
    @param cols a list of columns to encode 
    @return a DataFrame with one-hot encoding
    """
    for each in cols:
        dummies = pd.get_dummies(df[each], prefix=each, drop_first=False)
        df = pd.concat([df, dummies], axis=1)
        del df[each]
    return df

def one_hot_phenomenon(df, col):
    keys = set()
    for _, row in df.iterrows():
        phenomenon = row[col]
        for key in phenomenon.split('.'):
            keys.add(key)
    result = {key: [] for key in keys}
    for _, row in df.iterrows():
        phenomenon = row[col]
        for key in keys:
            if key in phenomenon.split('.'):
                result[key].append(1)
            else:
                result[key].append(0)
    for key in result:
        df[key] = result[key]
    del result
    del df[col]
    return df
            

def get_time_idx(row):
    try:
        return datetime(day=row['day'], month=row['month'], year=2019).timetuple().tm_yday
    except Exception:
        return -1

In [29]:
data = pd.read_csv(r'..\time_multiple_forecasts0.csv', sep=';')
data

Unnamed: 0,time,pressure,temperature,wind,gust,wind_direction,humidity,phenomenon,uv_index,moon_direction,...,Красноперка,Налим,Густера,Амур,Ерш,Сазан,Подуст,Толстолобик,Вобла,Хариус
0,0,749,-6,2,5,Ю,54,пасмурно,0,1,...,0.051212,0.635556,0.371429,0.0,0.485734,0.025809,0.0,0.0,0.055714,0.033183
1,3,749,-4,3,9,ЮЗ,55,пасмурно,0,1,...,0.128030,0.635556,0.417857,0.0,0.485734,0.029035,0.0,0.0,0.074286,0.044244
2,6,749,-4,3,10,ЮЗ,54,пасмурно,0,1,...,0.281667,0.577778,0.510714,0.0,0.441576,0.035487,0.0,0.0,0.204286,0.121670
3,9,749,-3,3,6,ЮЗ,51,пасмурно,0,1,...,0.281667,0.462222,0.510714,0.0,0.353261,0.035487,0.0,0.0,0.204286,0.121670
4,12,750,-1,5,13,ЮЗ,52,облачно,2,1,...,0.256061,0.231111,0.464286,0.0,0.309103,0.032261,0.0,0.0,0.185714,0.110609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244147,9,753,-6,0,7,ЮВ,41,пасмурно,0,-1,...,0.198611,0.121333,0.582214,0.0,0.576522,0.000000,0.0,0.0,0.247619,0.103645
244148,12,753,-3,0,5,В,38,пасмурно,2,-1,...,0.180556,0.060667,0.529286,0.0,0.504457,0.000000,0.0,0.0,0.225108,0.094223
244149,15,753,-3,0,8,ЮВ,36,облачно,1,-1,...,0.180556,0.091000,0.529286,0.0,0.504457,0.000000,0.0,0.0,0.225108,0.094223
244150,18,752,-4,2,8,В,28,пасмурно,0,-1,...,0.198611,0.136500,0.582214,0.0,0.648587,0.000000,0.0,0.0,0.202597,0.084800


In [30]:
time_idxs = []
for i, row in data.iterrows():
    try:
        time_idxs.append(get_time_idx(row))
    except Exception as e:
        print(e)
        print(row)
        break

In [31]:
data['day_year'] = time_idxs
data = one_hot(data, ['wind_direction', 'moon_direction', 'time'])
data = one_hot_phenomenon(data, 'phenomenon')
del data['day']
data


Unnamed: 0,pressure,temperature,wind,gust,humidity,uv_index,moon,month,Щука,Судак,...,пасмурно,небольшой дождь,ясно,гроза,снег с дождём,сильный снег,мокрый снег,небольшой снег,дождь,малооблачно
0,749,-6,2,5,54,0,97,1,0.035846,0.357500,...,1,0,0,0,0,0,0,0,0,0
1,749,-4,3,9,55,0,97,1,0.179228,0.325000,...,1,0,0,0,0,0,0,0,0,0
2,749,-4,3,10,54,0,97,1,0.430147,0.357500,...,1,0,0,0,0,0,0,0,0,0
3,749,-3,3,6,51,0,97,1,0.394301,0.325000,...,1,0,0,0,0,0,0,0,0,0
4,750,-1,5,13,52,2,97,1,0.358456,0.325000,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244147,753,-6,0,7,41,0,79,12,0.499449,0.357762,...,1,0,0,0,0,0,0,0,0,0
244148,753,-3,0,5,38,2,79,12,0.454044,0.357762,...,1,0,0,0,0,0,0,0,0,0
244149,753,-3,0,8,36,1,79,12,0.408640,0.321986,...,0,0,0,0,0,0,0,0,0,0
244150,752,-4,2,8,28,0,79,12,0.522151,0.357762,...,1,0,0,0,0,0,0,0,0,0


In [32]:
y = data[fishs]
for fish in fishs:
    del data[fish]
data

Unnamed: 0,pressure,temperature,wind,gust,humidity,uv_index,moon,month,day_year,wind_direction_В,...,пасмурно,небольшой дождь,ясно,гроза,снег с дождём,сильный снег,мокрый снег,небольшой снег,дождь,малооблачно
0,749,-6,2,5,54,0,97,1,2,0,...,1,0,0,0,0,0,0,0,0,0
1,749,-4,3,9,55,0,97,1,2,0,...,1,0,0,0,0,0,0,0,0,0
2,749,-4,3,10,54,0,97,1,2,0,...,1,0,0,0,0,0,0,0,0,0
3,749,-3,3,6,51,0,97,1,2,0,...,1,0,0,0,0,0,0,0,0,0
4,750,-1,5,13,52,2,97,1,2,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244147,753,-6,0,7,41,0,79,12,364,0,...,1,0,0,0,0,0,0,0,0,0
244148,753,-3,0,5,38,2,79,12,364,1,...,1,0,0,0,0,0,0,0,0,0
244149,753,-3,0,8,36,1,79,12,364,0,...,0,0,0,0,0,0,0,0,0,0
244150,752,-4,2,8,28,0,79,12,364,1,...,1,0,0,0,0,0,0,0,0,0


In [33]:
list(data.columns)

['pressure',
 'temperature',
 'wind',
 'gust',
 'humidity',
 'uv_index',
 'moon',
 'month',
 'day_year',
 'wind_direction_В',
 'wind_direction_З',
 'wind_direction_С',
 'wind_direction_СВ',
 'wind_direction_СЗ',
 'wind_direction_Ю',
 'wind_direction_ЮВ',
 'wind_direction_ЮЗ',
 'moon_direction_-1',
 'moon_direction_1',
 'time_0',
 'time_3',
 'time_6',
 'time_9',
 'time_12',
 'time_15',
 'time_18',
 'time_21',
 'облачно',
 'сильный дождь',
 'снег',
 'пасмурно',
 'небольшой дождь',
 'ясно',
 'гроза',
 'снег с дождём',
 'сильный снег',
 'мокрый снег',
 'небольшой снег',
 'дождь',
 'малооблачно']

In [34]:
torch.save(list(data.columns), 'features.keys')

In [8]:
class FishDataset(Dataset):
    def __init__(self, X, y, step_size=3):
        self.X = X
        self.y = y
        self.step_size = step_size
        self.steps = []
        
    def init_steps(self):
        steps = []
        for i in tqdm(range(0, len(self.X) - self.step_size)):
            step = list(self.X['day_year'][i:i+self.step_size * 8])
            flag = True
            if len(step) != self.step_size * 8:
                continue
            for j in range(0, len(step) - 8, 8):
                if step[j] + 1 != step[j + 8]:
                    flag = False
                    break
            if flag:
                steps.append((i, i + self.step_size * 8))
        return steps
        
    def __len__(self):
        return len(self.steps)
    
    def __getitem__(self, idx):
        return self.X[self.steps[idx][0]:self.steps[idx][1]].values, self.y[self.steps[idx][0]:self.steps[idx][1]].values

In [9]:
steps = FishDataset(data, y, 3).init_steps()
mask = np.random.rand(len(data)) < 0.8
train_dataset = FishDataset(data, y, 3)
train_dataset.steps = [steps[i] for i in range(len(steps)) if mask[i]]
test_dataset = FishDataset(data, y, 3)
test_dataset.steps = [steps[i] for i in range(len(steps)) if not mask[i]]

HBox(children=(FloatProgress(value=0.0, max=244149.0), HTML(value='')))




In [10]:
train_dataloader = DataLoader(train_dataset, shuffle=True, batch_size=32, pin_memory=True)
test_dataloader = DataLoader(test_dataset, shuffle=False, batch_size=32, pin_memory=True)

In [11]:
for data, targets in train_dataloader:
    print(data.shape)
    print(targets.shape)
    break

torch.Size([32, 24, 67])
torch.Size([32, 24, 29])


In [4]:
class FCNBlock(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.fc = nn.Linear(input_dim, output_dim, bias=True)
        self.relu = nn.ReLU()
        
    def forward(self, inputs):
        return self.relu(self.fc(inputs))

class FCN(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.start_block = FCNBlock(input_dim, hidden_dim)
        self.blocks = nn.Sequential()
        for _ in range(layer_dim):
            self.blocks.add_module('block_{}'.format(_), FCNBlock(hidden_dim, hidden_dim))
        self.final_block = FCNBlock(hidden_dim, output_dim)
        
    def forward(self, inputs):
        return self.final_block(self.blocks(self.start_block(inputs)))
    

class LSTM(nn.Module):
    """Very simple implementation of LSTM-based time-series classifier."""
    
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim
        self.rnn = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.batch_size = None
        self.hidden = None
    
    def forward(self, x):
        h0, c0 = self.init_hidden(x)
        out, (hn, cn) = self.rnn(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out
    
    def init_hidden(self, x):
        h0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        c0 = torch.zeros(self.layer_dim, x.size(0), self.hidden_dim)
        return [t for t in (h0, c0)]
    
class MainModel(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.fcn = FCN(config['fcn_input'], config['fcn_hidden'], config['fcn_layers'], config['fcn_output'])
        self.lstm = LSTM(config['lstm_input'], config['lstm_hidden'], config['lstm_layers'], config['lstm_output'])
        self.fc = nn.Linear(config['fcn_output'] + config['lstm_output'], config['output'])
    
    def forward(self, x, y):
        fcn_output = self.fcn(x)
        lstm_output = self.lstm(y)
        output = self.fc(torch.cat((fcn_output, lstm_output), dim=1))
        return output, fcn_output, lstm_output
    
    
    
class MainLoss(nn.Module):
    def __init__(self):
        super().__init__()
        self.loss = nn.MSELoss()
        self.fcn_loss = nn.MSELoss()
        self.lstm_loss = nn.MSELoss()
        
    def forward(self, x, x_fcn, x_lstm, targets):
        loss = 2 * self.loss(x, targets) + self.fcn_loss(x_fcn, targets) + self.lstm_loss(x_lstm, targets)
        return loss

In [12]:
model_config = {
    'fcn_input': 24 * 67,
    'fcn_hidden': 128,
    'fcn_layers': 5,
    'fcn_output': 29,
    'lstm_input': 29,
    'lstm_hidden': 64,
    'lstm_layers': 3,
    'lstm_output': 29,
    'output': 29
}

model = MainModel(model_config)
model

MainModel(
  (fcn): FCN(
    (start_block): FCNBlock(
      (fc): Linear(in_features=1608, out_features=128, bias=True)
      (relu): ReLU()
    )
    (blocks): Sequential(
      (block_0): FCNBlock(
        (fc): Linear(in_features=128, out_features=128, bias=True)
        (relu): ReLU()
      )
      (block_1): FCNBlock(
        (fc): Linear(in_features=128, out_features=128, bias=True)
        (relu): ReLU()
      )
      (block_2): FCNBlock(
        (fc): Linear(in_features=128, out_features=128, bias=True)
        (relu): ReLU()
      )
      (block_3): FCNBlock(
        (fc): Linear(in_features=128, out_features=128, bias=True)
        (relu): ReLU()
      )
      (block_4): FCNBlock(
        (fc): Linear(in_features=128, out_features=128, bias=True)
        (relu): ReLU()
      )
    )
    (final_block): FCNBlock(
      (fc): Linear(in_features=128, out_features=29, bias=True)
      (relu): ReLU()
    )
  )
  (lstm): LSTM(
    (rnn): LSTM(29, 64, num_layers=3, batch_first=True)


In [18]:
optimizer = optim.Adam(model.parameters(), lr=1e-4)
loss_f = MainLoss()

In [19]:
def train(model, dataloader, optimizer, loss_f, log_interval, epoch, device='cpu'):
    running_loss = []
    for i, data in enumerate(dataloader):
        x, y = data
        targets = y[:, -1, :]
        y = y[:, :-1, :]
        x = x.view(x.size(0), -1)
        x = x.to(device).float()
        y = y.to(device).float()
        targets = targets.to(device).float()
        output, fcn_output, lstm_output = model(x, y)
        loss = loss_f(output, fcn_output, lstm_output, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        loss_value = loss.item()
        if i % log_interval == 0 and len(running_loss) > 0:
            print('Train: Epoch {} [{:.2f}] Loss {:.2f} [{:.2f}]'.format(epoch, i / len(dataloader) * 100, loss_value, np.mean(running_loss)))
            running_loss = []
        running_loss.append(loss_value)
 
@torch.no_grad()
def validate(model, dataloader, loss_f, epoch, device='cpu'):
    running_loss = []
    for data in tqdm(dataloader):
        x, y = data
        targets = y[:, -1, :]
        y = y[:, :-1, :]
        x = x.view(x.size(0), -1)
        x = x.to(device).float()
        y = y.to(device).float()
        targets = targets.to(device).float()
        output, fcn_output, lstm_output = model(x, y)
        loss = loss_f(output, fcn_output, lstm_output, targets)
        loss_value = loss.item()
        running_loss.append(loss_value)
    print('Test: Epoch {} Loss {:.2f}'.format(epoch, np.mean(running_loss)))

In [20]:
num_epochs = 10
log_interval = 100
device = 'cpu'
model.to(device)
loss_f.to(device)

for epoch in range(num_epochs):
    model.train()
    train(model, train_dataloader, optimizer, loss_f, log_interval, epoch, device)
    model.eval()
    validate(model, test_dataloader, loss_f, epoch, device)

Train: Epoch 0 [1.67] Loss 0.63 [1.58]
Train: Epoch 0 [3.34] Loss 0.58 [0.60]
Train: Epoch 0 [5.01] Loss 0.36 [0.47]
Train: Epoch 0 [6.68] Loss 0.33 [0.42]
Train: Epoch 0 [8.35] Loss 0.34 [0.39]
Train: Epoch 0 [10.02] Loss 0.31 [0.37]
Train: Epoch 0 [11.69] Loss 0.32 [0.36]
Train: Epoch 0 [13.36] Loss 0.31 [0.35]
Train: Epoch 0 [15.03] Loss 0.29 [0.34]
Train: Epoch 0 [16.69] Loss 0.28 [0.34]
Train: Epoch 0 [18.36] Loss 0.54 [0.33]
Train: Epoch 0 [20.03] Loss 0.35 [0.33]
Train: Epoch 0 [21.70] Loss 0.36 [0.32]
Train: Epoch 0 [23.37] Loss 0.41 [0.31]
Train: Epoch 0 [25.04] Loss 0.33 [0.31]
Train: Epoch 0 [26.71] Loss 0.33 [0.31]
Train: Epoch 0 [28.38] Loss 0.29 [0.31]
Train: Epoch 0 [30.05] Loss 0.30 [0.32]
Train: Epoch 0 [31.72] Loss 0.25 [0.30]
Train: Epoch 0 [33.39] Loss 0.33 [0.30]
Train: Epoch 0 [35.06] Loss 0.26 [0.30]
Train: Epoch 0 [36.73] Loss 0.25 [0.29]
Train: Epoch 0 [38.40] Loss 0.24 [0.30]
Train: Epoch 0 [40.07] Loss 0.21 [0.29]
Train: Epoch 0 [41.74] Loss 0.28 [0.30]
Train

HBox(children=(FloatProgress(value=0.0, max=1479.0), HTML(value='')))


Test: Epoch 0 Loss 0.26
Train: Epoch 1 [1.67] Loss 0.26 [0.27]
Train: Epoch 1 [3.34] Loss 0.20 [0.27]
Train: Epoch 1 [5.01] Loss 0.33 [0.27]
Train: Epoch 1 [6.68] Loss 0.27 [0.26]
Train: Epoch 1 [8.35] Loss 0.25 [0.26]
Train: Epoch 1 [10.02] Loss 0.23 [0.26]
Train: Epoch 1 [11.69] Loss 0.22 [0.26]
Train: Epoch 1 [13.36] Loss 0.23 [0.26]
Train: Epoch 1 [15.03] Loss 0.22 [0.26]
Train: Epoch 1 [16.69] Loss 0.29 [0.25]
Train: Epoch 1 [18.36] Loss 0.32 [0.26]
Train: Epoch 1 [20.03] Loss 0.24 [0.26]
Train: Epoch 1 [21.70] Loss 0.23 [0.25]
Train: Epoch 1 [23.37] Loss 0.25 [0.25]
Train: Epoch 1 [25.04] Loss 0.21 [0.25]
Train: Epoch 1 [26.71] Loss 0.24 [0.25]
Train: Epoch 1 [28.38] Loss 0.30 [0.24]
Train: Epoch 1 [30.05] Loss 0.23 [0.25]
Train: Epoch 1 [31.72] Loss 0.24 [0.25]
Train: Epoch 1 [33.39] Loss 0.30 [0.24]
Train: Epoch 1 [35.06] Loss 0.17 [0.23]
Train: Epoch 1 [36.73] Loss 0.26 [0.24]
Train: Epoch 1 [38.40] Loss 0.19 [0.23]
Train: Epoch 1 [40.07] Loss 0.26 [0.23]
Train: Epoch 1 [41.7

HBox(children=(FloatProgress(value=0.0, max=1479.0), HTML(value='')))


Test: Epoch 1 Loss 0.20


KeyboardInterrupt: 

In [29]:
for data in tqdm(train_dataloader):
    x, y = data
    targets = y[:, -1, :]
    y = y[:, :-1, :]
    x = x.view(x.size(0), -1)
    x = x.to(device).float()
    y = y.to(device).float()
    targets = targets.to(device).float()
    output, fcn_output, lstm_output = model(x, y)
    print(output[0])
    print(fcn_output[0])
    print(lstm_output[0])
    print(targets[0])
    break

HBox(children=(FloatProgress(value=0.0, max=5990.0), HTML(value='')))

tensor([ 0.7123,  0.4214,  0.3123,  0.3966,  0.3027,  0.7605,  0.0579,  0.0050,
        -0.0270, -0.0073,  0.2010,  0.0154,  0.0020,  0.1700, -0.0143, -0.0303,
        -0.0226,  0.3926,  0.3645,  0.1809,  0.5335,  0.4018, -0.0335,  0.6308,
         0.0675,  0.0619, -0.0231,  0.1863,  0.1873], grad_fn=<SelectBackward>)
tensor([0.7054, 0.4421, 0.2516, 0.3666, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.3147, 0.1936, 0.2365, 0.0000, 0.0000, 0.6389, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000], grad_fn=<SelectBackward>)
tensor([ 6.9942e-01,  3.5250e-01,  3.2532e-01,  3.7733e-01,  2.8803e-01,
         7.8674e-01,  6.0401e-02, -1.6580e-02, -1.1015e-02,  8.0534e-02,
         1.0233e-01,  2.5486e-02, -1.2213e-02,  1.3259e-01, -2.7261e-04,
        -1.1148e-01, -2.7523e-02,  2.4683e-01,  4.0372e-01,  1.3684e-01,
         5.8663e-01,  3.5425e-01,  1.2159e-02,  6.2753e-01,  7.0878e-02,
         1.1805e-01,  4.