# Import library

In [1]:
import os
import os, sys
import glob
import re
import datetime
from datetime import datetime, timedelta 
import math, copy, time
from itertools import cycle
from collections import defaultdict

from parse import parse
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn.functional as F

import torchvision
import torchvision.transforms as transforms

from sklearn.metrics import roc_auc_score, plot_roc_curve
from sklearn import svm


from tqdm.notebook import tqdm

import matplotlib.pyplot as plt
import seaborn

%load_ext autoreload
%autoreload 2

In [2]:
# Util functions
def size_and_ratio(df, col, dropna=True):
    if pd.__version__ > '1.1':
        sr = df.groupby(col, dropna=dropna).size().sort_values(ascending=False)
    elif dropna:
        sr = df.groupby(col).size().sort_values(ascending=False)
    else:  # dropna == False
        sr = df.fillna('nan').groupby(col).size().sort_values(ascending=False)
    sr_ratio = sr.copy() / sum(sr)
    print("Sum :", sum(sr), sr.shape)
    sr.name = 'size'
    sr_ratio.name = 'ratio'
    return pd.concat([sr, sr_ratio], axis=1)

# Data load

In [3]:
df_input = pd.read_csv("opendataset/df_input.csv")
df_input.head()

Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio,corona_result
0,0,0,0,0,0,-1,0,0.040948,0.0,0
1,0,0,0,0,0,-1,1,0.040948,0.0,0
2,0,0,0,0,0,-1,0,0.040948,0.0,0
3,0,0,0,0,0,-1,0,0.040948,0.0,0
4,0,0,0,0,0,-1,1,0.040948,0.0,0


In [4]:
df_input.describe()

Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio,corona_result
count,7230.0,7230.0,7230.0,7230.0,7230.0,7230.0,7230.0,7230.0,7230.0,7230.0
mean,0.017704,0.017566,0.007884,0.001521,0.01231,0.062102,0.539004,0.400977,0.433679,0.072337
std,0.131882,0.131376,0.088446,0.038979,0.110272,0.467378,0.498511,0.252509,0.338471,0.259064
min,0.0,0.0,0.0,0.0,0.0,-1.0,0.0,0.040948,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18121,0.166667,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.36853,0.333333,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.60076,0.666667,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [5]:
df_input['age_60_and_above'].unique()

size_and_ratio(df_input, 'age_60_and_above')

Sum : 7230 (3,)


Unnamed: 0_level_0,size,ratio
age_60_and_above,Unnamed: 1_level_1,Unnamed: 2_level_1
0,5623,0.777732
1,1028,0.142185
-1,579,0.080083


In [6]:
from model.attention_model import CategoricalAttentionModel

In [7]:
from model.utils import get_dict_category_from_dataset

dict_category, df_input_typed = get_dict_category_from_dataset(df_input)
dict_category

cough category [0 1] ...
fever category [0 1] ...
sore_throat category [0 1] ...
shortness_of_breath category [0 1] ...
head_ache category [0 1] ...
age_60_and_above category [-1  0  1] ...
gender category [0 1] ...
confirmed_ratio float [0.04094774 0.05944027 0.07859325 0.06852142 0.07132832 0.10071824
 0.12309089 0.09229753 0.11838521 0.11731198]
weekday_ratio category [0.         0.33333333 0.5        0.66666667 0.83333333 0.16666667
 1.        ] ...
corona_result category [0 1] ...


{'cough_0': 0,
 'cough_1': 1,
 'fever_0': 2,
 'fever_1': 3,
 'sore_throat_0': 4,
 'sore_throat_1': 5,
 'shortness_of_breath_0': 6,
 'shortness_of_breath_1': 7,
 'head_ache_0': 8,
 'head_ache_1': 9,
 'age_60_and_above_-1': 10,
 'age_60_and_above_0': 11,
 'age_60_and_above_1': 12,
 'gender_0': 13,
 'gender_1': 14,
 'confirmed_ratio': 15,
 'weekday_ratio_0': 16,
 'weekday_ratio_1': 17,
 'corona_result_0': 18,
 'corona_result_1': 19}

In [8]:
df_input_typed['gender'].dtype

CategoricalDtype(categories=[0, 1], ordered=False)

In [9]:
from model.utils import get_category_key

In [10]:
df_train = df_input_typed[[x for x in df_input_typed.columns if x != 'corona_result']]

df_train_category = pd.DataFrame()
df_train_intensity = df_train_category.copy()

for index, row in df_train.iterrows():
    for col, val in row.items():
        type_name = df_train[col].dtype
        key = get_category_key(colname=col, type_name=type_name, value=val)
        cat_idx = dict_category[key]

        df_train_category.loc[index, col] = cat_idx
        if type_name == 'category':
            df_train_intensity.loc[index, col] = 1
        if type_name == 'float':
            df_train_intensity.loc[index, col] = val
         
    
display(df_train_category.head())
display(df_train_intensity.head())

Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio
0,0.0,2.0,4.0,6.0,8.0,10.0,13.0,15.0,16.0
1,0.0,2.0,4.0,6.0,8.0,10.0,14.0,15.0,16.0
2,0.0,2.0,4.0,6.0,8.0,10.0,13.0,15.0,16.0
3,0.0,2.0,4.0,6.0,8.0,10.0,13.0,15.0,16.0
4,0.0,2.0,4.0,6.0,8.0,10.0,14.0,15.0,16.0


Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio
0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.040948,1.0
1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.040948,1.0
2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.040948,1.0
3,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.040948,1.0
4,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.040948,1.0


In [11]:
display(df_train_category.sample(10))
display(df_train_intensity.sample(10))

Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio
206,0.0,2.0,4.0,6.0,8.0,10.0,14.0,15.0,16.0
5617,0.0,2.0,4.0,6.0,8.0,11.0,13.0,15.0,17.0
2596,0.0,2.0,4.0,6.0,8.0,10.0,14.0,15.0,16.0
5761,0.0,2.0,4.0,6.0,8.0,11.0,13.0,15.0,16.0
3730,0.0,2.0,4.0,6.0,8.0,11.0,13.0,15.0,16.0
24,0.0,2.0,4.0,6.0,8.0,10.0,14.0,15.0,16.0
6097,0.0,2.0,4.0,6.0,8.0,11.0,13.0,15.0,16.0
6131,0.0,2.0,4.0,6.0,8.0,11.0,13.0,15.0,16.0
2074,0.0,2.0,4.0,6.0,8.0,11.0,14.0,15.0,16.0
349,0.0,2.0,4.0,6.0,8.0,12.0,14.0,15.0,16.0


Unnamed: 0,cough,fever,sore_throat,shortness_of_breath,head_ache,age_60_and_above,gender,confirmed_ratio,weekday_ratio
767,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.123091,1.0
4770,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.681252,1.0
3556,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.488979,1.0
404,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.068521,1.0
6886,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.81557,1.0
6906,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.81557,1.0
2596,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.259886,1.0
6689,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.479567,1.0
1161,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.117312,1.0
543,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.100718,1.0


In [12]:
size_and_ratio(df_train_category, 'sore_throat')

Sum : 7230 (2,)


Unnamed: 0_level_0,size,ratio
sore_throat,Unnamed: 1_level_1,Unnamed: 2_level_1
4.0,7173,0.992116
5.0,57,0.007884


In [13]:
def split_train_test(data, test_ratio, seed=None):
    if seed != None:
        np.random.seed(seed)
    
    shuffled_indices = np.random.permutation(len(data)).astype(int)
    print("shuffled:", shuffled_indices[:5], '...', "total len:", len(data))
    test_set_size = int(len(data) * test_ratio)
    
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    
    test_data = data[:test_set_size]
    train_data = data[test_set_size:]
    test_data = data[test_indices]
    train_data = data[train_indices]
    
    return train_data, test_data
def split_input_gt(non_split_data):
    splited_input = non_split_data[:, :-1]
    splited_gt = non_split_data[:, -1]
    
    return splited_input, splited_gt

In [14]:
np_cat = df_train_category.values
np_ins = df_train_intensity.values
np_gt = df_input_typed[['corona_result']].values
np_gt

array([[0],
       [0],
       [0],
       ...,
       [1],
       [0],
       [0]])

In [15]:
not_test_np_cat, test_np_cat = split_train_test(np_cat, test_ratio=0.2, seed=1212)
train_np_cat, valid_np_cat = split_train_test(not_test_np_cat, test_ratio=0.2, seed=1212)

not_test_np_ins, test_np_ins = split_train_test(np_ins, test_ratio=0.2, seed=1212)
train_np_ins, valid_np_ins = split_train_test(not_test_np_ins, test_ratio=0.2, seed=1212)


not_test_np_gt, test_np_gt = split_train_test(np_gt, test_ratio=0.2, seed=1212)
train_np_gt, valid_np_gt = split_train_test(not_test_np_gt, test_ratio=0.2, seed=1212)


shuffled: [3571  241 1807 3269 2200] ... total len: 7230
shuffled: [2451 4535 4270 4511 1652] ... total len: 5784
shuffled: [3571  241 1807 3269 2200] ... total len: 7230
shuffled: [2451 4535 4270 4511 1652] ... total len: 5784
shuffled: [3571  241 1807 3269 2200] ... total len: 7230
shuffled: [2451 4535 4270 4511 1652] ... total len: 5784


In [16]:
train_input = [train_np_cat.copy(), train_np_ins.copy()]
train_gt = train_np_gt.copy()

valid_input = [valid_np_cat.copy(), valid_np_ins.copy()]
valid_gt = valid_np_gt.copy()

test_input = [test_np_cat.copy(), test_np_ins.copy()]
test_gt = test_np_gt.copy()


In [17]:
print(train_input[0].shape, valid_input[0].shape, test_input[0].shape)
print(train_gt.shape, valid_gt.shape, test_gt.shape)

(4628, 9) (1156, 9) (1446, 9)
(4628, 1) (1156, 1) (1446, 1)


In [18]:
print("# of train data positive: ", train_gt.sum())
print("# of train data negative: ", train_gt.shape[0] - train_gt.sum())
print("ratio of P/N: ", train_gt.sum()/(train_gt.shape[0] - train_gt.sum()))
print("")

print("# of valid data positive: ", valid_gt.sum())
print("# of valid data negative: ", valid_gt.shape[0] - valid_gt.sum())
print("ratio of P/N: ", valid_gt.sum()/(valid_gt.shape[0] - valid_gt.sum()))
print("")

print("# of test data positive: ", test_gt.sum())
print("# of test data negative: ", test_gt.shape[0] - test_gt.sum())
print("ratio of P/N: ", test_gt.sum()/(test_gt.shape[0] - test_gt.sum()))

# of train data positive:  335
# of train data negative:  4293
ratio of P/N:  0.07803400885161892

# of valid data positive:  90
# of valid data negative:  1066
ratio of P/N:  0.08442776735459662

# of test data positive:  98
# of test data negative:  1348
ratio of P/N:  0.07270029673590504


### Check dict_category and train_np_cat

In [19]:
dict_category

{'cough_0': 0,
 'cough_1': 1,
 'fever_0': 2,
 'fever_1': 3,
 'sore_throat_0': 4,
 'sore_throat_1': 5,
 'shortness_of_breath_0': 6,
 'shortness_of_breath_1': 7,
 'head_ache_0': 8,
 'head_ache_1': 9,
 'age_60_and_above_-1': 10,
 'age_60_and_above_0': 11,
 'age_60_and_above_1': 12,
 'gender_0': 13,
 'gender_1': 14,
 'confirmed_ratio': 15,
 'weekday_ratio_0': 16,
 'weekday_ratio_1': 17,
 'corona_result_0': 18,
 'corona_result_1': 19}

In [20]:
train_np_cat

array([[ 0.,  2.,  4., ..., 13., 15., 16.],
       [ 0.,  2.,  4., ..., 13., 15., 16.],
       [ 0.,  2.,  4., ..., 14., 15., 16.],
       ...,
       [ 0.,  2.,  4., ..., 13., 15., 16.],
       [ 0.,  2.,  4., ..., 14., 15., 16.],
       [ 0.,  2.,  4., ..., 13., 15., 17.]])

# Category embeding layer

In [104]:
n = len(dict_category)
d = 5
embeds = nn.Embedding(n, d)  # n words in vocab, 5 dimensional embeddings

# sample test with "cough"
lookup_tensor = torch.tensor([dict_category["cough_0"]], dtype=torch.long)
couth_embed = embeds(lookup_tensor)
print(couth_embed)

tensor([[-1.1995,  1.0258, -1.5161,  0.6267, -0.4996]],
       grad_fn=<EmbeddingBackward0>)


In [116]:
embeds

Embedding(20, 10)

 Sample inputs

In [40]:
train_batch = torch.LongTensor(train_np_cat[0])
train_batch

tensor([ 0,  2,  4,  6,  8, 12, 13, 15, 16])

 intensity values for the samples. Note: only confirmed_ratio has 0.34 values, other have 1

In [41]:
train_batch_intensity = torch.FloatTensor(train_np_ins[0])
train_batch_intensity

tensor([1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.3438, 1.0000])

In [51]:
cat_embeds = embeds(train_batch)
print(cat_embeds.shape)
cat_embeds

torch.Size([9, 5])


tensor([[ 0.0468,  0.0694, -0.8301, -0.5892, -0.4500],
        [ 0.2189,  1.0110,  0.8616, -0.1891,  0.8887],
        [-0.6935, -1.9201, -0.5703, -0.9124,  0.1106],
        [ 1.1608, -0.3957,  0.0713,  1.3774, -0.6742],
        [ 1.6958, -1.1947,  0.1399,  0.9050, -0.6406],
        [-0.1252, -0.2113, -0.0383, -1.6542, -0.8345],
        [-0.2912,  0.8185, -0.8129, -0.5812, -1.1632],
        [-0.4745, -0.8060,  0.4962,  0.0873, -0.6951],
        [-1.2760,  1.3263, -0.4213, -0.5523, -0.8796]],
       grad_fn=<EmbeddingBackward0>)

In [52]:
intensity = train_batch_intensity.expand(d, 9).transpose(0, 1)
print(intensity.shape)
intensity

torch.Size([9, 5])


tensor([[1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
        [0.3438, 0.3438, 0.3438, 0.3438, 0.3438],
        [1.0000, 1.0000, 1.0000, 1.0000, 1.0000]])

multiple the intensity (scalar)

In [53]:
intensity_embeds = cat_embeds * intensity
print(intensity_embeds.shape)
intensity_embeds

torch.Size([9, 5])


tensor([[ 0.0468,  0.0694, -0.8301, -0.5892, -0.4500],
        [ 0.2189,  1.0110,  0.8616, -0.1891,  0.8887],
        [-0.6935, -1.9201, -0.5703, -0.9124,  0.1106],
        [ 1.1608, -0.3957,  0.0713,  1.3774, -0.6742],
        [ 1.6958, -1.1947,  0.1399,  0.9050, -0.6406],
        [-0.1252, -0.2113, -0.0383, -1.6542, -0.8345],
        [-0.2912,  0.8185, -0.8129, -0.5812, -1.1632],
        [-0.1631, -0.2771,  0.1706,  0.0300, -0.2389],
        [-1.2760,  1.3263, -0.4213, -0.5523, -0.8796]], grad_fn=<MulBackward0>)

### Apply attention layer

In [54]:

fc_query = nn.Linear(d, d)
fc_key = nn.Linear(d, d)
fc_value = nn.Linear(d, d)

query = fc_query(intensity_embeds)
key = fc_key(intensity_embeds)
value = fc_value(intensity_embeds)

In [66]:
query, key, value

(tensor([[-0.2857,  0.1889,  0.1489, -0.1565,  0.6605],
         [-0.7395,  0.3893, -0.2984,  0.5636,  0.1867],
         [ 0.2933,  0.4409,  0.6648,  0.0376, -0.2046],
         [-0.3587,  0.2807,  0.6418, -0.2844,  0.5242],
         [-0.1292,  0.7542,  0.8130, -0.6248,  0.0094],
         [-0.3969,  0.5077, -0.5793, -0.1518,  0.3529],
         [-0.5883, -0.1551, -0.3775, -0.0735,  1.2481],
         [-0.3661,  0.1754,  0.0770,  0.2138,  0.4081],
         [-0.7898, -0.5310, -0.7525,  0.5241,  1.4550]],
        grad_fn=<AddmmBackward0>),
 tensor([[-0.6269, -0.3281, -0.1026, -0.1579, -0.1261],
         [ 0.5384, -0.0798,  0.4958,  0.4476,  0.5327],
         [-1.2396, -0.5369,  0.8596, -0.7259, -0.9787],
         [ 0.1533, -0.7948, -0.3024,  0.9275,  0.6377],
         [ 0.0572, -1.3515, -0.3055,  0.5088,  0.3625],
         [-0.5564, -0.7791, -0.0624, -0.2210, -0.7636],
         [-0.5556, -0.1371, -0.4583,  0.2827, -0.1186],
         [-0.2527, -0.3553,  0.3056,  0.3985, -0.0858],
         [-0

In [67]:
n_head = 5
multihead_attn = nn.MultiheadAttention(embed_dim=d, num_heads=n_head)

attn_output, attn_output_weights = multihead_attn(query, key, value)
attn_output, attn_output_weights

(tensor([[-0.0590, -0.0826, -0.1745, -0.0322,  0.2837],
         [-0.0563, -0.0774, -0.1727, -0.0299,  0.2800],
         [-0.0532, -0.0786, -0.1665, -0.0289,  0.2743],
         [-0.0560, -0.0821, -0.1737, -0.0305,  0.2845],
         [-0.0536, -0.0768, -0.1710, -0.0255,  0.2808],
         [-0.0591, -0.0779, -0.1742, -0.0296,  0.2812],
         [-0.0639, -0.0862, -0.1809, -0.0358,  0.2907],
         [-0.0574, -0.0808, -0.1726, -0.0317,  0.2808],
         [-0.0669, -0.0871, -0.1843, -0.0383,  0.2930]],
        grad_fn=<SqueezeBackward1>),
 tensor([[0.1118, 0.1089, 0.1163, 0.1099, 0.1105, 0.1112, 0.1105, 0.1106, 0.1101],
         [0.1113, 0.1164, 0.1178, 0.1079, 0.1084, 0.1099, 0.1073, 0.1120, 0.1090],
         [0.1106, 0.1062, 0.1155, 0.1087, 0.1093, 0.1137, 0.1121, 0.1111, 0.1128],
         [0.1109, 0.1049, 0.1158, 0.1115, 0.1116, 0.1113, 0.1120, 0.1102, 0.1116],
         [0.1102, 0.1015, 0.1222, 0.1076, 0.1093, 0.1139, 0.1117, 0.1105, 0.1132],
         [0.1121, 0.1134, 0.1232, 0.1050, 0

In [68]:
sig_out = torch.flatten(attn_output)
print(sig_out.shape)
sig_out

torch.Size([45])


tensor([-0.0590, -0.0826, -0.1745, -0.0322,  0.2837, -0.0563, -0.0774, -0.1727,
        -0.0299,  0.2800, -0.0532, -0.0786, -0.1665, -0.0289,  0.2743, -0.0560,
        -0.0821, -0.1737, -0.0305,  0.2845, -0.0536, -0.0768, -0.1710, -0.0255,
         0.2808, -0.0591, -0.0779, -0.1742, -0.0296,  0.2812, -0.0639, -0.0862,
        -0.1809, -0.0358,  0.2907, -0.0574, -0.0808, -0.1726, -0.0317,  0.2808,
        -0.0669, -0.0871, -0.1843, -0.0383,  0.2930],
       grad_fn=<ReshapeAliasBackward0>)

Intensity 값도 좀 써보자...

In [198]:
model = CategoricalAttentionModel(dict_category, dim=10, n_head=5, len_seq=train_input[0].shape[0])
loss = nn.BCELoss()
epoch = 1

for i in range(epoch):
    for train_batch, train_batch_intensity, train_batch_gt in zip(*train_input, train_gt):
        train_batch = torch.LongTensor(train_batch)
        train_batch_intensity = torch.FloatTensor(train_batch_intensity)
        
        print(train_batch.shape, train_batch_intensity.shape, train_batch_gt.shape)
        print(train_batch, train_batch_intensity, train_batch_gt)      

        out, attn_output_weight = model(train_batch], [train_batch_intensity])
        out = loss(out, torch.FloatTensor(train_batch_gt))
        out.backward()

TypeError: __init__() got an unexpected keyword argument 'dim'

In [114]:
embeds = nn.Embedding(len(dict_category), 10)
embeds(train_batch)

tensor([[-0.6470, -0.4671, -0.9521, -0.3721,  0.1073, -0.3636, -0.6553, -0.4153,
         -0.2740, -0.1161],
        [-0.8199,  0.6420,  0.0690,  0.2407,  0.5281,  0.5990, -0.5269, -0.0838,
         -1.2944, -2.4451],
        [-0.8439,  0.3972, -0.9228, -1.3569, -0.0828,  1.6367,  1.2470,  1.0412,
          0.3062, -0.2643],
        [-1.4641,  1.1879, -1.0983, -0.3468, -0.4475,  0.1358,  0.5564, -0.7417,
         -0.9949, -0.5689],
        [ 0.6894,  0.8224,  1.6778,  0.9481,  0.4234, -0.5520,  0.4859,  0.1137,
          0.9515, -0.2714],
        [-0.0478, -0.2252,  1.4150,  0.5634, -0.6786,  1.0044,  1.2636,  0.9458,
         -1.3276,  0.1284],
        [-1.3680, -0.8717, -0.9801, -0.0789,  0.1069, -0.2780,  1.3702,  1.5853,
          0.9717, -2.1310],
        [-0.0193,  1.6397, -1.9519, -0.0153, -0.7928,  0.3646, -0.5454, -0.2519,
         -0.9905,  0.0076],
        [-0.6290, -0.5411, -0.0429,  0.4950,  1.3508,  0.0924, -0.9634,  1.6695,
          0.6529, -1.7690]], grad_fn=<Embedding

## Using transformer instead of attention. (positional encoding is included)

In [124]:
transformer_model = nn.Transformer(d_model=10, nhead=5, num_encoder_layers=5)
transformer_model

Transformer(
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=10, out_features=10, bias=True)
        )
        (linear1): Linear(in_features=10, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=2048, out_features=10, bias=True)
        (norm1): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=10, out_features=10, bias=True)
        )
        (linear1): Linear(in_features=10, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=Fal

In [154]:
transformer_model.d_model

10

In [None]:
out = transformer_model(intensity_embeds, intensity_embeds)

In [163]:
len(train_input[0])

4628

Let's train with my own model

In [175]:
from model.attention_model import CategoricalTransformer

d_model = 10
len_seq = len(train_input[0][0])

transformer_model = nn.Transformer(d_model=d_model, nhead=5, num_encoder_layers=5)
model = CategoricalTransformer(transformer_model, dict_category, len_seq=len_seq)
loss = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
epoch = 10

model.train()
for i in range(epoch):
    total_loss = 0
    model.train()
    for train_batch, train_batch_intensity, train_batch_gt in zip(*train_input, train_gt):
        train_batch = torch.LongTensor(train_batch)
        train_batch_intensity = torch.FloatTensor(train_batch_intensity)

        out = model(train_batch, train_batch_intensity)
        loss_val = loss(out, torch.FloatTensor(train_batch_gt))
        total_loss += loss_val
        out.backward()
        optimizer.step()

    
    model.eval()
    total_loss = total_loss / len(train_input)
    print(f"Epoch {i} train_loss:", total_loss)


Epoch 0 train_loss: tensor(15775.6855, grad_fn=<DivBackward0>)
Epoch 1 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 2 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 3 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 4 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 5 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 6 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 7 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 8 train_loss: tensor(16750., grad_fn=<DivBackward0>)
Epoch 9 train_loss: tensor(16750., grad_fn=<DivBackward0>)


In [176]:
model.eval()

CategoricalTransformer(
  (cat_embeding): Embedding(20, 10)
  (transformer_model): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=10, out_features=10, bias=True)
          )
          (linear1): Linear(in_features=10, out_features=2048, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=2048, out_features=10, bias=True)
          (norm1): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((10,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
        (1): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=10, out_features=10, bias=True)
          