In [59]:
import pandas as pd
import numpy as np
import string
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize 
import re
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import TfidfVectorizer
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
from sklearn.metrics import f1_score

In [2]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /Users/skwong/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/skwong/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/skwong/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
disaster = pd.read_csv('nlp-getting-started/train.csv')

In [4]:
disaster.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


> ## add features


> ### cleaning

In [5]:
text_data = np.array(disaster[['id','text']])

In [6]:
lemmatizer = WordNetLemmatizer()
analyser = SentimentIntensityAnalyzer()
stop_words = set(stopwords.words('english')) 

In [7]:
text_data[0:5,:]

array([[1,
        'Our Deeds are the Reason of this #earthquake May ALLAH Forgive us all'],
       [4, 'Forest fire near La Ronge Sask. Canada'],
       [5,
        "All residents asked to 'shelter in place' are being notified by officers. No other evacuation or shelter in place orders are expected"],
       [6,
        '13,000 people receive #wildfires evacuation orders in California '],
       [7,
        'Just got sent this photo from Ruby #Alaska as smoke from #wildfires pours into a school ']],
      dtype=object)

In [8]:
# Removes words that are not needed. Lemmaizes words
def clean_text(arr):
    cleaned_texts = []
    for row in arr:
        text = "".join((char for char in row[1] if char not in string.punctuation))
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = word_tokenize(text)
        text = [lemmatizer.lemmatize(w.lower()) for w in text if not w in stop_words] 
        cleaned_texts.append(text)
        
    return np.array(cleaned_texts)

In [9]:
text_clean = clean_text(text_data)

In [10]:
disaster['clean_text'] = text_clean

> ### sentiment analysis

In [11]:
# Obtains sentiment score
def sentiment(arr):
    scores = []
    for row in arr:
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        scores.append(list(analyser.polarity_scores(text).values()))
    return np.array(scores)

In [12]:
text_sentiment = sentiment(text_data)

In [13]:
positive = [t_s[0] for t_s in text_sentiment]
neutral = [t_s[1] for t_s in text_sentiment]
negative = [t_s[2] for t_s in text_sentiment]
compound = [t_s[3] for t_s in text_sentiment]

In [14]:
disaster['sentiment_score_positive'] = positive
disaster['sentiment_score_neutral'] = neutral
disaster['sentiment_score_negative'] = negative
disaster['sentiment_score_compound'] = compound

In [15]:
disaster.head()

Unnamed: 0,id,keyword,location,text,target,clean_text,sentiment_score_positive,sentiment_score_neutral,sentiment_score_negative,sentiment_score_compound
0,1,,,Our Deeds are the Reason of this #earthquake M...,1,"[our, deed, reason, #, earthquake, may, allah,...",0.0,0.851,0.149,0.2732
1,4,,,Forest fire near La Ronge Sask. Canada,1,"[forest, fire, near, la, ronge, sask, ., canada]",0.286,0.714,0.0,-0.34
2,5,,,All residents asked to 'shelter in place' are ...,1,"[all, resident, asked, 'shelter, place, ', not...",0.095,0.905,0.0,-0.296
3,6,,,"13,000 people receive #wildfires evacuation or...",1,"[13,000, people, receive, #, wildfire, evacuat...",0.0,1.0,0.0,0.0
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1,"[just, got, sent, photo, ruby, #, alaska, smok...",0.0,1.0,0.0,0.0


In [16]:
# word_count
disaster['word_count'] = disaster['clean_text'].apply(lambda x: len(x))
# hashtag_count
disaster['hashtag_count'] = disaster['clean_text'].apply(lambda x: len([c for c in x if c == '#']))
#df_test['hashtag_count'] = df_test['text'].apply(lambda x: len([c for c in str(x) if c == '#']))

# mention_count
disaster['mention_count'] = disaster['clean_text'].apply(lambda x: len([c for c in x if c == '@']))
#df_test['mention_count'] = df_test['text'].apply(lambda x: len([c for c in str(x) if c == '@']))

In [17]:
cleaned_text = text_data

In [18]:
cleaned_text

array([[1,
        'Our Deeds are the Reason of this #earthquake May ALLAH Forgive us all'],
       [4, 'Forest fire near La Ronge Sask. Canada'],
       [5,
        "All residents asked to 'shelter in place' are being notified by officers. No other evacuation or shelter in place orders are expected"],
       ...,
       [10871,
        'M1.94 [01:04 UTC]?5km S of Volcano Hawaii. http://t.co/zDtoyd8EbJ'],
       [10872,
        'Police investigating after an e-bike collided with a car in Little Portugal. E-bike rider suffered serious non-life threatening injuries.'],
       [10873,
        'The Latest: More Homes Razed by Northern California Wildfire - ABC News http://t.co/YmY4rSkQ3d']],
      dtype=object)

In [19]:
def clean_text_tfidf(arr):
    cleaned_texts = []
    for row in arr:
        text = "".join((char for char in row[1] if char not in string.punctuation))
        text = re.sub(r'^https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
        text = re.sub(r'^http?:\/\/.*[\r\n]*', '', row[1], flags=re.MULTILINE)
        text = word_tokenize(text)
        text = [lemmatizer.lemmatize(w.lower()) for w in text if not w in stop_words]
        text = ' '.join(text)
        text = text.replace('# ','#')
        cleaned_texts.append(text)
        
    return np.array(cleaned_texts)

In [20]:
cleaned_text = clean_text_tfidf(cleaned_text)

In [21]:
cleaned_text

array(['our deed reason #earthquake may allah forgive u',
       'forest fire near la ronge sask . canada',
       "all resident asked 'shelter place ' notified officer . no evacuation shelter place order expected",
       ...,
       'm1.94 [ 01:04 utc ] ? 5km s volcano hawaii . http : //t.co/zdtoyd8ebj',
       'police investigating e-bike collided car little portugal . e-bike rider suffered serious non-life threatening injury .',
       'the latest : more home razed northern california wildfire - abc news http : //t.co/ymy4rskq3d'],
      dtype='<U195')

In [22]:
# put it in tfidf vectorizer
tfidf = TfidfVectorizer(min_df = 5, ngram_range = (2,2))
features = tfidf.fit_transform(cleaned_text)

In [23]:
features_df = pd.DataFrame(features.todense(),columns = tfidf.get_feature_names())

In [24]:
features_df.shape

(7613, 1227)

In [25]:
disaster.reset_index(drop=True, inplace=True)
features_df.reset_index(drop=True, inplace=True)

In [26]:
disaster = pd.concat([disaster,features_df],axis = 1)

In [27]:
disaster.shape

(7613, 1240)

In [28]:
disaster.columns

Index(['id', 'keyword', 'location', 'text', 'target', 'clean_text',
       'sentiment_score_positive', 'sentiment_score_neutral',
       'sentiment_score_negative', 'sentiment_score_compound',
       ...
       'youtube video', 'z10 full', 'û_ http', 'ûª http', 'ûª israel',
       'ûªs first', 'ûªs stock', 'ûªt let', 'ûªve home', 'ûïwhen saw'],
      dtype='object', length=1240)

In [29]:
disaster_drop = disaster.drop(['id','keyword','location','text','clean_text'],axis = 1)

### encode sentence

In [30]:
clean_text_list = []
for t in cleaned_text:
    clean_text_list.extend(t.split(' '))

In [31]:
clean_list = []
for t in cleaned_text:
    lst = t.split(' ')
    clean_list.append(lst)

In [32]:
clean_list[0]

['our', 'deed', 'reason', '#earthquake', 'may', 'allah', 'forgive', 'u']

In [33]:
from collections import Counter
counts = Counter(clean_text_list)

In [34]:
len(counts.keys())

22444

In [35]:
for word in list(counts):
    if counts[word] < 2:
        del counts[word]

In [36]:
len(counts.keys())

6493

In [37]:
vocab2index = {"":0, "UNK":1}
words = ["", "UNK"]
for word in counts:
    vocab2index[word] = len(words)
    words.append(word)

In [38]:
def encode_sentence(text, vocab2index, N=20, padding_start=True):
    # text = clean_list[i]
    enc = np.zeros(N, dtype=np.int32)
    enc1 = np.array([vocab2index.get(w, vocab2index["UNK"]) for w in text])
    l = min(N, len(enc1))
    if padding_start:
        enc[:l] = enc1[:l]
    else:
        enc[N-l:] = enc1[:l]
    return enc, l

In [39]:
encode_sentence(clean_list[0], vocab2index, N=40, padding_start=False)

(array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32),
 8)

# model

In [40]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import f1_score

In [41]:
X = np.array(disaster_drop.drop('target', axis = 1))
y = np.array(disaster_drop.target)

In [42]:
x_train, x_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [43]:
x_train

array([[0.207, 0.536, 0.257, ..., 0.   , 0.   , 0.   ],
       [0.205, 0.795, 0.   , ..., 0.   , 0.   , 0.   ],
       [0.   , 1.   , 0.   , ..., 0.   , 0.   , 0.   ],
       ...,
       [0.155, 0.845, 0.   , ..., 0.   , 0.   , 0.   ],
       [0.   , 1.   , 0.   , ..., 0.   , 0.   , 0.   ],
       [0.   , 1.   , 0.   , ..., 0.   , 0.   , 0.   ]])

In [44]:
y_train

array([1, 0, 1, ..., 0, 1, 1])

> ## baseline

> ### Creates RandomizedSearch/ GridSearch CV objects

In [45]:
## Defines Dictionaries for possible values to choose from
ada_boost_dict = {'n_estimators': np.linspace(50, 500, num=46, dtype = int), 
                  'learning_rate':np.linspace(0.05,1,num=20), 
                  'algorithm' : ['SAMME', 'SAMME.R']}

log_dict = {'penalty': ['l2','none'], 
            'fit_intercept': [True, False],
           'C': np.linspace(0,5,21)}

rf_dict = {'n_estimators': np.linspace(50, 1000, num=96, dtype = int),
          'criterion':['gini','entropy'],
          'min_samples_split':np.linspace(5,50,11, dtype = int),
          'max_features':['auto','sqrt','log2',None],
          'bootstrap':[True, False]}

In [46]:
adaboost = AdaBoostClassifier()
rf = RandomForestClassifier(n_jobs=-1)
log_reg = LogisticRegression(multi_class = 'ovr', max_iter = 1000)

In [47]:
ada_model = RandomizedSearchCV(adaboost, ada_boost_dict, 60, 
                               random_state = 42, cv = 3)
rf_model = RandomizedSearchCV(rf, rf_dict, 300, 
                               random_state = 42, cv = 3)
log_model = GridSearchCV(log_reg, log_dict, cv = 3)

> ### Finds best parameters for each model

In [None]:
ada_model.fit(x_train, y_train)

In [None]:
rf_model.fit(x_train, y_train)

In [None]:
log_model.fit(x_train, y_train)

In [219]:
best_ada_params = ada_model.best_params_
best_log_params = log_model.best_params_
best_rf_params = rf_model.best_params_

> ### Determines the best model

In [220]:
best_log_reg = LogisticRegression(**best_log_params, multi_class = 'ovr', max_iter = 1000)
best_ada = AdaBoostClassifier(**best_ada_params)
best_rf = RandomForestClassifier(**best_rf_params, n_jobs=-1)

In [221]:
best_rf.fit(x_train, y_train)
best_log_reg.fit(x_train, y_train)
best_ada.fit(x_train, y_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=0.6,
                   n_estimators=280, random_state=None)

In [222]:
y_pred_rf = best_rf.predict(x_test)
y_pred_ada = best_ada.predict(x_test)
y_pred_log_reg = best_log_reg.predict(x_test)

In [223]:
f1_score(y_pred_rf, y_test)

0.6352313167259785

In [332]:
sum(y_pred_rf==y_test)/len(y_test)

0.7307944845699278

In [224]:
f1_score(y_pred_ada, y_test)

0.6215722120658136

In [333]:
sum(y_pred_ada==y_test)/len(y_test)

0.7281680892974393

In [225]:
f1_score(y_pred_log_reg, y_test)

0.6272401433691756

In [334]:
sum(y_pred_log_reg==y_test)/len(y_test)

0.726854891661195

## RNN model

### dataset & model

In [49]:
class DisasterDataset(Dataset):
    def __init__(self, clean_list, y, padding_start=True, N=40):
        self.X1 = [encode_sentence(c_l, vocab2index, N=40, padding_start=False) for c_l in clean_list]
        self.y = y
    
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        x1,s = self.X1[idx]
        return x1, s, self.y[idx]
    
train_ds = DisasterDataset(clean_list, y_train)
valid_ds = DisasterDataset(clean_list, y_val)

In [50]:
b_size = 100
train_dl = DataLoader(train_ds, shuffle=True, batch_size=b_size)
valid_dl = DataLoader(valid_ds, batch_size=b_size)

In [51]:
x1, s, y = next(iter(train_dl))

In [52]:
x1.shape

torch.Size([100, 40])

In [53]:
s.shape

torch.Size([100])

In [54]:
def update_optimizer(optimizer, lr):
    for i, param_group in enumerate(optimizer.param_groups):
        param_group["lr"] = lr

In [55]:
class GRUModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim):
        super(GRUModel, self).__init__() 
        self.hidden_dim = hidden_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
    
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)
        self.dropout = nn.Dropout(0.5)
        
    def forward(self, x1):
        x = self.embeddings(x1)
        x = self.dropout(x)
        out, h = self.gru(x)
        return self.linear(h[-1])
    

class LSTMModel(torch.nn.Module) :
    def __init__(self, vocab_size, embedding_dim, hidden_dim) :
        super(LSTMModel,self).__init__()
        self.hidden_dim = hidden_dim
        self.dropout = nn.Dropout(0.5)
        self.embeddings = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.linear = nn.Linear(hidden_dim, 1)
        
    def forward(self, x, s):
        # sorting
        s, sort_index = torch.sort(s, 0,descending=True)
        s = s.numpy().tolist()
        x = x[sort_index]
        x = self.embeddings(x)
        x = self.dropout(x)
        x_pack = pack_padded_sequence(x, s, batch_first=True)
        out_pack, (ht, ct) = self.lstm(x_pack)
        out = self.linear(ht[-1])
        return torch.zeros_like(out).scatter_(0, sort_index.unsqueeze(1).cuda(), out)

In [56]:
device = torch.device("cuda:6" if torch.cuda.is_available() else "cpu")

In [66]:
def train_epocs_gru(model, optimizer, train_dl, val_dl, epochs=10):
    for i in range(epochs):
        model.train()
        sum_loss = 0.0
        total = 0
        for x1, s, y in train_dl:
            x1 = x1.long()#.cuda()
            y = y.float()#.cuda()
            y_pred = model(x1)
            optimizer.zero_grad()
            loss = F.binary_cross_entropy_with_logits(y_pred, y.unsqueeze(1))
            loss.backward()
            optimizer.step()
            sum_loss += loss.item()*y.shape[0]
            total += y.shape[0]
        val_loss, val_acc, val_f1 = val_metrics_gru(model, val_dl)
        if i % 5 == 1:
            print("train loss %.3f val loss %.3f val accuracy %.3f and val f1 %.3f" % (sum_loss/total, val_loss, val_acc, val_f1))

In [67]:
def val_metrics_gru(model, valid_dl):
    model.eval()
    correct = 0
    total = 0
    sum_loss = 0.0
    y_pred_all = []
    y_all = []
    for x1, s, y in train_dl:
        x1 = x1.long()#.cuda()
        y = y.float().unsqueeze(1)#.cuda()
        y_hat = model(x1)
        loss = F.binary_cross_entropy_with_logits(y_hat, y)
        y_pred = y_hat > 0
        y_pred_all.extend(y_pred)
        correct += (y_pred.float() == y).float().sum()
        y_all.extend(y)
        total += y.shape[0]
        sum_loss += loss.item()*y.shape[0]
    return sum_loss/total, correct/total, f1_score(y_pred_all, y_all)

In [68]:
vocab_size = len(words)
print(vocab_size)

gru_model = GRUModel(vocab_size, 50,50)#.cuda()

parameters = filter(lambda p: p.requires_grad, gru_model.parameters())
optimizer = torch.optim.Adam(parameters, lr=0.01)

6495


In [69]:
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.681 val loss 0.661 val accuracy 0.619 and val f1 0.348
train loss 0.559 val loss 0.448 val accuracy 0.805 and val f1 0.747
train loss 0.451 val loss 0.317 val accuracy 0.866 and val f1 0.837
train loss 0.396 val loss 0.275 val accuracy 0.885 and val f1 0.860
train loss 0.361 val loss 0.240 val accuracy 0.898 and val f1 0.881
train loss 0.344 val loss 0.230 val accuracy 0.906 and val f1 0.891


In [70]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.311 val loss 0.213 val accuracy 0.911 and val f1 0.895
train loss 0.307 val loss 0.208 val accuracy 0.915 and val f1 0.900
train loss 0.300 val loss 0.201 val accuracy 0.915 and val f1 0.900
train loss 0.293 val loss 0.197 val accuracy 0.915 and val f1 0.900
train loss 0.300 val loss 0.193 val accuracy 0.919 and val f1 0.904
train loss 0.287 val loss 0.190 val accuracy 0.918 and val f1 0.903


In [71]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.288 val loss 0.187 val accuracy 0.919 and val f1 0.904
train loss 0.281 val loss 0.185 val accuracy 0.919 and val f1 0.905
train loss 0.279 val loss 0.182 val accuracy 0.921 and val f1 0.906
train loss 0.279 val loss 0.178 val accuracy 0.922 and val f1 0.909
train loss 0.280 val loss 0.176 val accuracy 0.924 and val f1 0.910
train loss 0.268 val loss 0.173 val accuracy 0.926 and val f1 0.913


In [72]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.268 val loss 0.171 val accuracy 0.926 and val f1 0.913
train loss 0.265 val loss 0.168 val accuracy 0.926 and val f1 0.913
train loss 0.272 val loss 0.167 val accuracy 0.926 and val f1 0.913
train loss 0.262 val loss 0.165 val accuracy 0.928 and val f1 0.915
train loss 0.257 val loss 0.164 val accuracy 0.927 and val f1 0.914
train loss 0.259 val loss 0.163 val accuracy 0.928 and val f1 0.915


In [73]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.252 val loss 0.160 val accuracy 0.930 and val f1 0.917
train loss 0.258 val loss 0.158 val accuracy 0.931 and val f1 0.918
train loss 0.250 val loss 0.156 val accuracy 0.930 and val f1 0.917
train loss 0.251 val loss 0.154 val accuracy 0.933 and val f1 0.921
train loss 0.241 val loss 0.152 val accuracy 0.934 and val f1 0.922
train loss 0.252 val loss 0.151 val accuracy 0.934 and val f1 0.921


In [74]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.240 val loss 0.150 val accuracy 0.933 and val f1 0.921
train loss 0.242 val loss 0.148 val accuracy 0.934 and val f1 0.922
train loss 0.247 val loss 0.147 val accuracy 0.935 and val f1 0.924
train loss 0.237 val loss 0.147 val accuracy 0.935 and val f1 0.924
train loss 0.242 val loss 0.144 val accuracy 0.936 and val f1 0.925
train loss 0.234 val loss 0.141 val accuracy 0.938 and val f1 0.926


In [75]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.231 val loss 0.142 val accuracy 0.940 and val f1 0.929
train loss 0.236 val loss 0.141 val accuracy 0.937 and val f1 0.926
train loss 0.223 val loss 0.139 val accuracy 0.939 and val f1 0.928
train loss 0.229 val loss 0.138 val accuracy 0.939 and val f1 0.928
train loss 0.225 val loss 0.136 val accuracy 0.939 and val f1 0.928
train loss 0.215 val loss 0.134 val accuracy 0.941 and val f1 0.930


In [76]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.220 val loss 0.133 val accuracy 0.941 and val f1 0.930
train loss 0.216 val loss 0.132 val accuracy 0.940 and val f1 0.929
train loss 0.230 val loss 0.131 val accuracy 0.941 and val f1 0.930
train loss 0.220 val loss 0.130 val accuracy 0.941 and val f1 0.931
train loss 0.221 val loss 0.130 val accuracy 0.940 and val f1 0.929
train loss 0.217 val loss 0.129 val accuracy 0.942 and val f1 0.931


In [77]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.219 val loss 0.127 val accuracy 0.942 and val f1 0.931
train loss 0.214 val loss 0.126 val accuracy 0.942 and val f1 0.931
train loss 0.212 val loss 0.126 val accuracy 0.942 and val f1 0.932
train loss 0.209 val loss 0.125 val accuracy 0.943 and val f1 0.933
train loss 0.205 val loss 0.125 val accuracy 0.943 and val f1 0.932
train loss 0.212 val loss 0.125 val accuracy 0.943 and val f1 0.932


In [78]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.206 val loss 0.123 val accuracy 0.944 and val f1 0.934
train loss 0.213 val loss 0.122 val accuracy 0.943 and val f1 0.933
train loss 0.194 val loss 0.122 val accuracy 0.943 and val f1 0.933
train loss 0.198 val loss 0.121 val accuracy 0.943 and val f1 0.932
train loss 0.202 val loss 0.120 val accuracy 0.944 and val f1 0.934
train loss 0.203 val loss 0.119 val accuracy 0.944 and val f1 0.934


In [79]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.214 val loss 0.120 val accuracy 0.944 and val f1 0.934
train loss 0.202 val loss 0.118 val accuracy 0.944 and val f1 0.934
train loss 0.196 val loss 0.119 val accuracy 0.945 and val f1 0.935
train loss 0.202 val loss 0.117 val accuracy 0.944 and val f1 0.935
train loss 0.192 val loss 0.117 val accuracy 0.945 and val f1 0.935
train loss 0.186 val loss 0.116 val accuracy 0.946 and val f1 0.936


In [80]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.197 val loss 0.115 val accuracy 0.946 and val f1 0.937
train loss 0.203 val loss 0.114 val accuracy 0.946 and val f1 0.937
train loss 0.188 val loss 0.114 val accuracy 0.946 and val f1 0.937
train loss 0.195 val loss 0.114 val accuracy 0.946 and val f1 0.937
train loss 0.190 val loss 0.113 val accuracy 0.946 and val f1 0.936
train loss 0.191 val loss 0.113 val accuracy 0.946 and val f1 0.936


In [81]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.187 val loss 0.113 val accuracy 0.946 and val f1 0.936
train loss 0.189 val loss 0.112 val accuracy 0.947 and val f1 0.937
train loss 0.187 val loss 0.111 val accuracy 0.946 and val f1 0.937
train loss 0.183 val loss 0.110 val accuracy 0.947 and val f1 0.937
train loss 0.183 val loss 0.110 val accuracy 0.947 and val f1 0.937
train loss 0.185 val loss 0.110 val accuracy 0.946 and val f1 0.938


In [82]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.183 val loss 0.109 val accuracy 0.948 and val f1 0.939
train loss 0.178 val loss 0.109 val accuracy 0.948 and val f1 0.939
train loss 0.173 val loss 0.109 val accuracy 0.947 and val f1 0.939
train loss 0.180 val loss 0.108 val accuracy 0.948 and val f1 0.939
train loss 0.185 val loss 0.108 val accuracy 0.947 and val f1 0.938
train loss 0.170 val loss 0.108 val accuracy 0.949 and val f1 0.939


In [83]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.182 val loss 0.107 val accuracy 0.949 and val f1 0.940
train loss 0.177 val loss 0.107 val accuracy 0.948 and val f1 0.939
train loss 0.166 val loss 0.106 val accuracy 0.948 and val f1 0.939
train loss 0.172 val loss 0.106 val accuracy 0.948 and val f1 0.940
train loss 0.174 val loss 0.106 val accuracy 0.949 and val f1 0.940
train loss 0.171 val loss 0.105 val accuracy 0.948 and val f1 0.940


In [84]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.172 val loss 0.104 val accuracy 0.949 and val f1 0.940
train loss 0.170 val loss 0.104 val accuracy 0.949 and val f1 0.940
train loss 0.173 val loss 0.104 val accuracy 0.949 and val f1 0.940
train loss 0.170 val loss 0.104 val accuracy 0.949 and val f1 0.940
train loss 0.158 val loss 0.104 val accuracy 0.948 and val f1 0.940
train loss 0.165 val loss 0.104 val accuracy 0.949 and val f1 0.939


In [85]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.165 val loss 0.103 val accuracy 0.948 and val f1 0.940
train loss 0.171 val loss 0.103 val accuracy 0.949 and val f1 0.940
train loss 0.160 val loss 0.104 val accuracy 0.949 and val f1 0.940
train loss 0.161 val loss 0.102 val accuracy 0.950 and val f1 0.941
train loss 0.162 val loss 0.103 val accuracy 0.949 and val f1 0.941
train loss 0.166 val loss 0.103 val accuracy 0.949 and val f1 0.940


In [86]:
update_optimizer(optimizer, lr=0.001)
train_epocs_gru(gru_model, optimizer, train_dl, valid_dl, epochs=30)

train loss 0.166 val loss 0.102 val accuracy 0.950 and val f1 0.941
train loss 0.161 val loss 0.101 val accuracy 0.950 and val f1 0.941
train loss 0.161 val loss 0.101 val accuracy 0.949 and val f1 0.940
train loss 0.168 val loss 0.100 val accuracy 0.950 and val f1 0.941
train loss 0.156 val loss 0.100 val accuracy 0.950 and val f1 0.942
train loss 0.159 val loss 0.100 val accuracy 0.950 and val f1 0.942
