In [1]:
import numpy as np
from hmm_utils import HMM
from params import *

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import random

#some other libraries
import re
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

from typing import List

from sklearn.model_selection import GroupShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, \
    f1_score, roc_auc_score

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/roi.naveiro/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Perprocessing

In [2]:
data = pd.read_csv("data/ner.csv", encoding = "latin1")
data = data.fillna(method="ffill")
data = data.rename(columns={'Sentence #': 'sentence'})
data.head(5)

  data = data.fillna(method="ffill")


Unnamed: 0,sentence,Word,POS,Tag
0,Sentence: 1,Thousands,NNS,O
1,Sentence: 1,of,IN,O
2,Sentence: 1,demonstrators,NNS,O
3,Sentence: 1,have,VBP,O
4,Sentence: 1,marched,VBN,O


In [3]:
def pre_processing(text_column):
    # lowercase all text in the column
    text_column = text_column.str.lower()

    # replacing numbers with NUM token
    text_column = text_column.str.replace(r'\d+', 'NUM')

    # removing stopwords
    stop_words = set(stopwords.words('english'))
    text_column = text_column.apply(lambda x: ' '.join([word for word in x.split() if word not in stop_words]))

    return text_column

data_pre_precessed = pre_processing(data.Word)
#creating new dataframe with preprocessed word as a column
data_processed = data
data_processed['Word'] = data_pre_precessed

#removing the rows where word is empty
data_processed = data_processed[(data_processed['Word'] != '') | (data_processed['Word'].isna())]

In [4]:
data_processed

Unnamed: 0,sentence,Word,POS,Tag
0,Sentence: 1,thousands,NNS,O
2,Sentence: 1,demonstrators,NNS,O
4,Sentence: 1,marched,VBN,O
6,Sentence: 1,london,NNP,B-geo
8,Sentence: 1,protest,VB,O
...,...,...,...,...
1048567,Sentence: 47959,indian,JJ,B-gpe
1048568,Sentence: 47959,forces,NNS,O
1048569,Sentence: 47959,said,VBD,O
1048571,Sentence: 47959,responded,VBD,O


# Select most common words 

In [5]:
# Most common words
N_w = 300
common_words = data_processed['Word'].value_counts().sort_values(ascending=False)[:N_w].index
data_reduced = data_processed[data_processed['Word'].isin(common_words)]

tags = list(set(data_reduced.POS.values))  # Unique POS tags in the dataset
words = list(set(data_reduced.Word.values))  # Unique words in the dataset
len(tags), len(words)

tags = np.sort(tags)
tags

array(['$', ',', '.', ':', 'CC', 'CD', 'DT', 'IN', 'JJ', 'JJR', 'JJS',
       'LRB', 'MD', 'NN', 'NNP', 'NNPS', 'NNS', 'POS', 'PRP', 'RB', 'RBR',
       'RRB', 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', '``'], dtype='<U4')

In [6]:
# Convert words and tags into numbers
word2id = {w: i for i, w in enumerate(words)}
tag2id = {t: i for i, t in enumerate(tags)}
id2tag = {i: t for i, t in enumerate(tags)}
len(tags), len(words)
id2word = {}
for key in word2id:
    id2word[word2id[key]] = key


In [7]:
tag2id

{'$': 0,
 ',': 1,
 '.': 2,
 ':': 3,
 'CC': 4,
 'CD': 5,
 'DT': 6,
 'IN': 7,
 'JJ': 8,
 'JJR': 9,
 'JJS': 10,
 'LRB': 11,
 'MD': 12,
 'NN': 13,
 'NNP': 14,
 'NNPS': 15,
 'NNS': 16,
 'POS': 17,
 'PRP': 18,
 'RB': 19,
 'RBR': 20,
 'RRB': 21,
 'VB': 22,
 'VBD': 23,
 'VBG': 24,
 'VBN': 25,
 'VBP': 26,
 'VBZ': 27,
 '``': 28}

In [8]:
attack_tag = {
    0  : 1,
    1  : 2,
    2  : 3,
    4  : 6,
    5  : 13,
    6  : 4,
    7  : 6,
    8  : 9,
    9  : 10,
    10 : 9,
    11 : 19,
    12 : 22,
    13 : 16,
    14 : 15,
    15 : 14,
    16 : 13,
    17 : 17,
    18 : 19,
    19 : 20,
    20 : 21,
    21 : 20,
    22 : 23,
    23 : 25,
    24 : 22,
    25 : 23,
    26 : 27,
    27 : 26,
    28 : 2 
}

attack_tag = {
    0  : 0,
    1  : 1,
    2  : 1,
    3  : 3,
    4  : 4,
    5  : 5,
    6  : 6,
    7  : 7,
    8  : 8,
    9  : 9,
    10 : 10,
    11 : 11,
    12 : 12,
    13 : 16,
    14 : 16,
    15 : 15,
    16 : 16,
    17 : 17,
    18 : 18,
    19 : 19,
    20 : 20,
    21 : 21,
    22 : 22,
    23 : 23,
    24 : 24,
    25 : 25,
    26 : 26,
    27 : 27,
    28 : 2 
}

def seq2word(X):
    l = []
    for i in range(len(X)):
        l.append(id2word[X[i]])
    return l

def attack_seq(X):
    l = []
    for i in range(len(X)):
        l.append(attack_tag[X[i]])
    return l

In [9]:
data_reduced[data_reduced.sentence == "Sentence: 44516"]

Unnamed: 0,sentence,Word,POS,Tag
973508,Sentence: 44516,iranian,JJ,B-gpe
973509,Sentence: 44516,foreign,JJ,O
973510,Sentence: 44516,minister,NN,O
973513,Sentence: 44516,",",",",O
973514,Sentence: 44516,left,VBD,O
973515,Sentence: 44516,",",",",O
973518,Sentence: 44516,iraqi,JJ,B-gpe
973519,Sentence: 44516,foreign,NNP,O
973520,Sentence: 44516,minister,NNP,O
973524,Sentence: 44516,foreign,NNP,B-org


In [10]:
data_reduced.shape[0]

329086

In [11]:
tag2id

{'$': 0,
 ',': 1,
 '.': 2,
 ':': 3,
 'CC': 4,
 'CD': 5,
 'DT': 6,
 'IN': 7,
 'JJ': 8,
 'JJR': 9,
 'JJS': 10,
 'LRB': 11,
 'MD': 12,
 'NN': 13,
 'NNP': 14,
 'NNPS': 15,
 'NNS': 16,
 'POS': 17,
 'PRP': 18,
 'RB': 19,
 'RBR': 20,
 'RRB': 21,
 'VB': 22,
 'VBD': 23,
 'VBG': 24,
 'VBN': 25,
 'VBP': 26,
 'VBZ': 27,
 '``': 28}

## Create HMM manually

In [12]:
count_tags = dict(data_reduced.POS.value_counts())  # Total number of POS tags in the dataset
# Now let's create the tags to words count
count_tags_to_words = data_reduced.groupby(['POS']).apply(
    lambda grp: grp.groupby('Word')['POS'].count().to_dict()).to_dict()
# We shall also collect the counts for the first tags in the sentence
count_init_tags = dict(data_reduced.groupby('sentence').first().POS.value_counts())

# Create a mapping that stores the frequency of transitions in tags to it's next tags
count_tags_to_next_tags = np.zeros((len(tags), len(tags)), dtype=int)
sentences = list(data_reduced.sentence)
pos = list(data_reduced.POS)
for i in tqdm(range(len(sentences)), position=0, leave=True):
    if (i > 0) and (sentences[i] == sentences[i - 1]):
        prevtagid = tag2id[pos[i - 1]]
        nexttagid = tag2id[pos[i]]
        count_tags_to_next_tags[prevtagid][nexttagid] += 1

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 329086/329086 [00:00<00:00, 1576555.24it/s]


In [13]:
startprob = np.zeros((len(tags),))
transmat = np.zeros((len(tags), len(tags)))
emissionprob = np.zeros((len(tags), len(words)))
num_sentences = sum(count_init_tags.values())
sum_tags_to_next_tags = np.sum(count_tags_to_next_tags, axis=1)
for tag, tagid in tqdm(tag2id.items(), position=0, leave=True):
    floatCountTag = float(count_tags.get(tag, 0))
    startprob[tagid] = count_init_tags.get(tag, 0) / num_sentences
    for word, wordid in word2id.items():
        emissionprob[tagid][wordid] = count_tags_to_words.get(tag, {}).get(word, 0) / floatCountTag
    for tag2, tagid2 in tag2id.items():
        transmat[tagid][tagid2] = count_tags_to_next_tags[tagid][tagid2] / sum_tags_to_next_tags[tagid]

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 6771.03it/s]


# Build HMM

Include cutoff for probabilities equal to 0

In [14]:
cutoff = 0.001
startprob = startprob +  cutoff
startprob = startprob/ np.sum(startprob)
##
transmat =  transmat + cutoff
transmat = transmat / np.sum(transmat, axis=1)
##
emissionprob =  emissionprob + cutoff
emissionprob = emissionprob / np.sum(emissionprob, axis=1).reshape(-1,1)

In [15]:
hmm_n = HMM(len(tags), len(words))
hmm_n.startprob_ = startprob
hmm_n.transmat_ = transmat
hmm_n.emissionprob_ = emissionprob

# Attack HMM - APS

In [55]:
from solvers.aps_gibbs_class import aps_gibbs
from attackers.decoding_attacker import dec_attacker

In [56]:
T  = 53
n_obs = 300
n_hidden = 29
w1 = 3.0
w2 = 10.0 
k_value = 1000000.0
cool = np.arange(500,501, 1)
seq = 19*np.ones(T).astype(int)
X = np.zeros( (1, len(data_reduced.Word.values[:T])) )
for i in range(len(data_reduced.Word.values[:T])):
    X[0, i] = word2id[data_reduced.Word.values[i]]

###
X = np.zeros( (1, len(data_reduced.Word.values[:T])) )
for i in range(len(data_reduced.Word.values[:T])):
    X[0, i] = word2id[data_reduced.Word.values[i]]
    
A =  X.astype(int)

_, y_pred = hmm_n.nu(A[0])
y_pred

y = np.zeros(len(data_reduced.POS[:T]))
for i in range(len(data_reduced.POS[:T])):
    y[i] = tag2id[data_reduced.POS[:T].iloc[i]]
###

A =  X.astype(int)


In [84]:
rho_probs = np.ones(n_obs)
att = dec_attacker(hmm_n.startprob_ , hmm_n.transmat_, hmm_n.emissionprob_, rho_probs,
         A.T, w1, w2, seq, k_value)

In [None]:
find_sol = aps_gibbs(att, cool, burnin=0.1, verbose=True)
sol, samples = find_sol.iterate(simulation_seconds=None)

Percentage completed: 0.0
Current state [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18


In [None]:
attack_obs = att.attack_X(np.ones_like(sol), sol)
attack_obs = attack_obs.squeeze().astype(int)
V, seq = hmm_n.nu(attack_obs)
probs, opt_s = hmm_n.nu(seq)

In [None]:
opt_s

In [None]:
np.argmax(sol, axis=1)

In [None]:
A[0] == np.argmax(sol, axis=1)

In [48]:
#word2id

# Attack HMM - RS

In [20]:
from solvers.nn_RS.nn_RS import nn_RS
from attackers.decoding_attacker import dec_attacker
from attackers.smoothing_distribution_attacker import sd_attacker

In [17]:
T  = 53
n_obs = 300
n_hidden = 29
w1 = 1.0
w2 = 5.0 
k_value = 1000000.0

###
X = np.zeros( (1, len(data_reduced.Word.values[:T])) )
for i in range(len(data_reduced.Word.values[:T])):
    X[0, i] = word2id[data_reduced.Word.values[i]]
    
A =  X.astype(int)

_, y_pred = hmm_n.nu(A[0])
y_pred

y = np.zeros(len(data_reduced.POS[:T]))
for i in range(len(data_reduced.POS[:T])):
    y[i] = tag2id[data_reduced.POS[:T].iloc[i]]
###

seq = attack_seq(y_pred.astype(int))


In [22]:
sentence_num = 41785
sentence = f'Sentence: {sentence_num}'

sentence = data_reduced[data_reduced.sentence == sentence]

X = np.zeros( (1, sentence.shape[0] ) )
for i in range(sentence.shape[0]):
    X[0, i] = word2id[sentence.Word.values[i]]
    
X =  X.astype(int)

y = np.zeros(len(sentence.POS))
for i in range(len(sentence.POS)):
    y[i] = tag2id[sentence.POS.iloc[i]]
###


In [25]:
alpha = hmm_n.alpha(X)
beta = hmm_n.beta(X)
p  = np.exp(alpha[5 -1] + beta[5 -1])
p /= np.sum(p)
p


array([1.91835860e-03, 4.99946951e-02, 2.60585458e-02, 1.66899464e-03,
       9.46369609e-05, 4.80658030e-01, 1.61218995e-03, 9.17483031e-03,
       6.71247815e-02, 9.96410622e-04, 1.50725822e-03, 7.33856177e-04,
       4.24548650e-03, 6.63889142e-02, 6.82515287e-02, 1.29038951e-03,
       3.01499354e-02, 1.65189964e-02, 8.56529463e-04, 9.48547154e-03,
       3.72914365e-03, 1.09363482e-03, 7.55671906e-03, 2.99775799e-02,
       6.22199249e-03, 3.24691500e-02, 4.53655719e-02, 5.14016009e-03,
       2.97162088e-02])

In [28]:
hmm_n.smoothing(X, 5)

array([1.91835860e-03, 4.99946951e-02, 2.60585458e-02, 1.66899464e-03,
       9.46369609e-05, 4.80658030e-01, 1.61218995e-03, 9.17483031e-03,
       6.71247815e-02, 9.96410622e-04, 1.50725822e-03, 7.33856177e-04,
       4.24548650e-03, 6.63889142e-02, 6.82515287e-02, 1.29038951e-03,
       3.01499354e-02, 1.65189964e-02, 8.56529463e-04, 9.48547154e-03,
       3.72914365e-03, 1.09363482e-03, 7.55671906e-03, 2.99775799e-02,
       6.22199249e-03, 3.24691500e-02, 4.53655719e-02, 5.14016009e-03,
       2.97162088e-02])

In [26]:
np.argsort(p)

array([ 4, 11, 18,  9, 21, 15, 10,  6,  3,  0, 20, 12, 27, 24, 22,  7, 19,
       17,  2, 28, 23, 16, 25, 26,  1, 13,  8, 14,  5])

In [18]:
rho_probs = np.ones(n_obs)
att = dec_attacker(hmm_n.startprob_ , hmm_n.transmat_, hmm_n.emissionprob_, rho_probs,
         A.T, w1, w2, seq, k_value)

att = sd_attacker(hmm_n.startprob_ , hmm_n.transmat_, hmm_n.emissionprob_, rho_probs,
         A.T, w1, w2, 5, k_value)
    

In [19]:
find_sol = nn_RS(att, "SA", RS_iters=500, mcts_iters=10, sa_iters=10, eps=0.05, lr=0.005, verbose=True)
sol, samples = find_sol.iterate(simulation_seconds=None)

Percentage completed: 0.0
Best value: 
0.0


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 10.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 20.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 30.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 40.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 50.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 60.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 70.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 80.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

Percentage completed: 90.0
Best value: 
14.09450781863437


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(inpu

In [24]:
sol

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [25]:
attack_obs = att.attack_X(np.ones_like(sol), sol)
attack_obs = attack_obs.squeeze().astype(int)

In [30]:
hmm_n.smoothing(attack_obs.reshape(1,-1), 5)

array([6.60980283e-04, 1.99465406e-02, 7.98138015e-03, 6.50363229e-04,
       2.13736487e-05, 7.71485770e-01, 1.00980922e-03, 3.85471669e-03,
       3.28179272e-02, 3.96347496e-04, 2.15328400e-03, 3.06043859e-04,
       2.00914082e-03, 3.79429044e-02, 4.65739944e-02, 6.19203333e-04,
       1.75700119e-02, 7.11977501e-03, 4.09613692e-04, 2.54373160e-03,
       1.35880583e-03, 5.10853196e-04, 3.97365406e-03, 6.35236293e-03,
       2.34407179e-03, 5.13595689e-03, 1.55342783e-03, 2.68318880e-03,
       2.00147670e-02])

In [34]:
attack_obs.reshape(1,-1)

array([[105,  35,  33, 117, 165, 144, 245,  61, 109, 137, 235, 291, 157,
        110, 144, 235,  98,  87, 225, 257,  47, 233, 136, 194,  93, 242,
        296,  81, 158, 145, 130, 148, 142, 271, 182,  72, 159,  28, 281,
        170, 206, 248, 266,  70, 114,  11, 266, 223,  13,  63,   2, 214,
         17]])

In [36]:
A

array([[158, 238,  28,  78,  50,  18,  46,  61, 179, 174,   5,  47, 230,
        136, 174, 174,  46, 174, 118,  46,  30,  47,  46, 145,  93, 291,
        289,  46, 291, 145,  93,  28,  78,  50,  18,  46, 105, 260, 193,
         27, 193, 193,  46, 108, 162,   3, 119,  56, 207,  63,   0,  93,
         46]])

In [37]:
hmm_n.smoothing(A, 5)

array([1.95265954e-04, 1.58508581e-02, 4.22737750e-03, 5.25506519e-04,
       3.12823896e-06, 3.84214082e-03, 6.99572189e-04, 2.62455714e-03,
       3.42877388e-02, 3.06147535e-04, 2.01439976e-04, 2.60609499e-04,
       1.30551098e-03, 4.45448136e-02, 2.36880327e-02, 9.12231571e-04,
       8.23519128e-01, 9.13458126e-03, 4.31791178e-04, 1.45369164e-03,
       1.84594547e-03, 4.96808084e-04, 2.02307444e-03, 5.27378624e-03,
       1.39468159e-03, 4.27146503e-03, 2.52350055e-03, 2.99047215e-03,
       1.11661430e-02])

In [26]:
attack_obs

array([105,  35,  33, 117, 165, 144, 245,  61, 109, 137, 235, 291, 157,
       110, 144, 235,  98,  87, 225, 257,  47, 233, 136, 194,  93, 242,
       296,  81, 158, 145, 130, 148, 142, 271, 182,  72, 159,  28, 281,
       170, 206, 248, 266,  70, 114,  11, 266, 223,  13,  63,   2, 214,
        17])

In [50]:
np.mean(att_seq == seq)

0.2641509433962264

In [36]:
y_pred

array([16, 13, 14,  8, 16, 13,  2, 28,  2, 28, 14, 13,  5,  2, 28, 28,  2,
       28, 13,  2, 28, 13,  2, 28, 17, 13,  8,  2, 28, 14, 17, 14,  8, 16,
       13,  2, 28, 23,  1, 24,  1,  1,  2, 28, 13, 13,  8, 13, 16, 14, 14,
       17,  2])

In [37]:
y

array([16., 13., 14.,  8., 16., 13.,  2., 16., 25., 28., 14., 13.,  5.,
       13., 28., 28.,  2., 28., 13.,  2., 16., 13.,  2., 14., 17., 14.,
        8.,  2., 13., 14., 17., 14.,  8., 16., 13.,  2., 13., 23.,  1.,
       24.,  1.,  1.,  2., 14., 14., 14.,  8., 13., 16., 14., 14., 17.,
        2.])

In [23]:
seq

array([16, 13,  8,  8, 16, 16,  2, 28, 14,  8, 14, 21,  5,  2, 28, 13, 16,
        8, 25,  2, 28, 14,  2, 28,  8, 16,  8,  2, 28, 14, 17, 14, 25,  8,
       13, 14, 14, 12, 16, 25,  1,  1,  2, 28, 13, 14,  8,  5, 13, 13, 27,
       13, 14])

In [51]:
print( np.mean(att_seq == y) )
print( np.mean(y_pred == y) )

0.39622641509433965
0.7924528301886793


In [40]:
print( np.mean( attack_obs == A[0] ) )

0.4339622641509434


In [26]:
att.expected_utility(sol)

180.19610300402007

In [27]:
print(' '.join(seq2word(attack_obs))) 

thousands war european british troops prices . former nato french bush ) one terrorist " day attacks top reported . france mr. . britain european police southern . reported britain 's iraq used east bomb pakistan march may media wounded , , . tehran energy united second 10 office found says week mr.


In [28]:
print(' '.join(seq2word(A[0]))) 

thousands war iraq british troops country . soldiers killed " bush number one terrorist " " . " parliament . police number . britain 's party southern . party britain 's iraq british troops country . march came , including , , . international energy agency second day talks wednesday iran 's .
