In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from nltk.tokenize import word_tokenize
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from torch import optim

from io import open
import unicodedata
import re
import random

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
# Tensors built in tensorFromSentence / get_dataloader are placed on this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [2]:
# Load the question/answer CSV. `skiprows=1` drops the first line of the file and
# `header=None` makes pandas label the columns with integers instead of names.
# NOTE(review): the path is relative to the notebook's working directory.
df = pd.read_csv("../data/AI.csv", sep=",", header= None, skiprows=1)

df.head()

Unnamed: 0,0,1
0,Who did the first work generally recognized as...,Warren McCulloch and Walter Pitts (1943).\n
1,What sources was drawn on the formation of the...,knowledge of the basic physiology and function...
2,Who created the Hebbian learning rule?,Donald Hebb (1949).\n
3,When the first neural network is built?,1950.\n
4,What is the first neural network called?,The SNARC.\n


In [3]:
# Convert the frame into a plain list of rows, one [question, answer] list per row.
# (A stale commented-out loop over columns [1, 2] used to live here; those column
# labels do not match the two-column frame displayed above.)


pairs_read = df.values.tolist()
pairs_read[0][0]

'Who did the first work generally recognized as AI?'

In [4]:
# Reserved vocabulary ids; they match the entries pre-filled in Lang.index2word.
# Gotcha: get_dataloader zero-initialises its id arrays, so id 0 (SOS) also ends
# up acting as the padding value.
SOS_token = 0
EOS_token = 1

class Lang:
    """Vocabulary mapping space-separated tokens to integer ids and back.

    Ids 0 and 1 are pre-assigned to the "SOS" and "EOS" markers, so the first
    token that is added receives id 2.
    """

    def __init__(self, name):
        """Create an empty vocabulary labelled ``name``."""
        self.name = name
        self.word2index = {}
        self.word2count = {}
        self.index2word = dict(enumerate(("SOS", "EOS")))
        self.n_words = len(self.index2word)  # SOS and EOS are already counted

    def addSentence(self, sentence):
        """Register every token obtained by splitting ``sentence`` on single spaces."""
        for token in sentence.split(' '):
            self.addWord(token)

    def addWord(self, word):
        """Count ``word``; assign it the next free id the first time it is seen."""
        if word in self.word2index:
            self.word2count[word] += 1
            return

        new_index = self.n_words
        self.word2index[word] = new_index
        self.word2count[word] = 1
        self.index2word[new_index] = word
        self.n_words = new_index + 1

In [5]:
def unicodeToAscii(s):
    """Strip combining marks from ``s``.

    The string is decomposed with NFD normalisation and every character whose
    Unicode category is 'Mn' (non-spacing mark) is dropped; all other
    characters are kept unchanged.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    return ''.join(kept_chars)

def normalizeString(s):
    """Lowercase, trim and reduce ``s`` to letters plus ``!`` and ``?``.

    Steps: lowercase and strip, remove combining marks via unicodeToAscii,
    put a space in front of each of ``.``, ``!`` and ``?``, then replace every
    run of characters other than ASCII letters, ``!`` and ``?`` with a single
    space. Note that the last step also removes the ``.`` characters.
    """
    ascii_text = unicodeToAscii(s.lower().strip())
    spaced_punct = re.sub(r"([.!?])", r" \1", ascii_text)
    letters_only = re.sub(r"[^a-zA-Z!?]+", r" ", spaced_punct)
    return letters_only.strip()

In [6]:
# Upper bound on sequence length; also the number of decoding steps and the
# width of the padded id arrays elsewhere in this notebook.
MAX_LENGTH = 30

def filterPair(p):
    """Return True when both p[0] and p[1] split into fewer than MAX_LENGTH tokens.

    Tokens are obtained with ``split(' ')``; p[1] is only inspected when p[0]
    already passes the check.
    """
    if not len(p[0].split(' ')) < MAX_LENGTH:
        return False
    return len(p[1].split(' ')) < MAX_LENGTH

def filterPairs(pairs):
    """Return a new list with only the pairs for which filterPair is true."""
    return list(filter(filterPair, pairs))

In [7]:
def readLangs(df_filter: list, reverse = False):
    """Create the shared vocabulary object and hand the pairs back unchanged.

    Args:
        df_filter: question/answer pairs; returned as-is.
        reverse: accepted but not used by this function.

    Returns:
        (Lang("Q_A"), df_filter) - a fresh, empty vocabulary and the input pairs.
    """
    print("Reading lines...")
    return Lang("Q_A"), df_filter

In [8]:
def prepareData(dataframe , reverse = False):
    """Build the vocabulary from the question/answer pairs that pass filterPairs.

    Args:
        dataframe: sequence of pairs where item 0 is the question and item 1 the
            answer (the notebook passes ``df.values.tolist()``).
        reverse: accepted but not used by this function.

    Returns:
        (vocabulary, kept_pairs) - the populated Lang object and the filtered pairs.
    """
    vocab, all_pairs = readLangs(dataframe)
    print("Read %s sentence pairs" % len(all_pairs))

    kept_pairs = filterPairs(all_pairs)
    print("Trimmed to %s sentence pairs" % len(kept_pairs))

    print("Counting words...")
    # Questions and answers share one vocabulary, so both sides feed the same object.
    for qa_pair in kept_pairs:
        for sentence in (qa_pair[0], qa_pair[1]):
            vocab.addSentence(sentence)

    print("Counted words: ")
    print("Total bag of word: ", vocab.n_words)
    return vocab, kept_pairs

# Build the vocabulary once at notebook level and show one random filtered pair.
# evaluateRandomly later reads the globals `pairs_return` and `oop_qa`.
oop_qa, pairs_return = prepareData(pairs_read, False)
print(random.choice(pairs_return))

Reading lines...
Read 503 sentence pairs
Trimmed to 495 sentence pairs
Counting words...
Counted words: 
Total bag of word:  2382
['What is Type B strategy?', '"A Type B strategy ignores moves that look bad']


In [9]:
class EncoderRNN(nn.Module):
    """GRU encoder: token ids -> embeddings -> dropout -> batch-first GRU."""

    def __init__(self, input_size, hidden_size, dropout_p = 0.1):
        """
        Args:
            input_size: number of rows in the embedding table (vocabulary size).
            hidden_size: embedding width and GRU hidden width.
            dropout_p: dropout probability applied to the embeddings.
        """
        super().__init__()
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.dropout = nn.Dropout(dropout_p)

    def forward(self, input):
        """Encode ``input`` and return the GRU's ``(output, hidden)`` pair.

        ``input`` holds integer token ids; the GRU is batch-first, so a
        (batch, seq_len) input yields output (batch, seq_len, hidden_size) and
        hidden (1, batch, hidden_size).
        """
        token_vectors = self.embedding(input)
        return self.gru(self.dropout(token_vectors))


In [10]:
class BahdanauAttention(nn.Module):
    """Additive attention: score = Wv(tanh(Wq(query) + Wk(keys))).

    The three projections are lazy linear layers, so their input widths are
    inferred from the first tensors passed through ``forward``.

    Args:
        hidden_size: width of the projected query/key space.
        dropout: dropout probability applied to the attention weights before
            they are used to mix ``values``. Previously this layer was created
            but never applied, so the rate passed by the caller had no effect.
        **kwargs: accepted and ignored.
    """

    def __init__(self, hidden_size, dropout = 0, **kwargs) -> None:
        super().__init__()
        self.Wq = nn.LazyLinear(hidden_size, bias = False)
        self.Wk = nn.LazyLinear(hidden_size, bias = False)
        self.Wv = nn.LazyLinear(1, bias=False)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, keys, values = None):
        """Attend over ``keys`` with ``query``.

        Args:
            query: 3-D tensor that must broadcast against the projected keys;
                the decoder in this notebook passes (batch, 1, hidden).
            keys: 3-D tensor, (batch, num_keys, features).
            values: tensor mixed by the attention weights; defaults to ``keys``.

        Returns:
            (context, weights): ``context`` is ``bmm(dropout(weights), values)``;
            ``weights`` are the un-dropped softmax weights of shape
            (batch, 1, num_keys), which therefore always sum to 1.
        """
        if values is None:
            values = keys

        # One scalar score per key: (batch, num_keys, 1).
        scores = self.Wv(torch.tanh(self.Wq(query) + self.Wk(keys)))

        # Move the per-key scores from the row axis to the last axis:
        # (batch, num_keys, 1) -> (batch, 1, num_keys), e.g. [32, 10, 1] -> [32, 1, 10],
        # so each batch item holds a single row of scores ready for softmax and bmm.
        scores = scores.squeeze(2).unsqueeze(1)
        weights = F.softmax(scores, dim=-1)

        # Dropout only affects the weights used for mixing; the returned weights
        # stay a proper distribution. It is the identity in eval mode or when p == 0.
        context = torch.bmm(self.dropout(weights), values)

        return context, weights
    
class AttnDecoderRNN(nn.Module):
    """GRU decoder that attends over the encoder outputs at every step.

    Fix over the previous version: the vocabulary projection ``self.out`` was
    applied in ``forward_step`` and then a second time in ``forward`` on the
    already-projected logits. With a lazily sized layer that is a shape error
    whenever ``output_size != hidden_size``. The projection now happens exactly
    once (in ``forward_step``) and is an explicit ``hidden_size -> output_size``
    linear layer.

    Args:
        hidden_size: embedding width and GRU hidden width.
        output_size: vocabulary size (number of logits per step).
        dropout_p: dropout probability for the embeddings and the attention layer.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1):
        super().__init__()
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attention = BahdanauAttention(hidden_size, dropout= dropout_p)
        # batch_first=True -> inputs and outputs are (batch, seq_len, features)
        # instead of the default (seq_len, batch, features).
        # The GRU input is the token embedding concatenated with the context vector.
        self.gru = nn.GRU(2*hidden_size, hidden_size, batch_first=True)
        # The GRU emits hidden_size features, so the input width is known up front.
        self.out = nn.Linear(hidden_size, output_size)

        self.dropout = nn.Dropout(dropout_p)

    def forward(self, encoder_outputs, encoder_hidden, target_tensor=None):
        """Decode MAX_LENGTH steps, starting from SOS.

        Args:
            encoder_outputs: (batch, src_len, hidden) encoder states to attend over.
            encoder_hidden: initial decoder hidden state.
            target_tensor: optional (batch, >= MAX_LENGTH) token ids. When given,
                column ``i`` is fed as the next input (teacher forcing); otherwise
                the arg-max token of the previous step is fed back.

        Returns:
            (log_probs, hidden, attentions): log-softmax scores of shape
            (batch, MAX_LENGTH, output_size), the last hidden state, and the
            attention weights concatenated along the step axis.
        """
        batch_size = encoder_outputs.size(0)
        # Create the SOS column on the same device as the encoder states rather
        # than relying on a notebook-level global.
        decoder_input = torch.empty(
            batch_size, 1, dtype=torch.long, device=encoder_outputs.device
        ).fill_(SOS_token)
        decoder_hidden = encoder_hidden
        decoder_outputs = []
        attentions = []

        for i in range(MAX_LENGTH):
            decoder_output, decoder_hidden, atten_weight = self.forward_step(
                decoder_input, decoder_hidden, encoder_outputs
            )
            # One (batch, 1, output_size) slice of logits per step.
            decoder_outputs.append(decoder_output)
            attentions.append(atten_weight)

            if target_tensor is not None:
                # Teacher forcing: feed the ground-truth token for this position.
                decoder_input = target_tensor[:, i].unsqueeze(1)
            else:
                # Greedy decoding: feed back the most likely token, detached from the graph.
                _, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze(-1).detach()

        # forward_step already projected to vocabulary logits; only concatenate here.
        decoder_outputs = torch.cat(decoder_outputs, dim=1)
        decoder_outputs = F.log_softmax(decoder_outputs, dim=-1)
        attentions = torch.cat(attentions, dim=1)

        return decoder_outputs, decoder_hidden, attentions


    def forward_step(self, input, hidden, encoder_outputs):
        """Run one decoding step.

        Returns:
            (logits, hidden, attn_weights) where ``logits`` is
            (batch, 1, output_size).
        """
        embedded = self.dropout(self.embedding(input))

        # The hidden state is (num_layers, batch, hidden); attention expects the
        # batch axis first, so swap the first two axes to get the query.
        query = hidden.permute(1, 0, 2)
        context, attn_weights = self.attention(query, encoder_outputs)
        input_gru = torch.cat((embedded, context), dim=2)
        output, hidden = self.gru(input_gru, hidden)
        output = self.out(output)
        return output, hidden, attn_weights

In [11]:
def indexesFromSentence(oop_qa, sentence):
    """Map each space-separated token of ``sentence`` to its vocabulary id.

    Raises:
        KeyError: if a token is not present in ``oop_qa.word2index``.
    """
    lookup = oop_qa.word2index
    indexes = []
    for token in sentence.split(" "):
        indexes.append(lookup[token])
    return indexes

def tensorFromSentence(oop_qa, sentence):
    """Encode ``sentence`` as a (1, num_tokens + 1) long tensor ending in EOS.

    The tensor is created on the notebook-level ``device``.
    """
    token_ids = indexesFromSentence(oop_qa, sentence) + [EOS_token]
    # Wrapping the id list in another list yields the single-row batch directly.
    return torch.tensor([token_ids], dtype=torch.long, device=device)


def get_dataloader(batch_size):
    """Build the vocabulary and a shuffling DataLoader over the padded id pairs.

    Reads the notebook-level ``pairs_read``. Every question and answer is turned
    into token ids, terminated with EOS and written into a zero-initialised row of
    width MAX_LENGTH, so unused positions keep the value 0.

    Returns:
        (vocabulary, dataloader) - the dataloader yields
        (input_ids, target_ids) long tensors placed on ``device``.
    """
    vocab, qa_pairs = prepareData(pairs_read, False)

    padded_shape = (len(qa_pairs), MAX_LENGTH)
    input_ids = np.zeros(padded_shape, dtype=np.int32)
    target_ids = np.zeros(padded_shape, dtype=np.int32)

    for row, (question, answer) in enumerate(qa_pairs):
        question_ids = indexesFromSentence(vocab, question)
        answer_ids = indexesFromSentence(vocab, answer)

        question_ids = question_ids + [EOS_token]
        answer_ids = answer_ids + [EOS_token]

        input_ids[row, :len(question_ids)] = question_ids
        target_ids[row, :len(answer_ids)] = answer_ids

    dataset = TensorDataset(
        torch.LongTensor(input_ids).to(device),
        torch.LongTensor(target_ids).to(device),
    )
    # RandomSampler draws the examples in a new random order on every pass.
    loader = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)
    return vocab, loader

In [12]:
def train_epoch(dataloader, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion):
    """Run one pass over ``dataloader`` and return the mean batch loss.

    For each (input, target) batch: clear both optimizers' gradients, encode,
    decode with the target passed to the decoder, compute ``criterion`` over the
    flattened per-position scores and targets, back-propagate, and step both
    optimizers.
    """
    running_loss = 0
    for input_tensor, target_tensor in dataloader:
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, _, _ = decoder(encoder_outputs, encoder_hidden, target_tensor)

        # Flatten (batch, steps, vocab) -> (batch * steps, vocab) to match the
        # flattened target ids.
        flat_scores = decoder_outputs.view(-1, decoder_outputs.size(-1))
        flat_targets = target_tensor.view(-1)
        loss = criterion(flat_scores, flat_targets)
        loss.backward()

        encoder_optimizer.step()
        decoder_optimizer.step()

        running_loss += loss.item()

    return running_loss / len(dataloader)

In [13]:
import time
import math

def asMinutes(s):
    """Format a duration given in seconds as ``'<minutes>m <seconds>s'``.

    Both parts are rendered with ``%d``, so fractional seconds are truncated.
    """
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Return ``'<elapsed> (- <remaining>)'`` for a job started at ``since``.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: fraction of the work completed so far; the total duration is
            estimated as elapsed / percent, so it must be non-zero.
    """
    elapsed = time.time() - since
    estimated_total = elapsed / (percent)
    remaining = estimated_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [14]:
def train(train_dataloader, encoder, decoder, n_epochs, learning_rate=0.001,
               print_every=100, plot_every=100):
    """Train the encoder/decoder pair for ``n_epochs`` epochs.

    Each model gets its own Adam optimizer and the loss is ``nn.NLLLoss``
    (the decoder is expected to emit log-probabilities).

    Args:
        train_dataloader: iterable of (input, target) batches for train_epoch.
        encoder, decoder: the modules to optimise.
        n_epochs: number of passes over the dataloader.
        learning_rate: learning rate for both Adam optimizers.
        print_every: print the average epoch loss every this many epochs.
        plot_every: record the average epoch loss every this many epochs.

    Returns:
        list[float]: one averaged loss per ``plot_every`` epochs. Previously
        these values were accumulated and then thrown away; returning them lets
        the caller plot or inspect the curve. Callers that ignore the return
        value are unaffected.
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every
    plot_loss_total = 0  # Reset every plot_every

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = nn.NLLLoss()

    for epoch in range(1, n_epochs + 1):
        loss = train_epoch(train_dataloader, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss
        plot_loss_total += loss

        if epoch % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            # elapsed (- estimated remaining) (epoch, percent done) average loss
            print('%s (%d %d%%) %.4f' % (timeSince(start, epoch / n_epochs),
                                        epoch, epoch / n_epochs * 100, print_loss_avg))

        if epoch % plot_every == 0:
            plot_loss_avg = plot_loss_total / plot_every
            plot_losses.append(plot_loss_avg)
            plot_loss_total = 0

    return plot_losses



In [None]:
# Model width and mini-batch size for this run.
hidden_size = 128
batch_size = 32

# Rebuilds the vocabulary from `pairs_read` and rebinds the global `oop_qa`.
oop_qa, train_dataloader = get_dataloader(batch_size)

# Encoder and decoder share one vocabulary, hence the same size on both sides.
encoder = EncoderRNN(oop_qa.n_words, hidden_size).to(device)
decoder = AttnDecoderRNN(hidden_size, oop_qa.n_words).to(device)

# 400 epochs; progress is printed every 20 epochs.
train(train_dataloader, encoder, decoder, 400, print_every=20, plot_every=5)

In [None]:
def evaluate(encoder, decoder, sentence, oop_qa):
    """Greedy-decode an answer for ``sentence``.

    The sentence is encoded as a single-row batch, the decoder is run without a
    target (so it feeds back its own predictions), and the highest-scoring id
    at every step is translated back to a word until EOS is produced.

    Returns:
        (decoded_words, decoder_attn): the predicted words, with '<EOS>'
        appended when an EOS id was produced, and the decoder's attention weights.
    """
    with torch.no_grad():
        input_tensor = tensorFromSentence(oop_qa, sentence)

        encoder_outputs, encoder_hidden = encoder(input_tensor)
        decoder_outputs, decoder_hidden, decoder_attn = decoder(encoder_outputs, encoder_hidden)

        # Best-scoring id per step; squeeze() drops every size-1 axis.
        best_ids = decoder_outputs.topk(1)[1].squeeze()

        decoded_words = []
        for step_id in best_ids:
            token_id = step_id.item()
            if token_id == EOS_token:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(oop_qa.index2word[token_id])
    return decoded_words, decoder_attn


def evaluateRandomly(encoder, decoder, n=10):
    """Print ``n`` randomly chosen pairs together with the model's answer.

    Uses the notebook-level ``pairs_return`` and ``oop_qa``. For every sample it
    prints the question ('>'), the reference answer ('=') and the decoded answer
    ('<'), followed by an empty line.
    """
    for _ in range(n):
        question, reference = random.choice(pairs_return)[:2]
        print('>', question)
        print('=', reference)
        predicted_words, _attn = evaluate(encoder, decoder, question, oop_qa)
        print('<', ' '.join(predicted_words))
        print('')

In [None]:
# Switch both modules to evaluation mode (dropout layers become no-ops) before
# sampling predictions.
encoder.eval()
decoder.eval()
evaluateRandomly(encoder, decoder)

> who discovered a complete theorem-proving algorithm for first-order logic in 1965?
= J. A. Robinson's

< initial pruning space  satisfiable episodic; mechanism concerned sophisticated concerned branching sophisticated sophisticated Complete fit fit concerned branching "Reduces example branching Goal Complex branching "Reduces Goal Complex branching "Reduces Goal

> How does logic differs?
= Logics differ in their ontological commitments and epistemological commitments.

< of representing knowledge base world.
 <EOS>

> What is the right thing?
= "It is expected to maximize goal achievement
< "It is expected to maximize goal achievement <EOS>

> What is Five-in-row system?
= Five-in-row system is normally is implemented as a board game.

< Five-in-row system is higher than by programming its intelligence (AI) problem unary proof they go next.
 <EOS>

> What are the four basic kinds of agent programs that embody the principles underlying almost all intelligent systems?
= Simple reflex 

In [25]:
import torch

# Scratch: table of position / 10000^(2i / num_hiddens) for every position and
# every even feature index 2i, giving a (max_len, num_hiddens / 2) tensor.
# NOTE(review): this looks like the angle term of a sinusoidal positional
# encoding; no sin/cos is applied in this cell.
max_len = 1000
num_hiddens = 32

a = torch.arange(max_len, dtype=torch.float32).reshape(-1, 1) / torch.pow(10000, torch.arange(0, num_hiddens, 2, dtype=torch.float32) / num_hiddens)
a.shape

torch.Size([1000, 16])

In [22]:
# Scratch: powers of two with an int32 exponent tensor. The recorded output shows
# 0 for every exponent from 32 upward, i.e. the 32-bit integer result overflows.
# NOTE(review): the recorded output has more entries than num_hiddens = 32 would
# produce, so this cell was presumably run with a different num_hiddens.
torch.pow(2, torch.arange(0, num_hiddens, 2, dtype=torch.int)) 

tensor([         1,          4,         16,         64,        256,       1024,
              4096,      16384,      65536,     262144,    1048576,    4194304,
          16777216,   67108864,  268435456, 1073741824,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,          0,          0,          0,          0,
                 0,          0,         

In [7]:
# Scratch: float32 positions 0..999, the numerator used in the angle table above.
torch.arange(1000, dtype = torch.float32)

tensor([  0.,   1.,   2.,   3.,   4.,   5.,   6.,   7.,   8.,   9.,  10.,  11.,
         12.,  13.,  14.,  15.,  16.,  17.,  18.,  19.,  20.,  21.,  22.,  23.,
         24.,  25.,  26.,  27.,  28.,  29.,  30.,  31.,  32.,  33.,  34.,  35.,
         36.,  37.,  38.,  39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,
         48.,  49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.,  59.,
         60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,  71.,
         72.,  73.,  74.,  75.,  76.,  77.,  78.,  79.,  80.,  81.,  82.,  83.,
         84.,  85.,  86.,  87.,  88.,  89.,  90.,  91.,  92.,  93.,  94.,  95.,
         96.,  97.,  98.,  99., 100., 101., 102., 103., 104., 105., 106., 107.,
        108., 109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119.,
        120., 121., 122., 123., 124., 125., 126., 127., 128., 129., 130., 131.,
        132., 133., 134., 135., 136., 137., 138., 139., 140., 141., 142., 143.,
        144., 145., 146., 147., 148., 14

In [4]:

nums = [0,0,1]

# Move every zero to the end while keeping the other elements in their original
# order. The list is rebuilt in one pass and written back in place instead of
# calling remove()/append() on the list that is being iterated, which is
# quadratic and depends on how the iterator reacts to the shifting elements.
nums[:] = [num for num in nums if num != 0] + [0] * nums.count(0)

nums

[1, 0, 0]

In [1]:
import torch
from torch import nn

In [2]:
class PositionWiseFFN(nn.Module):
    """Two-layer feed-forward block: linear -> ReLU -> linear.

    Both linear layers are lazy, so their input widths are inferred on the first
    call. They act on the last axis only, so every position is transformed
    independently with the same weights.

    Args:
        ffn_num_hiddens: output width of the first linear layer.
        ffn_num_outputs: output width of the second linear layer.
    """

    def __init__(self, ffn_num_hiddens, ffn_num_outputs):
        super().__init__()
        self.dense1 = nn.LazyLinear(ffn_num_hiddens)
        self.relu = nn.ReLU()
        self.dense2 = nn.LazyLinear(ffn_num_outputs)

    def forward(self, X):
        """Apply dense1, ReLU and dense2 to ``X`` in that order."""
        hidden = self.dense1(X)
        activated = self.relu(hidden)
        return self.dense2(activated)

In [5]:
# Demo: identical input rows produce identical output rows, because the block is
# applied to each position independently (see the recorded output).
ffn = PositionWiseFFN(4, 8)
ffn.eval()
ffn(torch.ones((2,3,4)))[0]

tensor([[-0.4076,  0.0295, -0.1874, -0.3984, -0.2571, -0.1063, -0.5304, -0.1125],
        [-0.4076,  0.0295, -0.1874, -0.3984, -0.2571, -0.1063, -0.5304, -0.1125],
        [-0.4076,  0.0295, -0.1874, -0.3984, -0.2571, -0.1063, -0.5304, -0.1125]],
       grad_fn=<SelectBackward0>)

In [6]:
class AddNorm(nn.Module):
    """Residual connection followed by layer normalisation.

    Args:
        norm_shape: shape passed to ``nn.LayerNorm``.
        dropout: dropout probability applied to the sub-layer output ``Y``.
    """

    def __init__(self, norm_shape, dropout):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        self.ln = nn.LayerNorm(norm_shape)

    def forward(self, X, Y):
        """Return ``LayerNorm(X + Dropout(Y))``; ``X`` is the residual input."""
        regularised = self.dropout(Y)
        residual_sum = regularised + X
        return self.ln(residual_sum)
    

In [8]:
# Demo: AddNorm keeps the input shape.
add_norm = AddNorm(4, 0.5)
shape = (2, 3, 4)
add_norm(torch.ones(shape), torch.ones(shape)).shape

torch.Size([2, 3, 4])

In [36]:
import re

# Time string (12-hour clock with an AM/PM suffix)
time_string = "12:20:22AM"



def timeConversion(s):
    """Convert a 12-hour clock string to 24-hour ``hh:mm:ss``.

    Examples: ``"12:20:22AM" -> "00:20:22"``, ``"07:05:45PM" -> "19:05:45"``.

    Args:
        s: text starting with ``hh:mm:ss`` followed (optionally after
            whitespace) by ``AM`` or ``PM``.

    Returns:
        The time as ``hh:mm:ss`` on a 24-hour clock.

    Raises:
        ValueError: if ``s`` does not have the expected format. The previous
            implementation failed with an unexplained IndexError instead.
    """
    # One raw-string pattern replaces three separate scans that used non-raw
    # "\d" literals (invalid escape sequences, which newer Python versions warn about).
    match = re.match(r"(\d{2})(:\d{2}:\d{2})\s*(AM|PM)", s)
    if match is None:
        raise ValueError(f"expected 'hh:mm:ss' followed by AM or PM, got {s!r}")

    hour_text, minutes_seconds, meridiem = match.groups()

    # 12 o'clock is hour 0 of its half-day: 12AM -> 00 and 12PM -> 12.
    hour = int(hour_text) % 12
    if meridiem == "PM":
        hour += 12

    return f"{hour:02d}{minutes_seconds}"

# 12:xx AM is the first hour of the day, so this prints 00:20:22.
print(timeConversion(time_string))

00:20:22


In [28]:
time_string = "12:20:22PM"

# Raw string so that `\d` is passed to the regex engine untouched; in a normal
# string literal it is an invalid escape sequence that Python warns about.
re.findall(r"\d{2}:\d{2}:\d{2}", time_string)

['12:20:22']

In [34]:
# Scratch: str() does not zero-pad, so an hour of 0 becomes '0' rather than '00'.
str(0)

'0'