In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string
import torch
import torch.nn as nn

In [None]:
all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker

def findFiles(path): return glob.glob(path)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

In [None]:
def readLines(filename):
    with open(filename, encoding='utf-8') as some_file:
        return [unicodeToAscii(line.strip()) for line in some_file]

In [None]:
# Build the category_lines dictionary, a list of lines per category
category_lines = {}
all_categories = []
for filename in findFiles('/content/drive/MyDrive/Data/data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

In [None]:
print('# categories:', n_categories, all_categories)
print(unicodeToAscii("O'Néàl"))
print(category_lines["English"])

# categories: 18 ['Polish', 'Portuguese', 'Scottish', 'English', 'Arabic', 'Dutch', 'Irish', 'Spanish', 'Czech', 'Russian', 'Chinese', 'Korean', 'Italian', 'German', 'Greek', 'Japanese', 'French', 'Vietnamese']
O'Neal
['Abbas', 'Abbey', 'Abbott', 'Abdi', 'Abel', 'Abraham', 'Abrahams', 'Abrams', 'Ackary', 'Ackroyd', 'Acton', 'Adair', 'Adam', 'Adams', 'Adamson', 'Adanet', 'Addams', 'Adderley', 'Addinall', 'Addis', 'Addison', 'Addley', 'Aderson', 'Adey', 'Adkins', 'Adlam', 'Adler', 'Adrol', 'Adsett', 'Agar', 'Ahern', 'Aherne', 'Ahmad', 'Ahmed', 'Aikman', 'Ainley', 'Ainsworth', 'Aird', 'Airey', 'Aitchison', 'Aitken', 'Akhtar', 'Akram', 'Alam', 'Alanson', 'Alber', 'Albert', 'Albrighton', 'Albutt', 'Alcock', 'Alden', 'Alder', 'Aldersley', 'Alderson', 'Aldred', 'Aldren', 'Aldridge', 'Aldworth', 'Alesbury', 'Alexandar', 'Alexander', 'Alexnader', 'Alford', 'Algar', 'Ali', 'Alker', 'Alladee', 'Allam', 'Allan', 'Allard', 'Allaway', 'Allcock', 'Allcott', 'Alldridge', 'Alldritt', 'Allen', 'Allgood'

In [None]:
import random

# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    print(category,line)
    return category, line
randomTrainingPair()

Irish Ruadhain


('Irish', 'Ruadhain')

In [None]:
def categoryTensor(category):
    idx=all_categories.index(category)
    tensor=torch.zeros(n_categories)
    tensor[idx]=1
    return tensor

categoryTensor("French")

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0.])

In [None]:
# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor
print(inputTensor("Diaz"))

tensor([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
          0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
       

In [None]:
# LongTensor of second letter to end (EOS) for target without one-hot encoding
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)
targetTensor("Diaz")

tensor([ 8,  0, 25, 58])

In [None]:
# Make category, input, and target tensors from a random category, line pair
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor

In [None]:
class RNN(nn.Module):

  def __init__(self,input_dim,output_dim,hidden_dim):
    super(RNN,self).__init__()
    self.hidden_dim=hidden_dim
    self.i2h=nn.Linear(n_categories+input_dim+hidden_dim,hidden_dim)
    self.i2o=nn.Linear(n_categories+input_dim+hidden_dim,output_dim)
    self.o2o=nn.Linear(hidden_dim+output_dim,output_dim)

    self.dropout=nn.Dropout(0.2)
    self.activation=nn.LogSoftmax(dim=1)

  def forward(self,category,input,hidden):
    print(category,input,hidden)
    input_combined=torch.cat((category,input,hidden),1)
    hidden=self.i2h(input_combined)
    output=self.i2o(input_combined)
    output_combined=torch.cat((hidden+output),1)
    output=self.o2o(output_combined)
    output=self.dropout(output)
    output=self.activation(output)
    return output

  def init_hidden(self):
    return torch.zeros(1,self.hidden_dim)



In [None]:
criterion = nn.NLLLoss()

learning_rate = 0.0005
def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = rnn.init_hidden()

    rnn.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
        print(output,hidden)
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()


    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, loss.item() / input_line_tensor.size(0)

In [None]:
rnn = RNN(n_letters, 128, n_letters)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters

In [None]:
for iter in range(1, n_iters + 1):
    output, loss = train(*randomTrainingExample())
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % ( iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.appe

Arabic Sleiman
tensor([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]) tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0.]]) tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])


IndexError: ignored

In [None]:
from __future__ import unicode_literals, print_function, division
from io import open
import glob
import os
import unicodedata
import string

all_letters = string.ascii_letters + " .,;'-"
n_letters = len(all_letters) + 1 # Plus EOS marker

def findFiles(path): return glob.glob(path)

# Turn a Unicode string to plain ASCII, thanks to https://stackoverflow.com/a/518232/2809427
def unicodeToAscii(s):
    return ''.join(
        c for c in unicodedata.normalize('NFD', s)
        if unicodedata.category(c) != 'Mn'
        and c in all_letters
    )

# Read a file and split into lines
def readLines(filename):
    with open(filename, encoding='utf-8') as some_file:
        return [unicodeToAscii(line.strip()) for line in some_file]

# Build the category_lines dictionary, a list of lines per category
category_lines = {}
all_categories = []
for filename in findFiles('/content/drive/MyDrive/Data/data/names/*.txt'):
    category = os.path.splitext(os.path.basename(filename))[0]
    all_categories.append(category)
    lines = readLines(filename)
    category_lines[category] = lines

n_categories = len(all_categories)

if n_categories == 0:
    raise RuntimeError('Data not found. Make sure that you downloaded data '
        'from https://download.pytorch.org/tutorial/data.zip and extract it to '
        'the current directory.')

print('# categories:', n_categories, all_categories)
print(unicodeToAscii("O'Néàl"))


######################################################################
# Creating the Network
# ====================
#
# This network extends `the last tutorial's RNN <#Creating-the-Network>`__
# with an extra argument for the category tensor, which is concatenated
# along with the others. The category tensor is a one-hot vector just like
# the letter input.
#
# We will interpret the output as the probability of the next letter. When
# sampling, the most likely output letter is used as the next input
# letter.
#
# I added a second linear layer ``o2o`` (after combining hidden and
# output) to give it more muscle to work with. There's also a dropout
# layer, which `randomly zeros parts of its
# input <https://arxiv.org/abs/1207.0580>`__ with a given probability
# (here 0.1) and is usually used to fuzz inputs to prevent overfitting.
# Here we're using it towards the end of the network to purposely add some
# chaos and increase sampling variety.
#
# .. figure:: https://i.imgur.com/jzVrf7f.png
#    :alt:
#
#

import torch
import torch.nn as nn

class RNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(RNN, self).__init__()
        self.hidden_size = hidden_size

        self.i2h = nn.Linear(n_categories + input_size + hidden_size, hidden_size)
        self.i2o = nn.Linear(n_categories + input_size + hidden_size, output_size)
        self.o2o = nn.Linear(hidden_size + output_size, output_size)
        self.dropout = nn.Dropout(0.1)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, category, input, hidden):
        input_combined = torch.cat((category, input, hidden), 1)
        hidden = self.i2h(input_combined)
        output = self.i2o(input_combined)
        output_combined = torch.cat((hidden, output), 1)
        output = self.o2o(output_combined)
        output = self.dropout(output)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, self.hidden_size)


######################################################################
# Training
# =========
# Preparing for Training
# ----------------------
#
# First of all, helper functions to get random pairs of (category, line):
#

import random

# Random item from a list
def randomChoice(l):
    return l[random.randint(0, len(l) - 1)]

# Get a random category and random line from that category
def randomTrainingPair():
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    return category, line


######################################################################
# For each timestep (that is, for each letter in a training word) the
# inputs of the network will be
# ``(category, current letter, hidden state)`` and the outputs will be
# ``(next letter, next hidden state)``. So for each training set, we'll
# need the category, a set of input letters, and a set of output/target
# letters.
#
# Since we are predicting the next letter from the current letter for each
# timestep, the letter pairs are groups of consecutive letters from the
# line - e.g. for ``"ABCD<EOS>"`` we would create ("A", "B"), ("B", "C"),
# ("C", "D"), ("D", "EOS").
#
# .. figure:: https://i.imgur.com/JH58tXY.png
#    :alt:
#
# The category tensor is a `one-hot
# tensor <https://en.wikipedia.org/wiki/One-hot>`__ of size
# ``<1 x n_categories>``. When training we feed it to the network at every
# timestep - this is a design choice, it could have been included as part
# of initial hidden state or some other strategy.
#

# One-hot vector for category
def categoryTensor(category):
    li = all_categories.index(category)
    tensor = torch.zeros(1, n_categories)
    tensor[0][li] = 1
    return tensor

# One-hot matrix of first to last letters (not including EOS) for input
def inputTensor(line):
    tensor = torch.zeros(len(line), 1, n_letters)
    for li in range(len(line)):
        letter = line[li]
        tensor[li][0][all_letters.find(letter)] = 1
    return tensor

# LongTensor of second letter to end (EOS) for target
def targetTensor(line):
    letter_indexes = [all_letters.find(line[li]) for li in range(1, len(line))]
    letter_indexes.append(n_letters - 1) # EOS
    return torch.LongTensor(letter_indexes)



# Make category, input, and target tensors from a random category, line pair
def randomTrainingExample():
    category, line = randomTrainingPair()
    category_tensor = categoryTensor(category)
    input_line_tensor = inputTensor(line)
    target_line_tensor = targetTensor(line)
    return category_tensor, input_line_tensor, target_line_tensor


criterion = nn.NLLLoss()

learning_rate = 0.0005

def train(category_tensor, input_line_tensor, target_line_tensor):
    target_line_tensor.unsqueeze_(-1)
    hidden = rnn.initHidden()

    rnn.zero_grad()

    loss = 0

    for i in range(input_line_tensor.size(0)):
        output, hidden = rnn(category_tensor, input_line_tensor[i], hidden)
        l = criterion(output, target_line_tensor[i])
        loss += l

    loss.backward()

    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-learning_rate)

    return output, loss.item() / input_line_tensor.size(0)

import time
import math

def timeSince(since):
    now = time.time()
    s = now - since
    m = math.floor(s / 60)
    s -= m * 60
    return '%dm %ds' % (m, s)


rnn = RNN(n_letters, 128, n_letters)

n_iters = 100000
print_every = 5000
plot_every = 500
all_losses = []
total_loss = 0 # Reset every plot_every iters

start = time.time()

for iter in range(1, n_iters + 1):
    output, loss = train(*randomTrainingExample())
    total_loss += loss

    if iter % print_every == 0:
        print('%s (%d %d%%) %.4f' % (timeSince(start), iter, iter / n_iters * 100, loss))

    if iter % plot_every == 0:
        all_losses.append(total_loss / plot_every)
        total_loss = 0


In [None]:
#LSTMs understanding 

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x7f59a78decd0>

In [None]:
lstm = nn.LSTM(3, 3)  # Input dim is 3, output dim is 3
inputs = [torch.randn(1, 3) for _ in range(5)]  # make a sequence of length 5
print(torch.randn(1, 3))

print(inputs[0].view(1, 1, -1))
# initialize the hidden state.
hidden = (torch.randn(1, 1, 3),
          torch.randn(1, 1, 3))
print(hidden)
for i in inputs:
    # Step through the sequence one element at a time.
    # after each step, hidden contains the hidden state.
    out, hidden = lstm(i.view(1, 1, -1), hidden)
    print(out)

tensor([[ 0.5438, -0.4057,  1.1341]])
tensor([[[-0.5525,  0.6355, -0.3968]]])
(tensor([[[-1.1115,  0.3501, -0.7703]]]), tensor([[[-0.1473,  0.6272,  1.0935]]]))
tensor([[[-0.0580,  0.2825,  0.3994]]], grad_fn=<StackBackward0>)
tensor([[[-0.4137,  0.1009,  0.3741]]], grad_fn=<StackBackward0>)
tensor([[[-0.3485,  0.0918,  0.2322]]], grad_fn=<StackBackward0>)
tensor([[[-0.1715,  0.1001,  0.1877]]], grad_fn=<StackBackward0>)
tensor([[[-0.3489,  0.0869,  0.3202]]], grad_fn=<StackBackward0>)


In [None]:
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    return torch.tensor(idxs, dtype=torch.long)


training_data = [
    # Tags are: DET - determiner; NN - noun; V - verb
    # For example, the word "The" is a determiner
    ("The dog ate the apple".split(), ["DET", "NN", "V", "DET", "NN"]),
    ("Everybody read that book".split(), ["NN", "V", "DET", "NN"])
]
word_to_ix = {}
# For each words-list (sentence) and tags-list in each tuple of training_data
for sent, tags in training_data:
    for word in sent:
        if word not in word_to_ix:  # word has not been assigned an index yet
            word_to_ix[word] = len(word_to_ix)  # Assign each word with a unique index
print(word_to_ix)
tag_to_ix = {"DET": 0, "NN": 1, "V": 2}  # Assign each tag with a unique index

# These will usually be more like 32 or 64 dimensional.
# We will keep them small, so we can see how the weights change as we train.
EMBEDDING_DIM = 9
HIDDEN_DIM = 9

{'The': 0, 'dog': 1, 'ate': 2, 'the': 3, 'apple': 4, 'Everybody': 5, 'read': 6, 'that': 7, 'book': 8}


In [None]:
class LSTM(nn.Module):
  def __init__(self,sentence_vocab,tag_vocab,embed_dim,hidden_dim):
    super(LSTM,self).__init__()
    self.hidden_dim=hidden_dim
    self.embed=nn.Embedding(sentence_vocab,embed_dim)
    self.lstm=nn.LSTM(embed_dim,hidden_dim)
    self.lin=nn.Linear(hidden_dim,tag_vocab)

  def forward(self,sentence):

    embed=self.embed(sentence)
    out,hidden=self.lstm(embed.view(len(sentence),1,-1))
    res=self.lin(out.view(len(sentence),-1))
    tag_scores = F.log_softmax(res, dim=1)
    return tag_scores

  




In [None]:
model = LSTM(EMBEDDING_DIM, HIDDEN_DIM, len(word_to_ix), len(tag_to_ix))
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# See what the scores are before training
# Note that element i,j of the output is the score for tag j for word i.
# Here we don't need to train, so the code is wrapped in torch.no_grad()
# with torch.no_grad():
#     inputs = prepare_sequence(training_data[0][0], word_to_ix)
#     tag_scores = model(inputs)
#     print(tag_scores)

for epoch in range(300):  # again, normally you would NOT do 300 epochs, it is toy data
    for sentence, tags in training_data:
        # Step 1. Remember that Pytorch accumulates gradients.
        # We need to clear them out before each instance
        model.zero_grad()

        # Step 2. Get our inputs ready for the network, that is, turn them into
        # Tensors of word indices.
        sentence_in = prepare_sequence(sentence, word_to_ix)
        targets = prepare_sequence(tags, tag_to_ix)

        # Step 3. Run our forward pass.
        tag_scores = model(sentence_in)

        # Step 4. Compute the loss, gradients, and update the parameters by
        #  calling optimizer.step()
        loss = loss_function(tag_scores, targets)
        loss.backward()
        optimizer.step()

# See what the scores are after training
with torch.no_grad():
    inputs = prepare_sequence(training_data[0][0], word_to_ix)
    tag_scores = model(inputs)

    # The sentence is "the dog ate the apple".  i,j corresponds to score for tag j
    # for word i. The predicted tag is the maximum scoring tag.
    # Here, we can see the predicted sequence below is 0 1 2 0 1
    # since 0 is index of the maximum value of row 1,
    # 1 is the index of maximum value of row 2, etc.
    # Which is DET NOUN VERB DET NOUN, the correct sequence!
    print(tag_scores)

tensor([[-0.0880, -2.8352, -4.4762, -5.9068, -6.3540, -5.9129, -5.9989, -5.9235,
         -6.2911],
        [-3.8695, -0.0394, -4.6617, -6.2382, -7.0116, -6.6337, -6.8386, -6.3172,
         -6.7014],
        [-3.6060, -3.4582, -0.0702, -6.8693, -5.9716, -6.3934, -7.0295, -6.3873,
         -6.6286],
        [-0.0425, -3.7925, -4.4410, -6.6913, -6.8360, -6.6228, -6.6141, -6.5646,
         -7.0773],
        [-3.9226, -0.0405, -4.3911, -6.3667, -7.0246, -6.7503, -6.9336, -6.3873,
         -6.8266]])


In [None]:
tensor = torch.ones(4, 4)
print(f"First row: {tensor[0]}")
print(f"First column: {tensor[:, 0]}")
print(f"Last column: {tensor[..., -1]}")
tensor[:,1] = 0
print(tensor)

First row: tensor([1., 1., 1., 1.])
First column: tensor([1., 1., 1., 1.])
Last column: tensor([1., 1., 1., 1.])
tensor([[1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.],
        [1., 0., 1., 1.]])


In [None]:
t1 = torch.cat([tensor, tensor, tensor], dim=1
               )
print(t1)

tensor([[1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.],
        [1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1.]])


In [None]:
import torch
x=torch.randn(3,requires_grad=True)
y=2*x
print(y)
z=2*x*y
print(z)
# z=z.mean()
v=torch.tensor([3.4,5.4,3.4],dtype=torch.float32)
print(z.backward(v))
print(x.grad)
print(x.grad.zero_())

tensor([ 1.5927, -1.2705, -1.2640], grad_fn=<MulBackward0>)
tensor([2.5366, 1.6141, 1.5976], grad_fn=<MulBackward0>)
None
tensor([ 21.6603, -27.4420, -17.1900])
tensor([0., 0., 0.])


In [None]:
x=torch.tensor([1,2,3,4],dtype=torch.float32)
y=torch.tensor([2,4,6,8],dtype=torch.float32)
w=torch.tensor(0.0,dtype=torch.float32,requires_grad=True)
# w=0.0

In [None]:
import numpy as np
def forward(x):
  return x*w

def loss(y,ypred):
  return ((y-ypred)**2).mean()


epoch=100
lr=0.0001

for i in range(epoch):

  # w.zero_grad_()
  ypred=forward(x)

  lossval=loss(y,ypred)

  lossval.backward()

  #update wieghts
  with torch.no_grad():
    w-=lr*w.grad
  w.grad.zero_()
  if i%9==0:
    print("Loss:",lossval,w)

print(forward(10))

Loss: tensor(6.6039e-05, grad_fn=<MeanBackward0>) tensor(1.9970, requires_grad=True)
Loss: tensor(6.4283e-05, grad_fn=<MeanBackward0>) tensor(1.9971, requires_grad=True)
Loss: tensor(6.2563e-05, grad_fn=<MeanBackward0>) tensor(1.9971, requires_grad=True)
Loss: tensor(6.0901e-05, grad_fn=<MeanBackward0>) tensor(1.9972, requires_grad=True)
Loss: tensor(5.9270e-05, grad_fn=<MeanBackward0>) tensor(1.9972, requires_grad=True)
Loss: tensor(5.7698e-05, grad_fn=<MeanBackward0>) tensor(1.9972, requires_grad=True)
Loss: tensor(5.6152e-05, grad_fn=<MeanBackward0>) tensor(1.9973, requires_grad=True)
Loss: tensor(5.4664e-05, grad_fn=<MeanBackward0>) tensor(1.9973, requires_grad=True)
Loss: tensor(5.3197e-05, grad_fn=<MeanBackward0>) tensor(1.9973, requires_grad=True)
Loss: tensor(5.1787e-05, grad_fn=<MeanBackward0>) tensor(1.9974, requires_grad=True)
Loss: tensor(5.0402e-05, grad_fn=<MeanBackward0>) tensor(1.9974, requires_grad=True)
Loss: tensor(4.9061e-05, grad_fn=<MeanBackward0>) tensor(1.9974, 

In [None]:
import torch.nn as nn

x=torch.tensor([[1,1],[1,2],[3,3],[4,4]],dtype=torch.float32)
y=torch.tensor([[2],[4],[6],[8]],dtype=torch.float32)
w=torch.tensor(0.0,dtype=torch.float32,requires_grad=True)

model=nn.Linear(2,1)
loss==nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.001)

for i in range(epoch):

  ypred=model(x)

  l=loss(y,ypred)

  l.backward()

  optimizer.step()

  optimizer.zero_grad()
  if i%9==0:
    print("Loss:",l)


Loss: tensor(13.4059, grad_fn=<MeanBackward0>)
Loss: tensor(7.8719, grad_fn=<MeanBackward0>)
Loss: tensor(4.6664, grad_fn=<MeanBackward0>)
Loss: tensor(2.8094, grad_fn=<MeanBackward0>)
Loss: tensor(1.7334, grad_fn=<MeanBackward0>)
Loss: tensor(1.1097, grad_fn=<MeanBackward0>)
Loss: tensor(0.7479, grad_fn=<MeanBackward0>)
Loss: tensor(0.5378, grad_fn=<MeanBackward0>)
Loss: tensor(0.4155, grad_fn=<MeanBackward0>)
Loss: tensor(0.3441, grad_fn=<MeanBackward0>)
Loss: tensor(0.3022, grad_fn=<MeanBackward0>)
Loss: tensor(0.2774, grad_fn=<MeanBackward0>)


In [None]:
class LinReg(nn.Module):

  def __init__(self,input_dim,output_dim):
    super(LinReg,self).__init__()
    self.lin=nn.Linear(input_dim,output_dim)

  def forward(self,x):
    return self.lin(x)
  
model=LinReg(2,1)
loss==nn.MSELoss()
optimizer=torch.optim.SGD(model.parameters(),lr=0.001)

for i in range(epoch):

  ypred=model(x)

  l=loss(y,ypred)

  l.backward()

  optimizer.step()

  optimizer.zero_grad()
  if i%9==0:
    print("Loss:",l)



Loss: tensor(38.0805, grad_fn=<MeanBackward0>)
Loss: tensor(22.2089, grad_fn=<MeanBackward0>)
Loss: tensor(13.0165, grad_fn=<MeanBackward0>)
Loss: tensor(7.6922, grad_fn=<MeanBackward0>)
Loss: tensor(4.6080, grad_fn=<MeanBackward0>)
Loss: tensor(2.8210, grad_fn=<MeanBackward0>)
Loss: tensor(1.7854, grad_fn=<MeanBackward0>)
Loss: tensor(1.1848, grad_fn=<MeanBackward0>)
Loss: tensor(0.8361, grad_fn=<MeanBackward0>)
Loss: tensor(0.6334, grad_fn=<MeanBackward0>)
Loss: tensor(0.5152, grad_fn=<MeanBackward0>)
Loss: tensor(0.4460, grad_fn=<MeanBackward0>)


In [None]:
from sklearn import datasets
x_numpy,y_numpy=datasets.make_regression(n_features=1,n_samples=100,noise=20,random_state=1)

x=torch.from_numpy(x_numpy.astype(np.float32))
y=torch.from_numpy(y_numpy.astype(np.float32))
y=y.view(100,1)
print(y[:10])

tensor([[-55.5386],
        [-10.6620],
        [ 22.7574],
        [101.0961],
        [144.3376],
        [ 33.2888],
        [ 33.0153],
        [-25.8870],
        [-99.6391],
        [ 23.8031]])


In [None]:
from torch.utils.data import Dataset,DataLoader
import torchvision
class WineDataset(Dataset):
  def __init__(self,transform=None):
    df=np.loadtxt('/content/drive/MyDrive/Data/winequality_red.csv',delimiter=",",skiprows=1)
    x_numpy=df[:,1:]
    y=df[:,0]
    self.x=torch.from_numpy(x_numpy.astype(np.float32))
    y=torch.from_numpy(y.astype(np.float32))
    self.y=y.view(y.shape[0],1)
    self.len=df.shape[0]
    self.transform=transform


  def __len__(self):
    return self.len
  def __getitem__(self,idx):
    sample=self.x[idx],self.y[idx]
    if self.transform:
      sample=self.transform(sample)
    return sample 

class ToTensor():
  def __call__(self,sample):
    inp,lab=sample
    return torch.from_numpy(inp.astype(np.float32)),torch.from_numpy(lab.astype(np.float32))

class mulTransform():
  def __call__(self,sample):
    inp,lab=sample
    lr=0.001
    return lr*inp,lab

composed=torchvision.transforms.Compose([ToTensor(),mulTransform()])


ds=WineDataset()

# print(ds[:10])

dl=DataLoader(dataset=ds,batch_size=32,shuffle=True)

for i,(feature,label) in enumerate(dl):
  pass




In [None]:
import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable
from torch import nn

import numpy as np


def pairwise(data):
    n_obs, dim = data.size()
    xk = data.unsqueeze(0).expand(n_obs, n_obs, dim)
    xl = data.unsqueeze(1).expand(n_obs, n_obs, dim)
    dkl2 = ((xk - xl)**2.0).sum(2).squeeze()
    return dkl2


class VTSNE(nn.Module):
    def __init__(self, n_points, n_topics, n_dim):
        self.n_points = n_points
        self.n_dim = n_dim
        super(VTSNE, self).__init__()
        # Logit of datapoint-to-topic weight
        self.logits_mu = nn.Embedding(n_points, n_topics)
        self.logits_lv = nn.Embedding(n_points, n_topics)
    
    @property
    def logits(self):
        return self.logits_mu

    def reparametrize(self, mu, logvar):
        # From VAE example
        # https://github.com/pytorch/examples/blob/master/vae/main.py
        std = logvar.mul(0.5).exp_()
        eps = torch.cuda.FloatTensor(std.size()).normal_()
        eps = Variable(eps)
        z = eps.mul(std).add_(mu)
        kld = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
        kld = torch.sum(kld).mul_(-0.5)
        return z, kld

    def sample_logits(self, i=None):
        if i is None:
            return self.reparametrize(self.logits_mu.weight, self.logits_lv.weight)
        else:
            return self.reparametrize(self.logits_mu(i), self.logits_lv(i))

    def forward(self, pij, i, j):
        # Get  for all points
        x, loss_kldrp = self.sample_logits()
        # Compute squared pairwise distances
        dkl2 = pairwise(x)
        # Compute partition function
        n_diagonal = dkl2.size()[0]
        part = (1 + dkl2).pow(-1.0).sum() - n_diagonal
        # Compute the numerator
        xi, _ = self.sample_logits(i)
        xj, _ = self.sample_logits(j)
        num = ((1. + (xi - xj)**2.0).sum(1)).pow(-1.0).squeeze()
        qij = num / part.expand_as(num)
        # Compute KLD(pij || qij)
        loss_kld = pij * (torch.log(pij) - torch.log(qij))
        # Compute sum of all variational terms
        return loss_kld.sum() + loss_kldrp.sum() * 1e-7

    def __call__(self, *args):
        return self.forward(*args)

In [None]:
import torch
import torch.autograd
import torch.nn.functional as F
from torch.autograd import Variable
from torch import nn

import numpy as np


def gumbel_sample(logits, tau=1.0, temperature=0.0):
    # Uniform sample
    with torch.cuda.device(logits.get_device()):
        noise = torch.rand(logits.size())
        noise.add_(1e-9).log_().neg_()
        noise.add_(1e-9).log_().neg_()
        gumbel = Variable(noise).cuda()
    sample = (logits + gumbel) / tau + temperature
    sample = F.softmax(sample.view(sample.size(0), -1))
    return sample.view_as(logits)


def pairwise(data):
    n_obs, dim = data.size()
    xk = data.unsqueeze(0).expand(n_obs, n_obs, dim)
    xl = data.unsqueeze(1).expand(n_obs, n_obs, dim)
    dkl2 = ((xk - xl)**2.0).sum(2).squeeze()
    return dkl2


class TopicSNE(nn.Module):
    def __init__(self, n_points, n_topics, n_dim):
        self.n_points = n_points
        self.n_dim = n_dim
        super(TopicSNE, self).__init__()
        # Logit of datapoint-to-topic weight
        self.logits = nn.Embedding(n_points, n_topics)
        # Vector for each topic
        self.topic = nn.Linear(n_topics, n_dim)

    def positions(self):
        # x = self.topic(F.softmax(self.logits.weight))
        x = self.logits.weight
        return x

    def dirichlet_ll(self):
        pass

    def forward(self, pij, i, j):
        # Get  for all points
        with torch.cuda.device(pij.get_device()):
            alli = torch.from_numpy(np.arange(self.n_points))
            alli = Variable(alli).cuda()
        # x = self.topic(gumbel_sample(self.logits(alli)))
        x = self.logits(alli)
        # Compute squared pairwise distances
        dkl2 = pairwise(x)
        # Compute partition function
        n_diagonal = dkl2.size()[0]
        part = (1 + dkl2).pow(-1.0).sum() - n_diagonal
        # Compute the numerator
        # xi = self.topic(gumbel_sample(self.logits(i)))
        # xj = self.topic(gumbel_sample(self.logits(j)))
        xi = self.logits(i)
        xj = self.logits(j)
        num = ((1. + (xi - xj)**2.0).sum(1)).pow(-1.0).squeeze()
        qij = num / part.expand_as(num)
        # Compute KLD
        loss_kld = pij * (torch.log(pij) - torch.log(qij))
        # Compute Dirichlet likelihood
        # loss_dir = self.dirichlet_ll()
        return loss_kld.sum() # + loss_dir

    def __call__(self, *args):
        return self.forward(*args)

In [None]:
import random

import torch
import torch.optim as optim
from torch.autograd import Variable


def chunks(n, *args):
    """Yield successive n-sized chunks from l."""
    endpoints = []
    start = 0
    for stop in range(0, len(args[0]), n):
        if stop - start > 0:
            endpoints.append((start, stop))
            start = stop
    random.shuffle(endpoints)
    for start, stop in endpoints:
        yield [a[start: stop] for a in args]


class Wrapper():
    def __init__(self, model, cuda=True, log_interval=100, epochs=1000,
                 batchsize=1024):
        self.batchsize = batchsize
        self.epochs = epochs
        self.cuda = cuda
        self.model = model
        if cuda:
            self.model.cuda()
        self.optimizer = optim.Adam(model.parameters(), lr=1e-2)
        self.log_interval = log_interval

    def fit(self, *args):
        print("problem while training")
        self.model.train()
        if self.cuda:
            self.model.cuda()
        for epoch in range(self.epochs):
            total = 0.0
            for itr, datas in enumerate(chunks(self.batchsize, *args)):
                print(datas)
                datas = [Variable(torch.from_numpy(data)) for data in datas]
                if self.cuda:
                    datas = [data.cuda() for data in datas]
                self.optimizer.zero_grad()
                loss = self.model(*datas)
                print("Loss:",loss)
                loss.backward()
                self.optimizer.step()
                total += loss.item()
            msg = 'Train Epoch: {} \tLoss: {:.6e}'
            msg = msg.format(epoch, total / (len(args[0]) * 1.0))
            print(msg)

In [None]:
from sklearn import manifold, datasets
from sklearn.metrics.pairwise import pairwise_distances
from scipy.spatial.distance import squareform
from matplotlib.patches import Ellipse

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import numpy as np

# from wrapper import Wrapper
# from tsne import TSNE
# from vtsne import VTSNE


def preprocess(perplexity=30, metric='euclidean'):
    """ Compute pairiwse probabilities for MNIST pixels.
    """
    digits = datasets.load_digits(n_class=6)
    pos = digits.data
    y = digits.target
    n_points = pos.shape[0]
    distances2 = pairwise_distances(pos, metric=metric, squared=True)
    # This return a n x (n-1) prob array
    pij = manifold._t_sne._joint_probabilities(distances2, perplexity, False)
    # Convert to n x n prob array
    pij = squareform(pij)
    return n_points, pij, y


draw_ellipse = True
n_points, pij2d, y = preprocess()
i, j = np.indices(pij2d.shape)

i = i.ravel()
j = j.ravel()
pij = pij2d.ravel().astype('float32')
# Remove self-indices
idx = i != j
i, j, pij = i[idx], j[idx], pij[idx]

n_topics = 2
n_dim = 2
print(n_points, n_dim, n_topics)

1083 2 2


In [None]:
model = VTSNE(n_points, n_topics, n_dim)
wrap = Wrapper(model, batchsize=4096, epochs=1)
for itr in range(500):
    # wrap.fit(pij, i, j)
    wrap.fit(pij,i,j)


    # Visualize the results
    embed = model.logits.weight.cpu().data.numpy()
    f = plt.figure()
    if not draw_ellipse:
        plt.scatter(embed[:, 0], embed[:, 1], c=y * 1.0 / y.max())
        plt.axis('off')
        plt.savefig('scatter_{:03d}.png'.format(itr), bbox_inches='tight')
        plt.close(f)
    else:
        # Visualize with ellipses
        var = np.sqrt(model.logits_lv.weight.clone().exp_().cpu().data.numpy())
        ax = plt.gca()
        for xy, (w, h), c in zip(embed, var, y):
            e = Ellipse(xy=xy, width=w, height=h, ec=None, lw=0.0)
            e.set_facecolor(plt.cm.Paired(c * 1.0 / y.max()))
            e.set_alpha(0.5)
            ax.add_artist(e)
        ax.set_xlim(-9, 9)
        ax.set_ylim(-9, 9)
        plt.axis('off')
        plt.savefig('scatter_{:03d}.png'.format(itr), bbox_inches='tight')
        plt.close(f)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[array([4.6101984e-09, 2.2956792e-08, 9.9866039e-11, ..., 1.3251568e-08,
       2.8926955e-10, 2.3619751e-09], dtype=float32), array([552, 552, 552, ..., 556, 556, 556]), array([753, 754, 755, ..., 517, 518, 519])]
Loss: tensor(0.0064, device='cuda:0', grad_fn=<AddBackward0>)
[array([4.3866137e-08, 1.2236535e-08, 6.5276184e-10, ..., 2.2937543e-08,
       1.2712831e-06, 2.3012625e-12], dtype=float32), array([787, 787, 787, ..., 791, 791, 791]), array([434, 435, 436, ..., 199, 200, 201])]
Loss: tensor(0.0062, device='cuda:0', grad_fn=<AddBackward0>)
[array([4.9129998e-13, 3.9405640e-10, 2.0541326e-10, ..., 2.4369935e-16,
       2.0347794e-07, 2.4352512e-12], dtype=float32), array([651, 651, 651, ..., 654, 654, 654]), array([130, 131, 132, ..., 978, 979, 980])]
Loss: tensor(0.0089, device='cuda:0', grad_fn=<AddBackward0>)
[array([2.7983985e-10, 4.6752970e-09, 5.4414928e-10, ..., 6.6828742e-13,
       3.7772594e-14, 1.8487328

In [None]:
# !pip install tsfresh tslearn 
from tsfresh.examples import load_robot_execution_failures
from tsfresh.examples.robot_execution_failures import download_robot_execution_failures
download_robot_execution_failures()
df_ts, y = load_robot_execution_failures()

In [None]:
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
extracted_features = impute(extract_features(df_ts, column_id="id",
column_sort="time"))
features_filtered = select_features(extracted_features, y)

Feature Extraction: 100%|██████████| 528/528 [00:19<00:00, 27.55it/s]
 'F_x__partial_autocorrelation__lag_8'
 'F_x__partial_autocorrelation__lag_9' ...
 'T_z__matrix_profile__feature_"median"__threshold_0.98'
 'T_z__matrix_profile__feature_"25"__threshold_0.98'
 'T_z__matrix_profile__feature_"75"__threshold_0.98'] did not have any finite values. Filling with zeros.


In [None]:
print(df_ts[:10],y[:10])

   id  time  F_x  F_y  F_z  T_x  T_y  T_z
0   1     0   -1   -1   63   -3   -1    0
1   1     1    0    0   62   -3   -1    0
2   1     2   -1   -1   61   -3    0    0
3   1     3   -1   -1   63   -2   -1    0
4   1     4   -1   -1   63   -3   -1    0
5   1     5   -1   -1   63   -3   -1    0
6   1     6   -1   -1   63   -3    0    0
7   1     7   -1   -1   63   -3   -1    0
8   1     8   -1   -1   63   -3   -1    0
9   1     9   -1   -1   61   -3    0    0 1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
10    True
dtype: bool


In [None]:
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
knn = KNeighborsTimeSeriesClassifier()
param_search = {
    'metric' : ['dtw'],
    'n_neighbors': [1, 2, 3]
}
tscv = TimeSeriesSplit(n_splits=2)
gsearch = GridSearchCV(
    estimator=knn,
cv=tscv,
    param_grid=param_search
)
gsearch.fit(
    features_filtered,
    y
)


GridSearchCV(cv=TimeSeriesSplit(gap=0, max_train_size=None, n_splits=2, test_size=None),
             estimator=KNeighborsTimeSeriesClassifier(),
             param_grid={'metric': ['dtw'], 'n_neighbors': [1, 2, 3]})

In [None]:
print(gsearch.best_params_)

{'metric': 'dtw', 'n_neighbors': 2}


In [None]:
import pandas as pd
owid_covid = pd.read_csv("https://covid.ourworldindata.org/data/owid-covid-data.csv")
owid_covid["date"] = pd.to_datetime(owid_covid["date"])
df = owid_covid[owid_covid.location == "France"].set_index("date", drop=True).resample('D').interpolate(method='linear').reset_index()

In [None]:
from sklearn.base import TransformerMixin, BaseEstimator
class DateFeatures(TransformerMixin, BaseEstimator):
    features = [
        "hour",
        "year",
        "day",
        "weekday",
        "month",
        "quarter",
    ]
    def __init__(self):
        super().__init__()
    def transform(self, df: pd.DataFrame):
        Xt = []
        for col in df.columns:
            for feature in self.features:
                date_feature = getattr(
                    getattr(
                        df[col], "dt"
                    ), feature
                    )
            date_feature.name = f"{col}_{feature}"
            Xt.append(date_feature)
        df2 = pd.concat(Xt, axis=1)
        return df2
    def fit(self, df: pd.DataFrame, y=None, **fit_params):
        return self

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
preprocessor = ColumnTransformer(
transformers=[( "date",
               
        make_pipeline(
            DateFeatures(),
    ColumnTransformer(transformers=[ ("cyclical", CyclicalFeatures(),
    ["date_day", "date_weekday", "date_month"] )
    ], remainder="passthrough") ), ["date"],
      ),], remainder="passthrough"
    )

NameError: ignored