In [1]:
import torch
import torch.autograd as autograd 
import torch.nn as nn 
import torch.nn.functional as F 
import torch.optim as optim 

In [2]:
d = [
        [[1,2,3],[4,5,6]],
        [[7,8,9],[10,11,12]]
    ]
d = torch.Tensor(d)
print("shape:",d.size())

print("sum", d[0]+d[1])

print(torch.randn([2,3]))

shape: torch.Size([2, 2, 3])
sum 
  8  10  12
 14  16  18
[torch.FloatTensor of size 2x3]


 0.2931 -0.8245  0.3434
 0.4421 -1.7660 -0.1732
[torch.FloatTensor of size 2x3]



In [3]:
# Autograd genera la computation graph que calcula los backpropagation gradient eficientemente. Una computation graph especifica
# como se combinan los datos para dar el output, y como la graph especifica los parametros involucrados en las operaciones tiene
# todos los datos para calcular las derivadas. Se usa la clase autograd.Variable
first = autograd.Variable(data=torch.Tensor([[1,2,3],[4,5,6]]), requires_grad=True)
first = first * 2
print(first.data)

aux = autograd.Variable(data=torch.randn([5,6]))
print(aux)
print(F.relu(aux))

print(F.softmax(aux))
print(F.softmax(aux).sum())# debe devolver 1, ya que softmax pasa la matrix a probabilidades y la suma debe ser 1


  2   4   6
  8  10  12
[torch.FloatTensor of size 2x3]

Variable containing:
 0.5434  1.2296 -0.0942  1.9574 -0.1271  1.7451
-0.2989  1.3809  1.2106  1.9495  0.4360  0.1652
 0.3684  0.9500 -0.0401 -0.3565 -0.6574 -2.0002
 0.6157  0.3171  1.5491 -1.6937  1.9789  0.9560
-1.0528  1.3535  0.3907  1.8049  0.3101  0.8006
[torch.FloatTensor of size 5x6]

Variable containing:
 0.5434  1.2296  0.0000  1.9574  0.0000  1.7451
 0.0000  1.3809  1.2106  1.9495  0.4360  0.1652
 0.3684  0.9500  0.0000  0.0000  0.0000  0.0000
 0.6157  0.3171  1.5491  0.0000  1.9789  0.9560
 0.0000  1.3535  0.3907  1.8049  0.3101  0.8006
[torch.FloatTensor of size 5x6]

Variable containing:
 0.0872  0.1733  0.0461  0.3587  0.0446  0.2901
 0.0416  0.2232  0.1882  0.3941  0.0868  0.0662
 0.2278  0.4075  0.1514  0.1103  0.0817  0.0213
 0.1031  0.0765  0.2622  0.0102  0.4030  0.1449
 0.0227  0.2519  0.0962  0.3956  0.0887  0.1449
[torch.FloatTensor of size 5x6]

Variable containing:
 5
[torch.FloatTensor of size 1]



In [4]:
data = [ ("me gusta comer en la cafeteria".split(), "SPANISH"),
         ("Give it to me".split(), "ENGLISH"),
         ("No creo que sea una buena idea".split(), "SPANISH"),
         ("No it is not a good idea to get lost at sea".split(), "ENGLISH") ]

test_data = [ ("Yo creo que si".split(), "SPANISH"),
              ("it is lost on me".split(), "ENGLISH")]

word_to_index = {}
for frase, idioma in data + test_data:
    for palabra in frase:
        if palabra not in word_to_index:
            word_to_index[palabra] = len(word_to_index)
print(word_to_index) # Imprimimos la bag of words

VOCAB_SIZE = len(word_to_index)
NUM_LABELS = 2

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [5]:
# Source: https://github.com/rguthrie3/DeepLearningForNLPInPytorch
class BagsOfWordsClassifier(nn.Module): # hereda de nn.module - contenedor basico de neural networks
    def __init__(self, num_labels, vocab_size):
        super(BagsOfWordsClassifier, self).__init__()
        
        #Linear proporciona el affine map, aplica una transformacion lineal a los datos, y = Ax + b, matrix A y vectores x y b
        #Parametros a aprender son A y b (se le suele llamar bias)
        self.linear = nn.Linear(vocab_size, num_labels)
    
    def forward(self, bow_vec):
        """
    In the forward pass we receive a Tensor containing the input and return a
    Tensor containing the output. You can cache arbitrary Tensors for use in the
    backward pass using the save_for_backward method.
        """
        # Pasamos los valores por la funcion lineal, y luego por la no lineal para devolver probabilidades
        return F.log_softmax(self.linear(bow_vec))
    
        #No hay backward xk cogemos el predefinido, pero si lo quisieramos hacer:
        """
    In the backward pass we receive a Tensor containing the gradient of the loss
    with respect to the output, and we need to compute the gradient of the loss
    with respect to the input.
        """

In [6]:
def make_bow_vector(sentence, word_to_index):
    vec = torch.zeros(len(word_to_index))
    for palabra in sentence:
        vec[word_to_index[palabra]] += 1
    return vec.view(1,-1)

def make_target(label, label_to_index):
    return torch.LongTensor([label_to_index[label]])

In [7]:
model = BagsOfWordsClassifier(NUM_LABELS,VOCAB_SIZE)
for param in model.parameters(): # Los parametros mostrados son A(matriz) y b(vector) 
    print(param.data)




Columns 0 to 9 
-0.0303 -0.0996 -0.1653 -0.0097 -0.1270  0.1892  0.0897 -0.0550 -0.0498  0.1454
 0.1961  0.0792 -0.1286  0.0566  0.0367 -0.0973  0.1507  0.1139 -0.1598  0.1311

Columns 10 to 19 
-0.0269  0.0622 -0.0011 -0.1049 -0.1298 -0.1307 -0.1113  0.1915 -0.1748  0.0766
-0.0178 -0.1017 -0.1327  0.1619  0.0461 -0.0503 -0.0724 -0.0724 -0.1491  0.0447

Columns 20 to 25 
-0.0886  0.0763 -0.0632  0.1349  0.0955 -0.1251
 0.0622  0.0522 -0.0727  0.1604 -0.1952 -0.1476
[torch.FloatTensor of size 2x26]


 0.1611
 0.0981
[torch.FloatTensor of size 2]



In [8]:
# To run the model, pass in a BoW vector, but wrapped in an autograd.Variable
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_index)
log_probs = model(autograd.Variable(bow_vector))
print (log_probs)
 
# El print devuelve un par de numeros (español o ingles) pero cada numero de que es?
# Definimos el 1er indice como español, y el 2do como ingles
label_to_index = { "SPANISH":0, "ENGLISH":1}

Variable containing:
-0.8673 -0.5449
[torch.FloatTensor of size 1x2]



In [9]:
#Opcional, test data antes del entrenamiento, para ver el antes y el despues
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_index))
    log_probs = model(bow_vec)
    print (log_probs)
print( next(model.parameters())[:,word_to_index["creo"]] )# Print the matrix column corresponding to "creo"

Variable containing:
-0.4805 -0.9635
[torch.FloatTensor of size 1x2]

Variable containing:
-0.8686 -0.5439
[torch.FloatTensor of size 1x2]

Variable containing:
1.00000e-02 *
 -2.6949
 -1.7846
[torch.FloatTensor of size 2]



In [10]:
#Entrenamos, para ello calculamos la probabilidad como antes, calculamos la funcion de perdida y el gradiente de la misma.
#Por ultimo actualizamos los parametros con un paso de gradiente. 

loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.05)

#Normalmente se hace el entrenamiento varias veces(5 a 30 veces)
#100 es demasiado, pero los datasets reales tb tienen mas de 2 instancias
for epoch in range(100):
    for instance, label in data:
        # 1. Borrar contenido de los gradientes anteriores antes de cada instancia, debido a que pytorch los acumula.
        model.zero_grad()
        
        # 2. Crear vector BOW, y el target debe encapsularse en una Variable como un entero, asi que si vemos SPANISH sera 0
        # De esta manera la fx de perdida sabe que el elemento 0 es probabilidad de Spanish
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_index), requires_grad=False)
        target = autograd.Variable(make_target(label, label_to_index), requires_grad=False)
        
        # 3. Ejecutar el forward pass
        log_probs = model(bow_vec)
        
        # 4. Calcular la perdida, gradientes, y actualizar los parametros llamando a optimizer.step
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()
        

In [11]:
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance,word_to_index))
    log_probs = model(bow_vec)
    print(log_probs)
print(next(model.parameters())[:,word_to_index["to"]])
#Vemos como el 1er indice, el correspondiente a SPANISH sube mas que el de ENGLISH

Variable containing:
-0.1010 -2.3425
[torch.FloatTensor of size 1x2]

Variable containing:
-2.5055 -0.0852
[torch.FloatTensor of size 1x2]

Variable containing:
-0.7142
 0.5046
[torch.FloatTensor of size 2]

