In [1]:
#https://github.com/rguthrie3/DeepLearningForNLPInPytorch/blob/master/Deep%20Learning%20for%20Natural%20Language%20Processing%20with%20Pytorch.ipynb 

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

torch.manual_seed(1)

<torch._C.Generator at 0x106d9cc70>

# 1. Torch Library 소개

In [2]:
# Create a torch.Tensor object with the given data.  It is a 1D vector
V_data = [1., 2., 3.]
V = torch.Tensor(V_data)
print(V)

# Creates a matrix
M_data = [[1., 2., 3.], [4., 5., 6]]
M = torch.Tensor(M_data)
print(M)

# Create a 3D tensor of size 2x2x2.
T_data = [[[1.,2.], [3.,4.]],
          [[5.,6.], [7.,8.]]]
T = torch.Tensor(T_data)
print(T)


 1
 2
 3
[torch.FloatTensor of size 3]


 1  2  3
 4  5  6
[torch.FloatTensor of size 2x3]


(0 ,.,.) = 
  1  2
  3  4

(1 ,.,.) = 
  5  6
  7  8
[torch.FloatTensor of size 2x2x2]



In [3]:
# Index into V and get a scalar
print(V[0])

# Index into M and get a vector
print(M[0])

# Index into T and get a matrix
print(T[0])

1.0

 1
 2
 3
[torch.FloatTensor of size 3]


 1  2
 3  4
[torch.FloatTensor of size 2x2]



In [4]:
#Default는 Float입니다.
x = torch.randn((3, 4, 5))
print(x)


(0 ,.,.) = 
  0.6614  0.2669  0.0617  0.6213 -0.4519
 -0.1661 -1.5228  0.3817 -1.0276 -0.5631
 -0.8923 -0.0583 -0.1955 -0.9656  0.4224
  0.2673 -0.4212 -0.5107 -1.5727 -0.1232

(1 ,.,.) = 
  3.5870 -1.8313  1.5987 -1.2770  0.3255
 -0.4791  1.3790  2.5286  0.4107 -0.9880
 -0.9081  0.5423  0.1103 -2.2590  0.6067
 -0.1383  0.8310 -0.2477 -0.8029  0.2366

(2 ,.,.) = 
  0.2857  0.6898 -0.6331  0.8795 -0.6842
  0.4533  0.2912 -0.8317 -0.5525  0.6355
 -0.3968 -0.6571 -1.6428  0.9803 -0.0421
 -0.8206  0.3133 -1.1352  0.3773 -0.2824
[torch.FloatTensor of size 3x4x5]



In [5]:
# By default, it concatenates along the first axis (concatenates rows)
# 간단하게 말해서 그냥 아래에 쌓는 기능
x_1 = torch.randn(2, 5)
y_1 = torch.randn(3, 5)
z_1 =torch.cat([x_1, y_1])
print(z_1)

# Concatenate columns:
# Column 형으로 쌓는 기능
x_2 = torch.randn(2, 3)
y_2 = torch.randn(2, 5)
z_2 = torch.cat([x_2, y_2], 1) # second arg specifies which axis to concat along
print(z_2)

# If your tensors are not compatible, torch will complain.  Uncomment to see the error
# torch.cat([x_1, x_2])


-2.5667 -1.4303  0.5009  0.5438 -0.4057
 1.1341 -1.1115  0.3501 -0.7703 -0.1473
 0.6272  1.0935  0.0939  1.2381 -1.3459
 0.5119 -0.6933 -0.1668 -0.9999 -1.6476
 0.8098  0.0554  1.1340 -0.5326  0.6592
[torch.FloatTensor of size 5x5]


-1.5964 -0.3769 -3.1020 -0.0020 -1.0952  0.6016  0.6984 -0.8005
-0.0995 -0.7213  1.2708  1.5381  1.4673  1.5951 -1.5279  1.0156
[torch.FloatTensor of size 2x8]



# Reshape Tensor

In [6]:
x = torch.randn(2, 3, 4)
print(x)
print(x.view(2, 12)) # Reshape to 2 rows, 12 columns
print(x.view(2, -1)) # Same as above.  If one of the dimensions is -1, its size can be inferred


(0 ,.,.) = 
 -0.2020 -1.2865  0.8231 -0.6101
 -1.2960 -0.9434  0.6684  1.1628
 -0.3229  1.8782 -0.5666  0.4016

(1 ,.,.) = 
 -0.1153  0.3170  0.5629  0.8662
 -0.3528  0.3482  1.1371 -0.3339
 -1.4724  0.7296 -0.1312 -0.6368
[torch.FloatTensor of size 2x3x4]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



Columns 0 to 9 
-0.2020 -1.2865  0.8231 -0.6101 -1.2960 -0.9434  0.6684  1.1628 -0.3229  1.8782
-0.1153  0.3170  0.5629  0.8662 -0.3528  0.3482  1.1371 -0.3339 -1.4724  0.7296

Columns 10 to 11 
-0.5666  0.4016
-0.1312 -0.6368
[torch.FloatTensor of size 2x12]



# 2. Computation Graphs & Auto. Diff

In [7]:
# Variables wrap tensor objects
x = autograd.Variable( torch.Tensor([1., 2., 3]), requires_grad=True )
# You can access the data with the .data attribute
print(x.data)

# You can also do all the same operations you did with tensors with Variables.
y = autograd.Variable( torch.Tensor([4., 5., 6]), requires_grad=True )
z = x + y
print(z.data)

# BUT z knows something extra.
print(z.grad_fn)


 1
 2
 3
[torch.FloatTensor of size 3]


 5
 7
 9
[torch.FloatTensor of size 3]

<AddBackward1 object at 0x1064c00b8>


In [8]:
# Lets sum up all the entries in z
s = z.sum()
print(s)
print(s.grad_fn)

Variable containing:
 21
[torch.FloatTensor of size 1]

<SumBackward0 object at 0x10655b780>


In [9]:
s.backward() # calling .backward() on any variable will run backprop, starting from it.
print(x.grad)

Variable containing:
 1
 1
 1
[torch.FloatTensor of size 3]



# 아래 관련된 정보를 이해해야함. 매우 중요함

In [10]:
x = torch.randn((2,2))
y = torch.randn((2,2))
z = x + y # These are Tensor types, and backprop would not be possible

var_x = autograd.Variable( x )
var_y = autograd.Variable( y )
var_z = var_x + var_y # var_z contains enough information to compute gradients, as we saw above
print(var_z.grad_fn)

var_z_data = var_z.data # Get the wrapped Tensor object out of var_z...
new_var_z = autograd.Variable( var_z_data ) # Re-wrap the tensor in a new variable

# ... does new_var_z have information to backprop to x and y?
# NO!
print(new_var_z.grad_fn)
# And how could it?  We yanked the tensor out of var_z (that is what var_z.data is).  This tensor
# doesn't know anything about how it was computed.  We pass it into new_var_z, and this is all the information
# new_var_z gets.  If var_z_data doesn't know how it was computed, theres no way new_var_z will.
# In essence, we have broken the variable away from its past history

None
None


# 3. Deep Learning Building Blocks: Affine maps, non-linearities and obj

In [11]:
#Affine Maps: f(x) + Ax + b

lin = nn.Linear(5,3) #Param A,b
data = autograd.Variable( torch.randn(2, 5)) #data 2x5.

print(lin(data))

Variable containing:
 0.4491 -0.1994  0.0853
-0.0392  0.2063  0.0476
[torch.FloatTensor of size 2x3]



In [12]:
# In pytorch, most non-linearities are in torch.functional (we have it imported as F)
data = autograd.Variable( torch.randn(2, 2) )
print(data)
print(F.relu(data))

Variable containing:
-0.1277 -0.9501
 1.4064 -0.4549
[torch.FloatTensor of size 2x2]

Variable containing:
 0.0000  0.0000
 1.4064  0.0000
[torch.FloatTensor of size 2x2]



In [13]:
#Softmax and Prob

data = autograd.Variable( torch.randn(5) )
print(data)
print(F.softmax(data))
print(F.softmax(data).sum())
print(F.log_softmax(data))

Variable containing:
 0.5838
-0.9699
-0.9469
-0.2330
 0.6424
[torch.FloatTensor of size 5]

Variable containing:
 0.3413
 0.0722
 0.0739
 0.1508
 0.3619
[torch.FloatTensor of size 5]

Variable containing:
 1
[torch.FloatTensor of size 1]

Variable containing:
-1.0750
-2.6287
-2.6057
-1.8918
-1.0164
[torch.FloatTensor of size 5]



  """
  
  import sys


# 4. Optimization & Training
# 5. Creating NW Components in Pytorch

In [14]:
# Logistic Regression Bag-of-Words classifier

data = [ ("me gusta comer en la cafeteria".split(), "SPANISH"),
         ("Give it to me".split(), "ENGLISH"),
         ("No creo que sea una buena idea".split(), "SPANISH"),
         ("No it is not a good idea to get lost at sea".split(), "ENGLISH") ]

test_data = [ ("Yo creo que si".split(), "SPANISH"),
              ("it is lost on me".split(), "ENGLISH")]

In [15]:
# word_to_ix maps each word in the vocab to a unique integer, which will be its
# index into the Bag of words vector
word_to_ix = {}
for sent, _ in data + test_data:
    for word in sent:
        if word not in word_to_ix:
            word_to_ix[word] = len(word_to_ix)
            #print(len(word_to_ix))

print(word_to_ix)

VOCAB_SIZE = len(word_to_ix)
NUM_LABELS = 2

{'me': 0, 'gusta': 1, 'comer': 2, 'en': 3, 'la': 4, 'cafeteria': 5, 'Give': 6, 'it': 7, 'to': 8, 'No': 9, 'creo': 10, 'que': 11, 'sea': 12, 'una': 13, 'buena': 14, 'idea': 15, 'is': 16, 'not': 17, 'a': 18, 'good': 19, 'get': 20, 'lost': 21, 'at': 22, 'Yo': 23, 'si': 24, 'on': 25}


In [16]:
nn.Module?

In [18]:
class BoWClassifier(nn.Module): # inheriting from nn.Module!
    
    def __init__(self, num_labels, vocab_size):
        # calls the init function of nn.Module.  Dont get confused by syntax,
        # just always do it in an nn.Module
        super(BoWClassifier, self).__init__()
        
        # Define the parameters that you will need.  In this case, we need A and b,
        # the parameters of the affine mapping.
        # Torch defines nn.Linear(), which provides the affine map.
        # Make sure you understand why the input dimension is vocab_size
        # and the output is num_labels!
        self.linear = nn.Linear(vocab_size, num_labels)
        
        # NOTE! The non-linearity log softmax does not have parameters! So we don't need
        # to worry about that here
        
    def forward(self, bow_vec):
        # Pass the input through the linear layer,
        # then pass that through log_softmax.
        # Many non-linearities and other functions are in torch.nn.functional
        return F.log_softmax(self.linear(bow_vec))

In [19]:
def make_bow_vector(sentence, word_to_ix):
    vec = torch.zeros(len(word_to_ix))
    for word in sentence:
        vec[word_to_ix[word]] += 1
    return vec.view(1, -1)

def make_target(label, label_to_ix):
    return torch.LongTensor([label_to_ix[label]])

In [20]:
model = BoWClassifier(NUM_LABELS, VOCAB_SIZE)

In [22]:
for param in model.parameters():
    print(param)

Parameter containing:

Columns 0 to 9 
-0.0372 -0.0723  0.0818 -0.0668 -0.1102  0.0445 -0.1418 -0.0419  0.1002  0.0733
-0.1794 -0.1151  0.1199  0.0430 -0.0692 -0.0741  0.1253  0.1414  0.1889  0.0572

Columns 10 to 19 
 0.1670 -0.1338  0.0017 -0.0579 -0.1097 -0.1103  0.0494  0.0205 -0.1001 -0.1382
 0.0947 -0.0158 -0.1147 -0.1921  0.1195 -0.0285  0.0806  0.0097 -0.1830 -0.0962

Columns 20 to 25 
-0.1625  0.1922 -0.0334 -0.0453 -0.0721 -0.0981
 0.0552 -0.0397 -0.1894  0.0688  0.0075 -0.0630
[torch.FloatTensor of size 2x26]

Parameter containing:
1.00000e-02 *
  7.9225
  4.1404
[torch.FloatTensor of size 2]



In [26]:
# To run the model, pass in a BoW vector, but wrapped in an autograd.Variable
sample = data[0]
bow_vector = make_bow_vector(sample[0], word_to_ix)
log_probs = model(autograd.Variable(bow_vector))
print(log_probs)

Variable containing:
-0.6198 -0.7723
[torch.FloatTensor of size 1x2]





In [29]:
label_to_ix = { "SPANISH": 0, "ENGLISH": 1 }

In [35]:
#Train 시키기
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)

print(next(model.parameters())[:,word_to_ix["creo"]])

Variable containing:
-0.7991 -0.5974
[torch.FloatTensor of size 1x2]

Variable containing:
-0.6153 -0.7776
[torch.FloatTensor of size 1x2]

Variable containing:
 0.1670
 0.0947
[torch.FloatTensor of size 2]





In [49]:
loss_function = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Usually you want to pass over the training data several times.
# 100 is much bigger than on a real data set, but real datasets have more than
# two instances.  Usually, somewhere between 5 and 30 epochs is reasonable.
for epoch in range(100):
    for instance, label in data:
        # Step 1. 파이토치는 gradients를 쌓으므로 처음에 초기화 하자
        # before each instance
        model.zero_grad()
    
        # Step 2. Make our BOW vector and also we must wrap the target in a Variable
        # as an integer.  For example, if the target is SPANISH, then we wrap the integer
        # 0.  The loss function then knows that the 0th element of the log probabilities is
        # the log probability corresponding to SPANISH
        bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
        target = autograd.Variable(make_target(label, label_to_ix))
    
        # Step 3. Run our forward pass.
        log_probs = model(bow_vec)
    
        # Step 4. Compute the loss, gradients, and update the parameters by calling
        # optimizer.step()
        loss = loss_function(log_probs, target)
        loss.backward()
        optimizer.step()



In [51]:
for instance, label in test_data:
    bow_vec = autograd.Variable(make_bow_vector(instance, word_to_ix))
    log_probs = model(bow_vec)
    print(log_probs)
print(next(model.parameters())[:,word_to_ix["creo"]]) # Index corresponding to Spanish goes up, English goes down

Variable containing:
-0.1232 -2.1547
[torch.FloatTensor of size 1x2]

Variable containing:
-2.7729 -0.0645
[torch.FloatTensor of size 1x2]

Variable containing:
 0.6655
-0.4038
[torch.FloatTensor of size 2]





[(['Yo', 'creo', 'que', 'si'], 'SPANISH'),
 (['it', 'is', 'lost', 'on', 'me'], 'ENGLISH')]