In [103]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.nn.init import xavier_normal, kaiming_normal


import torchtext
from torchtext.data import Field, TabularDataset
from torchtext.data import Iterator, BucketIterator

import gensim

In [12]:

# model = gensim.models.KeyedVectors.load_word2vec_format()
# weights = torch.FloatTensor(model.vectors)

In [13]:
# Load pretrained GloVe word vectors: the "6B" set, 50 dimensions per word.
# NOTE(review): torchtext presumably downloads and caches the vectors on first use — confirm.
glove = torchtext.vocab.GloVe(name="6B", dim=50)

In [14]:
x = glove['cat']
y = glove['dog']
# Euclidean distance between the two vectors. The value is computed but not
# shown: only the last expression of a cell is displayed.
torch.norm(y - x)

# cosine_similarity reduces over dim=1 by default, so each 1-D vector gets a
# leading batch dimension first; the result is a one-element tensor.
torch.cosine_similarity(x.unsqueeze(0), y.unsqueeze(0))


tensor([0.9218])

In [15]:
# Euclidean distance from 'cat' to a handful of hand-picked words in GloVe space.
word = 'cat'
other = ['dog', 'bike', 'kitten', 'puppy', 'kite', 'computer', 'neuron']
anchor = glove[word]
for candidate in other:
    gap = (anchor - glove[candidate]).norm()  # L2 norm of the difference vector
    print(candidate, gap.item())

dog 1.8846031427383423
bike 5.048375129699707
kitten 3.5068609714508057
puppy 3.0644655227661133
kite 4.210376262664795
computer 6.030652046203613
neuron 6.228669166564941


In [26]:
# Preview the index-to-string table; displaying the whole list would flood the
# notebook with one entry per vocabulary word.
glove.itos[:10]

In [16]:
def print_closest_words(vec, n=5):
    """Print the ``n`` vocabulary words nearest to ``vec``, skipping the closest one.

    Distance is the Euclidean norm between ``vec`` and every row of
    ``glove.vectors``. The single nearest row is always dropped before printing
    (when ``vec`` is itself a vocabulary vector, that row is the query word at
    distance 0). Each printed line is ``<word> <distance>``.

    Args:
        vec: query tensor, broadcastable against ``glove.vectors``.
        n: how many neighbours to print. Values larger than the vocabulary are
            clamped; values <= 0 print nothing.
    """
    dists = torch.norm(glove.vectors - vec, dim=1)     # distance from vec to every word
    if n <= 0:
        return
    # Only the n + 1 smallest distances are needed, so let torch select them
    # instead of sorting every (index, distance) pair of the vocabulary in Python.
    k = min(n + 1, dists.numel())
    values, indices = torch.topk(dists, k, largest=False, sorted=True)
    # Skip position 0 (the nearest row); the remaining rows are already in
    # ascending order of distance.
    for idx, difference in zip(indices[1:].tolist(), values[1:].numpy()):
        print(glove.itos[idx], difference)

# Ten nearest neighbours of 'cat'.
# NOTE(review): the query vector itself is presumably the nearest row (distance 0),
# which is the one entry print_closest_words skips — confirm.
print_closest_words(glove["cat"], n=10)

dog 1.8846031
rabbit 2.4572797
monkey 2.8102052
cats 2.8972247
rat 2.9455352
beast 2.9878407
monster 3.0022194
pet 3.0396757
snake 3.0617998
puppy 3.0644655


In [29]:
# Word-vector arithmetic: start from 'yes', add the offset from 'small' to 'big',
# and print the five nearest words (n defaults to 5).
# NOTE(review): print_closest_words always drops the single closest match; for a
# composed vector like this one that match is not guaranteed to be a query word,
# so a genuine result may be hidden — confirm.
print_closest_words(glove['big'] - glove['small'] + glove['yes'])

yeah 4.121112
everybody 4.362761
guess 4.381542
hey 4.4264035
maybe 4.44897


In [49]:
# Show the first ten lines of the scraped page. Calling head on the file directly
# avoids the "cat: stdout: Broken pipe" message that `cat FILE | head` emits when
# head exits before cat has finished writing.
! head sklearn_data/1_1_Linear_Models.txt

<div class="section" id="linear-models">
<span id="linear-model"></span><h1>1.1. Linear Models<a class="headerlink" href="#linear-models" title="Permalink to this headline">¶</a></h1>
<p>The following are a set of methods intended for regression in which
the target value is expected to be a linear combination of the features.
In mathematical notation, if <span class="math notranslate nohighlight">\(\hat{y}\)</span> is the predicted
value.</p>
<div class="math notranslate nohighlight">
\[\hat{y}(w, x) = w_0 + w_1 x_1 + ... + w_p x_p\]</div>
<p>Across the module, we designate the vector <span class="math notranslate nohighlight">\(w = (w_1,
..., w_p)\)</span> as <code class="docutils literal notranslate"><span class="pre">coef_</span></code> and <span class="math notranslate nohighlight">\(w_0\)</span> as <code class="docutils literal notranslate"><span class="pre">intercept_</span></code>.</p>
cat: stdout: Broken pipe


In [36]:
from collections import Counter

In [57]:
# Three short English passages used as a toy corpus for the token counts below.
sample = [
    "But as I wrote in my tweet, I agree with you that it's hard to police. I just disagree that the main losers will be U.S. consumers. I think the main losers will be poor people in poor countries.",
    "I would like to discuss several common exploration strategies in Deep RL here. As this is a very big topic, my post by no means can cover all the important subtopics. I plan to update it periodically and keep further enriching the content gradually in time.",
    "Good exploration becomes especially hard when the environment rarely provides rewards as feedback or the environment has distracting noise. Many exploration strategies are proposed to solve one or both of the following problems."
]

In [58]:
# Tally whitespace-separated tokens over every passage in the sample.
# NOTE(review): str.split() keeps punctuation and capitalisation attached to the
# tokens ("tweet,", "But") — confirm this is the tokenisation you want.
counter = Counter(
    token
    for text in sample
    for token in text.split()
)

87

In [66]:
# Build a vocabulary from the token counts and attach a GloVe vector to each
# entry; two special tokens are requested explicitly.
# NOTE(review): presumably '<pad>' and '<unk>' take indices 0 and 1 — confirm.
vocabulary = torchtext.vocab.Vocab(counter, max_size=len(counter), vectors=glove, specials=['<pad>', '<unk>'])
# Zero row 1 of the vector matrix in place; torch.zero_ returns the zeroed row,
# which is what the cell displays.
torch.zero_(vocabulary.vectors[1])

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0.])

In [67]:
# Number of rows in the vector matrix. It prints 89 here, presumably the 87
# counted tokens plus the 2 special tokens.
print('Embedding vocab size: ', vocabulary.vectors.size(0))

Embedding vocab size:  89


Embedding(89, 50)

In [75]:

# Wrap the vocabulary's vector matrix in an embedding layer.
embedding = nn.Embedding.from_pretrained(vocabulary.vectors)
# Look up the embedding for index 2 (row 1 was zeroed in an earlier cell, so
# index 2 is used to get a non-zero vector back).
input_ = torch.LongTensor([2])
embedding(input_)

tensor([[ 4.1800e-01,  2.4968e-01, -4.1242e-01,  1.2170e-01,  3.4527e-01,
         -4.4457e-02, -4.9688e-01, -1.7862e-01, -6.6023e-04, -6.5660e-01,
          2.7843e-01, -1.4767e-01, -5.5677e-01,  1.4658e-01, -9.5095e-03,
          1.1658e-02,  1.0204e-01, -1.2792e-01, -8.4430e-01, -1.2181e-01,
         -1.6801e-02, -3.3279e-01, -1.5520e-01, -2.3131e-01, -1.9181e-01,
         -1.8823e+00, -7.6746e-01,  9.9051e-02, -4.2125e-01, -1.9526e-01,
          4.0071e+00, -1.8594e-01, -5.2287e-01, -3.1681e-01,  5.9213e-04,
          7.4449e-03,  1.7778e-01, -1.5897e-01,  1.2041e-02, -5.4223e-02,
         -2.9871e-01, -1.5749e-01, -3.4758e-01, -4.5637e-02, -4.4251e-01,
          1.8785e-01,  2.7849e-03, -1.8411e-01, -1.1514e-01, -7.8581e-01]])

In [104]:
class Net1(nn.Module):
    """Stack of gated (GLU) 1-D convolutions with scaled residual connections,
    followed by max-over-time pooling and a linear + ReLU head.

    Args:
        input_dim: stored on the instance; not used by any layer yet.
        emb_dim: stored on the instance; not used by any layer yet.
        hidden_dim: number of channels entering and leaving every conv block.
        glu_layers: number of convolution + GLU blocks in the stack.
        kernel_size: width of every convolution. Must be odd so the symmetric
            padding keeps the sequence length unchanged and the residual sum
            lines up.
        dropout: dropout probability applied to the input of every conv block.
        device: device on which the residual scaling constant is created.
        output_dim: size of the final linear layer's output; defaults to
            ``hidden_dim`` when omitted.

    Raises:
        ValueError: if ``kernel_size`` is even.
    """

    def __init__(self, input_dim, emb_dim, hidden_dim, glu_layers, kernel_size, dropout, device,
                 output_dim=None):
        super().__init__()

        if kernel_size % 2 == 0:
            # padding = (kernel_size - 1) // 2 only preserves length for odd kernels;
            # an even kernel would shorten the sequence and break `x_conv + x`.
            raise ValueError("kernel_size must be odd, got {}".format(kernel_size))

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hidden_dim = hidden_dim
        self.glu_layers = glu_layers
        self.kernel_size = kernel_size
        self.device = device

        # Each conv doubles the channel count; the GLU in forward() halves it again.
        self.ConvLayers = nn.ModuleList([
            nn.Conv1d(
                in_channels=hidden_dim,
                out_channels=2 * hidden_dim,
                kernel_size=kernel_size,
                padding=(kernel_size - 1) // 2
            )
            for _ in range(glu_layers)
        ])

        # NOTE(review): the draft pooled with a window of `glu_layers` and left the
        # linear layer without dimensions. Pooling over the whole sequence gives a
        # fixed-size (batch, hidden_dim) summary that the linear layer can be sized
        # for — confirm this is the intended head.
        self.max = nn.AdaptiveMaxPool1d(1)
        self.linear = nn.Linear(hidden_dim, hidden_dim if output_dim is None else output_dim)

        self.dropout = nn.Dropout(dropout)

        # sqrt(0.5) rescales the sum of two branches. Registered as a buffer so it
        # moves with the module on .to(...) / .cuda().
        self.register_buffer("res_scale", torch.sqrt(torch.FloatTensor([0.5])).to(device))

    def forward(self, x):
        """Run the gated conv stack and the pooled linear head.

        Args:
            x: float tensor of shape (batch, hidden_dim, seq_len).

        Returns:
            Tensor of shape (batch, output_dim); non-negative because of the
            final ReLU.
        """
        for conv in self.ConvLayers:
            x_conv = conv(self.dropout(x))        # (batch, 2 * hidden_dim, seq_len)
            x_conv = F.glu(x_conv, dim=1)         # gate: back to hidden_dim channels
            x = (x_conv + x) * self.res_scale     # scaled residual connection

        x = self.max(x).squeeze(2)                # max over time -> (batch, hidden_dim)

        return F.relu(self.linear(x))
                

In [80]:
# IPython introspection (`??` shows the object's source); a development aid that
# produces no value used by later cells.
torch.nn.modules.pooling._MaxPoolNd??

In [102]:
# Shape check for MaxPool1d: a window of 5 with stride 2 over a length-50 sequence
# yields floor((50 - 5) / 2) + 1 = 23 positions; batch and channel dims are kept.
# The tensors are named pool_input / pool_output so the builtin `input` is not shadowed.
pool = nn.MaxPool1d(5, stride=2)
pool_input = torch.randn(1, 16, 50)
pool_output = pool(pool_input)

print(pool_input.shape, pool_output.shape)

torch.Size([1, 16, 50]) torch.Size([1, 16, 23])


In [None]:
class Net2(nn.Module):
    """Placeholder for a second architecture; no layers are defined yet."""

    def __init__(self):
        super().__init__()
        # TODO: add layers and a forward() method.

In [None]:


class Net(nn.Module):
    """Small CNN: two 3x3 conv + ReLU + 2x2 max-pool stages, then three fully
    connected layers ending in 10 outputs."""

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 input image channel -> 6 -> 16 channels, 3x3 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Affine layers (y = Wx + b); fc1 takes 16 channels of a 6x6 feature map.
        self.fc1 = nn.Linear(in_features=16 * 6 * 6, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Apply both conv/pool stages, flatten, and run the three linear layers."""
        # A single int for kernel_size means a square (2, 2) pooling window.
        stage1 = F.max_pool2d(F.relu(self.conv1(x)), kernel_size=2)
        stage2 = F.max_pool2d(F.relu(self.conv2(stage1)), kernel_size=2)
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first (batch)."""
        count = 1
        for extent in x.shape[1:]:
            count *= extent
        return count


net = Net()
print(net)