In [2]:
from __future__ import print_function
import torch

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# You only define forward(); autograd derives the backward pass (gradients) automatically.

class Net(nn.Module):
    """Small convolutional classifier: two conv + max-pool stages, then three linear layers.

    The first linear layer takes 16 * 6 * 6 = 576 flattened features, which is
    what the two conv/pool stages produce for e.g. a 1x32x32 input. The network
    returns 10 raw scores per sample (no softmax is applied).
    """

    def __init__(self):
        super(Net, self).__init__()
        # Convolutions: 1 -> 6 channels, then 6 -> 16 channels, both with 3x3 kernels.
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # Fully connected layers (affine maps y = Wx + b).
        # 16 channels * 6 * 6 spatial positions remain after the second pooling.
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the output of ``fc3``."""
        # Stage 1: convolution -> ReLU -> 2x2 max pooling (window given as a tuple).
        stage1 = F.relu(self.conv1(x))
        stage1 = F.max_pool2d(stage1, (2, 2))
        # Stage 2: same pattern; a single int is enough for a square window.
        stage2 = F.relu(self.conv2(stage1))
        stage2 = F.max_pool2d(stage2, 2)
        # Flatten everything except the batch dimension before the linear layers.
        flat = stage2.view(-1, self.num_flat_features(stage2))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all dims but the first."""
        per_sample_shape = x.size()[1:]  # drop the batch dimension
        return per_sample_shape.numel()


# Instantiate the network; printing a Module lists its registered sub-layers.
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=576, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [7]:
# net.parameters() yields every learnable tensor of the model; collect them in a list
params = [*net.parameters()]
print(len(params))
print(params[0].shape)  # weight tensor of conv1

10
torch.Size([6, 1, 3, 3])


In [None]:
## CountVectorizer in sklearn, just for fun

In [20]:
# Toy corpus: three short sentences used to demonstrate the vectorizer
sample = [
    "call you in a sec",
    "call me a cab please",
    "please don't call me Shirley",
]

In [21]:
from sklearn.feature_extraction.text import CountVectorizer
# Create a vectorizer with all-default options; it is fitted on the corpus in a later cell.
vect = CountVectorizer()

In [22]:
vect.fit(sample) 
# fit() follows the same calling pattern as other sklearn estimators,
# but a vectorizer is not a predictive "model" per se:
# fitting only learns the vocabulary (the set of tokens) from the corpus.
# No target y is passed, so nothing about an x -> y relationship is learned.

CountVectorizer(analyzer='word', binary=False, decode_error='strict',
                dtype=<class 'numpy.int64'>, encoding='utf-8', input='content',
                lowercase=True, max_df=1.0, max_features=None, min_df=1,
                ngram_range=(1, 1), preprocessor=None, stop_words=None,
                strip_accents=None, token_pattern='(?u)\\b\\w\\w+\\b',
                tokenizer=None, vocabulary=None)

In [23]:
import pandas as pd

In [24]:
# Show the fitted vocabulary as a plain list of strings.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() when it exists and fall back on older releases.
# .tolist() turns the returned ndarray into a list of Python str, matching the
# value the old method displayed.
(
    vect.get_feature_names_out().tolist()
    if hasattr(vect, "get_feature_names_out")
    else vect.get_feature_names()
)

['cab', 'call', 'don', 'in', 'me', 'please', 'sec', 'shirley', 'you']

In [25]:
# Encode each sentence as a row of token counts over the fitted vocabulary
# (dtm = document-term matrix).
sample_dtm = vect.transform(sample)

In [26]:
# Displaying the object shows only a summary: transform() returned a sparse matrix.
sample_dtm

<3x9 sparse matrix of type '<class 'numpy.int64'>'
	with 13 stored elements in Compressed Sparse Row format>

In [27]:
# Convert to a dense array to inspect the actual counts (cheap for this 3x9 matrix).
sample_dtm.toarray()

array([[0, 1, 0, 1, 0, 0, 1, 0, 1],
       [1, 1, 0, 0, 1, 1, 0, 0, 0],
       [0, 1, 1, 0, 1, 1, 0, 1, 0]])

In [28]:
# Show the document-term matrix as a table whose columns are the vocabulary terms.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# use get_feature_names_out() when it exists and fall back on older releases.
pd.DataFrame(
    sample_dtm.toarray(),
    columns=(
        vect.get_feature_names_out()
        if hasattr(vect, "get_feature_names_out")
        else vect.get_feature_names()
    ),
)

Unnamed: 0,cab,call,don,in,me,please,sec,shirley,you
0,0,1,0,1,0,0,1,0,1
1,1,1,0,0,1,1,0,0,0
2,0,1,1,0,1,1,0,1,0
