In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [15]:
# use torchtext==0.3.1 only

In [2]:
TEXT = torch.load("vocab.pt")
LABELS = torch.load("label.pt")

In [3]:
print(TEXT.freqs.most_common(20))

[('.', 1154137), ('the', 1103728), (',', 795017), ('i', 627125), ('and', 598144), ('a', 558247), ('to', 536433), ('it', 495415), ('of', 441118), ('this', 405913), ('is', 394383), (':', 302786), ('in', 257578), ('for', 246061), ('!', 230782), ('that', 225025), ('was', 194573), ('you', 192227), ('not', 186662), ('"', 182002)]


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cpu')

In [107]:
# create the model 
class CNN(nn.Module):
    def __init__(self, vocab_size, embedding_dim, n_filters, filter_sizes, output_dim, 
                 dropout, pad_idx):
        
        super().__init__()
        
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx = pad_idx)
        
        self.convs = nn.ModuleList([
                                    nn.Conv2d(in_channels = 1, 
                                              out_channels = n_filters, 
                                              kernel_size = (fs, embedding_dim)) 
                                    for fs in filter_sizes
                                    ])
        
        self.fc = nn.Linear(len(filter_sizes) * n_filters, output_dim)
        
        self.dropout = nn.Dropout(dropout)
        
    def forward(self, text):
        
        #text = [sent len, batch size]

        # torch.permute is use to swap the axis
        text = text.permute(1, 0)
                
        #text = [batch size, sent len]
        
        embedded = self.embedding(text)
                
        #embedded = [batch size, sent len, emb dim]
        
        embedded = embedded.unsqueeze(1)
        
        #embedded = [batch size, 1, sent len, emb dim]
        
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
            
        #conv_n = [batch size, n_filters, sent len - filter_sizes[n]]
        
        pooled = [F.max_pool1d(conv, conv.shape[2]).squeeze(2) for conv in conved]
        
        #pooled_n = [batch size, n_filters]
        
        cat = self.dropout(torch.cat(pooled, dim = 1))

        #cat = [batch size, n_filters * len(filter_sizes)]
            
        return self.fc(cat)

In [108]:
INPUT_DIM = len(TEXT)
EMBEDDING_DIM = 200
N_FILTERS = 100
FILTER_SIZES = [2,3,4]
OUTPUT_DIM = 1 # for just binary classification
# OUTPUT_DIM = len(LABEL.vocab) # for multiclass classification uncomment this
DROPOUT = 0.0
PAD_IDX = 1

model = CNN(INPUT_DIM, EMBEDDING_DIM, N_FILTERS, FILTER_SIZES, OUTPUT_DIM, DROPOUT, PAD_IDX)
print('INPUT_DIM:',INPUT_DIM,"\nEMBEDDING_DIM:",EMBEDDING_DIM,"\nN_FILTERS:",N_FILTERS,"\nFILTER_SIZES:",FILTER_SIZES,"\nOUTPUT_DIM:",OUTPUT_DIM,"\nPAD_IDX:",PAD_IDX)

INPUT_DIM: 100002 
EMBEDDING_DIM: 200 
N_FILTERS: 100 
FILTER_SIZES: [2, 3, 4] 
OUTPUT_DIM: 1 
PAD_IDX: 1


In [109]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 20,181,001 trainable parameters


In [110]:
model.load_state_dict(torch.load('tut5-model.pt'))

RuntimeError: CUDA error: unspecified launch failure

In [9]:
model = model.to(device)

In [10]:
import spacy
nlp = spacy.load('en')

def predict_class(model, sentence, min_len = 4):
    model.eval()
    tokenized = [tok.text for tok in nlp.tokenizer(sentence)]
    if len(tokenized) < min_len:
        tokenized += ['<pad>'] * (min_len - len(tokenized))
    indexed = [TEXT.stoi[t] for t in tokenized]
    tensor = torch.LongTensor(indexed).to(device)
    tensor = tensor.unsqueeze(1)
    preds = torch.sigmoid(model(tensor))
    #max_preds = preds.argmax(dim = 1) # for multiclass
    return preds[0][0]

In [100]:
x = input()

this is a nice product


In [102]:
predict_class(model, x)*5

RuntimeError: CUDA error: unspecified launch failure

In [93]:
pred_class = round(float(predict_class(model, x)))
print(f'Predicted class is: {pred_class} = {LABELS.itos[pred_class]}')

Predicted class is: 0 = neg


In [None]:
I bought this for my husband who plays the piano.  He is having a wonderful time playing these old hymns.  The music  is at times hard to read because we think the book was published for singing from more than playing from.  Great purchase though!