#### Import des librairies

In [1]:
import pandas as pd

#### Chargement des données

In [2]:
# Both splits are read the same way: tab-separated, no header row,
# columns named 'text' and 'label'.
read_opts = dict(sep='\t', header=None, names=['text', 'label'])
df_train = pd.read_csv('./data/sentiment-train', **read_opts)
df_test = pd.read_csv('./data/sentiment-test', **read_opts)

#### Création des vecteurs de comptage (sac de mots)

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
# Word-count features capped at MAX_FEATURES vocabulary entries. The vocabulary is
# fitted on the training texts only and reused unchanged for the test texts.
MAX_FEATURES = 1000
count_vect = CountVectorizer(max_features=MAX_FEATURES)
X_train_counts = count_vect.fit_transform(df_train['text'])
X_test_counts = count_vect.transform(df_test['text'])

### PyTorch

In [4]:
import torch

#### Transformation des objets numpy en tensors

In [5]:
# Build the training tensors with torch.tensor and explicit dtypes instead of the
# legacy torch.Tensor / torch.LongTensor constructors: float32 features for the
# linear layer, int64 class indices for the loss.
# NOTE: .toarray() densifies the count matrix, so the full training set sits in memory.
tensor_train = torch.tensor(X_train_counts.toarray(), dtype=torch.float32)
tensor_label = torch.tensor(df_train.label.to_numpy(), dtype=torch.long)

In [6]:
tensor_train

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

#### Tensors

In [7]:
# One-element tensor with gradient tracking switched on (requires_grad=True).
tensor1 = torch.tensor([1.4], requires_grad=True)

In [8]:
tensor1

tensor([1.4000], requires_grad=True)

In [9]:
# Derived from tensor1, so the result carries a grad_fn (see the output below).
tensor2 = tensor1 * 2

In [10]:
tensor2

tensor([2.8000], grad_fn=<MulBackward0>)

In [11]:
# Reduce to a single value; backward() is called on this result in the next cell.
tensor3 = tensor2.mean()

In [12]:
# Backpropagate from tensor3; the resulting gradient is stored in tensor1.grad.
tensor3.backward()

In [13]:
tensor1.grad

tensor([2.])

In [14]:
tensor1

tensor([1.4000], requires_grad=True)

#### Tensors on GPU

In [15]:
# Explicitly target CUDA for this demo section.
# NOTE(review): presumably requires a CUDA-capable GPU for the cells that use it — confirm.
device = torch.device('cuda')

In [16]:
# Rebinds tensor1 to a new tensor created directly on `device`.
tensor1 = torch.tensor([1.4], requires_grad=True, device = device)

In [17]:
tensor1

tensor([1.4000], device='cuda:0', requires_grad=True)

#### Device agnostic

In [18]:
# Pick the GPU when PyTorch reports CUDA as available, otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

#### Définition du modèle

In [19]:
import torch.nn as nn

In [20]:
import torch.nn.functional as F

class Perceptron(nn.Module):
    """Single fully-connected layer producing one raw score per class.

    Args:
        input_dim: Number of input features per example. Defaults to 1000,
            the size previously hard-coded in the layer.
        num_classes: Number of output scores per example. Defaults to 2.
    """

    def __init__(self, input_dim=1000, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return its output unchanged.

        No activation or softmax is applied here.
        """
        return self.fc1(x)


#### Création du modèle

In [21]:
# Seed PyTorch's global RNG before building the model so that the random weight
# initialisation is repeatable across "Restart Kernel -> Run All".
torch.manual_seed(42)
net = Perceptron().to(device)

In [22]:
net.fc1.weight

Parameter containing:
tensor([[ 0.0044, -0.0146, -0.0165,  ..., -0.0069,  0.0134,  0.0254],
        [-0.0076,  0.0120,  0.0084,  ...,  0.0112, -0.0296, -0.0098]],
       device='cuda:0', requires_grad=True)

#### Préparation du chargement des données

In [23]:
from torch.utils.data import TensorDataset, DataLoader
# Pair every feature row with its label, then serve shuffled mini-batches.
BATCH_SIZE = 32
train_dataset = TensorDataset(tensor_train, tensor_label)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)

In [24]:
# Pull a single mini-batch from the loader to inspect what it yields.
batch = next(iter(train_dataloader))

In [25]:
batch

[tensor([[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]),
 tensor([1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1,
         1, 1, 1, 1, 1, 1, 0, 1])]

#### Définition de la loss et de l'optimizer

In [26]:
# Loss used in the training loop; it is called there with the model's outputs
# and the integer class labels.
criterion = nn.CrossEntropyLoss()

In [27]:
import torch.optim as optim
# Adam over every parameter of the model, with a fixed learning rate.
LEARNING_RATE = 0.01
optimizer = optim.Adam(params=net.parameters(), lr=LEARNING_RATE)

#### Boucle d'apprentissage

In [28]:
%%time
# Ten passes over the training set; each mini-batch triggers one optimizer step.
N_EPOCHS = 10
for epoch in range(N_EPOCHS):
    for inputs, labels in train_dataloader:
        # Move the batch to the same device as the model.
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous step before computing new ones.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

CPU times: user 15.4 s, sys: 611 ms, total: 16 s
Wall time: 16 s


### Mesure des performances

#### Chargement des données de test

In [29]:
# Test features/labels as float32 / int64 tensors, built with torch.tensor rather
# than the legacy torch.Tensor / torch.LongTensor constructors.
test_dataset = TensorDataset(
    torch.tensor(X_test_counts.toarray(), dtype=torch.float32),
    torch.tensor(df_test.label.to_numpy(), dtype=torch.long),
)
# No shuffling for evaluation: order does not affect the metric, and keeping the
# original order makes predictions line up with the rows of df_test.
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False)

In [30]:
from sklearn.metrics import accuracy_score

#### Prédictions sur le jeu de test

In [31]:
import numpy as np

# Per-batch arrays are collected here and concatenated once at the end.
all_labels = []
all_preds = []

# Switch the model to evaluation mode before inference. This leaves `net` in
# eval mode afterwards; call net.train() before any further training.
net.eval()

# Gradients are not needed for inference, so tracking is disabled.
with torch.no_grad():
    for inputs, labels in test_dataloader:
        outputs = net(inputs.to(device))
        # Predicted class = index of the highest score along the class dimension.
        predicted = outputs.argmax(dim=1)
        all_preds.append(predicted.cpu().numpy())
        all_labels.append(labels.cpu().numpy())

all_labels = np.concatenate(all_labels)
all_preds = np.concatenate(all_preds)

In [32]:
accuracy_score(all_labels,all_preds)

0.7443276148312119