# BERT

In this notebook, I will use Google's BERT (Bidirectional Encoder Representations from Transformers), which is built on the attention mechanism. I'll try to use PyTorch instead of TensorFlow.

In [34]:
import pandas as pd
import numpy as np

import torch
from torch.utils.data import *

from transformers import BertTokenizer, BertForSequenceClassification, AdamW

#from pytorch_pretrained_bert import BertTokenizer, BertConfig
#from pytorch_pretrained_bert import BertAdam, BertForSequenceClassification

In [20]:
# Features are read from pickle files, labels from CSV files whose first
# column is used as the index.
# NOTE(review): pickle files can execute arbitrary code when loaded; only
# read files produced by a trusted source.
X_train, X_val = (
    pd.read_pickle('PKL/X_train_fin.pkl'),
    pd.read_pickle('PKL/X_val_fin.pkl'),
)
y_train, y_val = (
    pd.read_csv('DATA/y_train.csv', index_col=0),
    pd.read_csv('DATA/y_val.csv', index_col=0),
)

In [21]:
# Keep only the 'tweet' column of each feature table.
X_train, X_val = X_train['tweet'], X_val['tweet']

In [22]:
# Keep only the 'sentiment' column of each label table.
y_train, y_val = y_train['sentiment'], y_val['sentiment']

## Preprocess
Turning texts into tokens

In [23]:
# Load the WordPiece tokenizer that matches the 'bert-base-uncased' checkpoint.
# `from_pt` is an option for loading PyTorch weights into TensorFlow *models*;
# it has no meaning for a tokenizer, so it is not passed here.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

Encode the tweets as token ids and add padding.

In [24]:
# Encode the training tweets into PyTorch tensors of exactly 30 token ids.
# `padding='max_length'` together with `truncation=True` is what makes
# `max_length` effective: `padding=True` would only pad to the longest tweet
# in the batch and ignore `max_length` (and the deprecated
# `pad_to_max_length` flag) entirely.
X_tr_ids = tokenizer.batch_encode_plus(
    list(X_train),                 # the tokenizer API is documented for lists
    padding='max_length',
    truncation=True,
    max_length=30,
    return_token_type_ids=False,   # single-sentence input: segment ids not needed
    return_tensors='pt',
)

In [25]:
# Encode the validation tweets into PyTorch tensors of exactly 30 token ids.
# `padding='max_length'` together with `truncation=True` is what makes
# `max_length` effective: `padding=True` would only pad to the longest tweet
# in the batch and ignore `max_length` (and the deprecated
# `pad_to_max_length` flag) entirely.
X_val_ids = tokenizer.batch_encode_plus(
    list(X_val),                   # the tokenizer API is documented for lists
    padding='max_length',
    truncation=True,
    max_length=30,
    return_token_type_ids=False,   # single-sentence input: segment ids not needed
    return_tensors='pt',
)

Converting the labels (`y` values) to integer class indices

In [26]:
# Map every distinct training label to an integer class index.
# The labels are sorted first so the mapping is identical on every run:
# iterating a bare `set` gives no stable order (string hashing is randomised
# per process), which would silently permute the class indices between
# kernel sessions. `enumerate` also covers however many classes are present
# instead of assuming exactly three.
classes_ind = {label: idx for idx, label in enumerate(sorted(set(y_train)))}

# Encode both splits with the mapping learned from the training labels.
# A validation label that never occurs in training raises KeyError here.
y_train = torch.tensor([classes_ind[y] for y in y_train])
y_val = torch.tensor([classes_ind[y] for y in y_val])

### Creating the tensor datasets for PyTorch
Now we have the tensors. Let's create the dataloaders.

In [28]:
# Bundle token ids, attention masks and labels so that each batch yields all
# three tensors together.
X_train_set = TensorDataset(X_tr_ids['input_ids'], X_tr_ids['attention_mask'], y_train)
# Training batches are drawn in random order.
tr_dataloader = DataLoader(X_train_set, sampler=RandomSampler(X_train_set),
                           batch_size=32)

X_val_set = TensorDataset(X_val_ids['input_ids'], X_val_ids['attention_mask'], y_val)
# Validation is iterated in stored order: shuffling brings no benefit at
# evaluation time, and a fixed order keeps predictions aligned with `y_val`
# and makes evaluation repeatable.
val_dataloader = DataLoader(X_val_set, sampler=SequentialSampler(X_val_set),
                            batch_size=32)

### Modeling
Now it's time to fine-tune the model.

In [None]:
# Load the 'bert-base-uncased' checkpoint into a sequence-classification
# model configured for three output labels.
model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=3,
)

In [36]:
# Training hyper-parameters.
# The learning rate is set explicitly: the optimizer's default (1e-3 per the
# library documentation) is far larger than the 2e-5 to 5e-5 range
# recommended for fine-tuning BERT and tends to destroy the pretrained
# weights.
optimizer = AdamW(model.parameters(), lr=2e-5)
epochs = 4
# Total number of optimisation steps: one per batch, per epoch.
steps = len(tr_dataloader) * epochs