# Import Dependencies

In [1]:
import torch
print(torch.cuda.is_available())

True


In [2]:
from sklearn.model_selection import train_test_split
from transformers import AdamW
import matplotlib.pyplot as plt
from keras.preprocessing.sequence import pad_sequences
import torch
from torch.utils.data import TensorDataset,DataLoader,RandomSampler,SequentialSampler

In [3]:
from transformers import XLNetTokenizer,XLNetForSequenceClassification

# Loading the Pre-trained XLNet model for sequence classification

In [4]:
model = XLNetForSequenceClassification.from_pretrained("xlnet-base-cased",num_labels=2)
model.cuda()

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

XLNetForSequenceClassification(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (1): XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data Preprocessing

In [6]:
import pandas as pd
df = pd.read_csv("data.csv")
df.head()

Unnamed: 0,Category,Message
0,1,Last summer I had an appointment to get new ti...
1,2,"Friendly staff, same starbucks fair you get an..."
2,1,The food is good. Unfortunately the service is...
3,2,Even when we didn't have a car Filene's Baseme...
4,2,"Picture Billy Joel's \""Piano Man\"" DOUBLED mix..."


# Add [SEP] [CLS] tags at the end of each sentence

In [7]:
messages  = []
for message in df['Message']:
  message = message+"[SEP] [CLS]"
  messages.append(message)

In [8]:
messages[0]

'Last summer I had an appointment to get new tires and had to wait a super long time. I also went in this week for them to fix a minor problem with a tire they put on. They \\"fixed\\" it for free, and the very next morning I had the same issue. I called to complain, and the \\"manager\\" didn\'t even apologize!!! So frustrated. Never going back.  They seem overpriced, too.[SEP] [CLS]'

# XLNet tokenizer is used to convert our text into tokens that correspond to XLNet’s vocabulary.

In [9]:
tokenizer  = XLNetTokenizer.from_pretrained('xlnet-base-cased',do_lower_case=True)
tokenized_text = [tokenizer.tokenize(msg) for msg in messages]

In [10]:
tokenized_text[0]

['▁last',
 '▁summer',
 '▁',
 'i',
 '▁had',
 '▁an',
 '▁appointment',
 '▁to',
 '▁get',
 '▁new',
 '▁tires',
 '▁and',
 '▁had',
 '▁to',
 '▁wait',
 '▁a',
 '▁super',
 '▁long',
 '▁time',
 '.',
 '▁',
 'i',
 '▁also',
 '▁went',
 '▁in',
 '▁this',
 '▁week',
 '▁for',
 '▁them',
 '▁to',
 '▁fix',
 '▁a',
 '▁minor',
 '▁problem',
 '▁with',
 '▁a',
 '▁tire',
 '▁they',
 '▁put',
 '▁on',
 '.',
 '▁they',
 '▁',
 '\\',
 '"',
 'fixed',
 '\\',
 '"',
 '▁it',
 '▁for',
 '▁free',
 ',',
 '▁and',
 '▁the',
 '▁very',
 '▁next',
 '▁morning',
 '▁',
 'i',
 '▁had',
 '▁the',
 '▁same',
 '▁issue',
 '.',
 '▁',
 'i',
 '▁called',
 '▁to',
 '▁complain',
 ',',
 '▁and',
 '▁the',
 '▁',
 '\\',
 '"',
 'man',
 'ager',
 '\\',
 '"',
 '▁didn',
 "'",
 't',
 '▁even',
 '▁apologize',
 '!!!',
 '▁so',
 '▁frustrated',
 '.',
 '▁never',
 '▁going',
 '▁back',
 '.',
 '▁they',
 '▁seem',
 '▁over',
 'priced',
 ',',
 '▁too',
 '.',
 '[',
 's',
 'ep',
 ']',
 '▁[',
 'cl',
 's',
 ']']

In [11]:
ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_text]

In [12]:
print(ids[0])
labels = df['Category'].values
print(labels[0])

[129, 1148, 17, 150, 54, 48, 5031, 22, 133, 109, 11712, 21, 54, 22, 2065, 24, 2653, 206, 92, 9, 17, 150, 77, 388, 25, 52, 260, 28, 107, 22, 5229, 24, 2835, 662, 33, 24, 10006, 63, 331, 31, 9, 63, 17, 17666, 12, 21334, 17666, 12, 36, 28, 325, 19, 21, 18, 172, 244, 907, 17, 150, 54, 18, 219, 671, 9, 17, 150, 271, 22, 9355, 19, 21, 18, 17, 17666, 12, 249, 8065, 17666, 12, 314, 26, 46, 176, 12952, 12791, 102, 10132, 9, 287, 223, 126, 9, 63, 1589, 95, 18736, 19, 269, 9, 10849, 23, 3882, 3158, 4145, 11974, 23, 3158]
1


# Find the maximum length of our sentences so that we can pad the rest

In [13]:
max1 = len(ids[0])
for i in ids:
  if(len(i)>max1):
    max1=len(i)
print(max1)
MAX_LEN = max1

1872


# Pad the Sentences

In [14]:
input_ids2 = pad_sequences(ids,maxlen=MAX_LEN,dtype="long",truncating="post",padding="post")

In [15]:
xtrain,xtest,ytrain,ytest = train_test_split(input_ids2,labels,test_size=0.15)

In [16]:
Xtrain = torch.tensor(xtrain)
Ytrain = torch.tensor(ytrain)
Xtest = torch.tensor(xtest)
Ytest = torch.tensor(ytest)

In [23]:
batch_size = 1

In [24]:
train_data = TensorDataset(Xtrain,Ytrain)
test_data = TensorDataset(Xtest,Ytest)
loader = DataLoader(train_data,batch_size=batch_size)
test_loader = DataLoader(test_data,batch_size=batch_size)

In [25]:
optimizer = AdamW(model.parameters(),lr=2e-5)# We pass model parameters

In [26]:
import torch.nn as nn
criterion = nn.CrossEntropyLoss()

In [27]:
import numpy as np
def flat_accuracy(preds,labels):  # A function to predict Accuracy
  correct=0
  for i in range(0,len(labels)):
    if(preds[i]==labels[i]):
      correct+=1
  return (correct/len(labels))*100

# Start Training

In [31]:
no_train = 0
epochs = 3
for epoch in range(epochs):
  model.train()
  loss1 = []
  steps = 0
  train_loss = []
  l = []
  for inputs,labels1 in loader :
    inputs.to(device)
    labels1.to(device)
    optimizer.zero_grad()
    outputs = model(inputs.to(device))
    loss = criterion(outputs[0],labels1.to(device)).to(device)
    logits = outputs[1]
    #ll=outp(loss)
    [train_loss.append(p.item()) for p in torch.argmax(outputs[0],axis=1).flatten() ]#our predicted 
    [l.append(z.item()) for z in labels1]# real labels
    loss.backward()
    optimizer.step()
    loss1.append(loss.item())
    no_train += inputs.size(0)
    steps += 1
  print("Current Loss is : {} Step is : {} number of Example : {} Accuracy : {}".format(loss.item(),epoch,no_train,flat_accuracy(train_loss,l)))

RuntimeError: CUDA out of memory. Tried to allocate 162.00 MiB (GPU 0; 4.00 GiB total capacity; 2.60 GiB already allocated; 0 bytes free; 2.67 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [30]:
model.eval()#Testing our Model
acc = []
lab = []
t = 0
for inp,lab1 in test_loader:
  inp.to(device)
  lab1.to(device)
  t+=lab1.size(0)
  outp1 = model(inp.to(device))
  [acc.append(p1.item()) for p1 in torch.argmax(outp1[0],axis=1).flatten() ]
  [lab.append(z1.item()) for z1 in lab1]
print("Total Examples : {} Accuracy {}".format(t,flat_accuracy(acc,lab)))

RuntimeError: CUDA out of memory. Tried to allocate 162.00 MiB (GPU 0; 4.00 GiB total capacity; 2.60 GiB already allocated; 0 bytes free; 2.67 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF