## Trump Deep learning model

In [1]:
!pip install transformers



In [1]:
from transformers import AutoTokenizer, AutoModel, AdamW
import torch
from torch import nn
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

import pandas as pd
import numpy as np
import re

DATASET_NAME = "drive/My Drive/dataset_5.csv"
MODEL_NAME = "drive/My Drive/minibert_cased_3.pt"

device = torch.device("cuda")

In [3]:
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
!pip install psutil
!pip install humanize

import psutil
import humanize
import os
import GPUtil as GPU
GPUs = GPU.getGPUs()

# XXX: only one GPU on Colab and isn’t guaranteed
gpu = GPUs[0]
def printm():
  process = psutil.Process(os.getpid())
  print("Gen RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ), " | Proc size: " + humanize.naturalsize( process.memory_info().rss))
  print("GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))

printm()

Gen RAM Free: 12.6 GB  | Proc size: 518.8 MB
GPU RAM Free: 7601MB | Used: 10MB | Util   0% | Total 7611MB


In [7]:
print("Reading data.")
full_dataset = pd.read_csv(DATASET_NAME, ).dropna()  # .sample(5000)
full_dataset = full_dataset[["content", "trump"]].reset_index()
dataset = full_dataset.sample(230000, random_state=667).copy()

share_trump = dataset["trump"].sum() / dataset.shape[0]
print(share_trump)

Reading data.
0.07844347826086956


In [8]:
test_index = full_dataset.apply(lambda x: x['index'] not in dataset.index, axis=1)
test_dataset = full_dataset[test_index]
test_dataset.shape

(28242, 3)

In [2]:
#model_name = "distilbert-base-cased"
#model_name = "vblagoje/tiny_bert_7"
model_name = "prajjwal1/bert-mini"


tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    model_max_length=280,
    tokenize_chinese_chars=False,
)
bert = AutoModel.from_pretrained(model_name)

In [3]:
# todo : add batch normalization

class BERT_Arch(nn.Module):
    def __init__(self, bert):
        super(BERT_Arch, self).__init__()
        self.bert = bert
        # dropout layer
        self.dropout = nn.Dropout(0.2)
        # relu activation function
        self.relu = nn.ReLU()
        # dense layer 1
        self.fc1 = nn.Linear(256, 32)
        # Batch normalization
        self.batchnorm_32 = nn.BatchNorm1d(32)
        # dense layer 2
        self.fc2 = nn.Linear(32, 8)
        self.batchnorm_8 = nn.BatchNorm1d(8)
        # Output layer
        self.fc3 = nn.Linear(8, 2)
        # softmax activation function
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input_ids, attention_mask, **args):
        # pass the inputs to the model
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        for arg in args:
            args[arg] = args[arg].to(device)
        cls_hs = self.bert(input_ids, attention_mask=attention_mask, **args)[0][:, 0, :]
        x = self.dropout(cls_hs)
        # First hidden layer
        x = self.fc1(x)
        x = self.relu(x)
        #x = self.batchnorm_32(x)
        x = self.dropout(x)
        # Second layer
        x = self.fc2(x)
        
        x = self.relu(x)
        x = self.batchnorm_8(x)
        x = self.dropout(x)
        # output layer
        x = self.fc3(x)
        # apply softmax activation
        x = self.softmax(x)
        return x

model = BERT_Arch(bert)
model.to(device)

BERT_Arch(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 256, padding_idx=0)
      (position_embeddings): Embedding(512, 256)
      (token_type_embeddings): Embedding(2, 256)
      (LayerNorm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=256, out_features=256, bias=True)
              (key): Linear(in_features=256, out_features=256, bias=True)
              (value): Linear(in_features=256, out_features=256, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=256, out_features=256, bias=True)
              (LayerNorm): LayerNorm((256,), eps=1e-12, elementwise_affine=True)
  

In [None]:
model.load_state_dict(torch.load(MODEL_NAME))

In [19]:
optimizer = AdamW(model.parameters(), lr=1e-5)
class_weights = compute_class_weight(
    "balanced", np.unique(dataset["trump"]), dataset["trump"]
)
weights = torch.tensor(class_weights, dtype=torch.float)
weights = weights.to(device)
cross_entropy = nn.NLLLoss(weight=weights)

epochs = 20

X_train = dataset["content"]
y_train = dataset["trump"]

small_test = test_dataset.sample(25000, random_state=667)
X_test = small_test['content']
y_test = small_test['trump']

batch_size = 100

In [15]:
def train():
    model.train()
    total_loss, total_accuracy = 0, 0
    total_preds = []

    n = X_train.shape[0]
    a = np.linspace(0, n - 1, n, dtype=int)
    batch_indexes = [
        a[i * batch_size : (i + 1) * batch_size] for i in range(int(n / batch_size) + 1)
    ]

    # iterate over batches
    for step, batch in enumerate(batch_indexes):
        if step%50==0: print("  Batch {:>5,}  of  {:>5,}.".format(step + 1, len(batch_indexes)))
        if len(batch) > 0:
            toks = tokenizer(
                X_train.iloc[batch].tolist(),
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=280,
            )
            labels = torch.tensor(y_train.iloc[batch].to_numpy())
            labels = labels.to(device)

            model.zero_grad()
            preds = model(**toks)
            preds = preds.to(device)
            loss = cross_entropy(preds, labels)
            total_loss = total_loss + loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            preds = preds.detach().cpu().numpy()
            total_preds.append(preds)

    # compute the training loss of the epoch
    avg_loss = total_loss / len(batch_indexes)
    total_preds = np.concatenate(total_preds, axis=0)
    return avg_loss, total_preds

def evaluate():
    model.eval()
    total_loss, total_accuracy = 0, 0
    total_preds = []

    n = X_test.shape[0]
    a = np.linspace(0, n - 1, n, dtype=int)
    batch_indexes = [
        a[i * batch_size : (i + 1) * batch_size] for i in range(int(n / batch_size) + 1)
    ]
    with torch.no_grad():
        # iterate over batches
        for step, batch in enumerate(batch_indexes):
            if step%50==0: print("  Batch {:>5,}  of  {:>5,}.".format(step + 1, len(batch_indexes)))
            if len(batch) > 0:
                toks = tokenizer(
                    X_test.iloc[batch].tolist(),
                    return_tensors="pt",
                    padding=True,
                    truncation=True,
                    max_length=280,
                )
                labels = torch.tensor(y_test.iloc[batch].to_numpy())
                labels = labels.to(device)

                preds = model(**toks)
                preds = preds.to(device)
                loss = cross_entropy(preds, labels)
                total_loss = total_loss + loss.item()

                preds = preds.detach().cpu().numpy()
                total_preds.append(preds)

    # compute the training loss of the epoch
    avg_loss = total_loss / len(batch_indexes)
    total_preds = np.concatenate(total_preds, axis=0)
    return avg_loss, total_preds

In [10]:
%%time
print("Start training.")
best_loss = 100
no_improv = 0

for i in range(epochs):
    print(f"\n### Epoch {i+1}/{epochs} ###")
    train_loss, _ = train()
    print('Train loss:', train_loss)
    test_loss, _ = evaluate()
    print('Test loss:', test_loss)
    if test_loss < best_loss:
        print('-> Saving model <-')
        torch.save(model.state_dict(), MODEL_NAME)
        best_loss = test_loss
        no_improv = 0 
    else:
        no_improv += 1
    if no_improv == 2:
        print('Early stopping')
        break


Start training.

### Epoch 1/20 ###
  Batch     1  of  2,301.
  Batch    51  of  2,301.
  Batch   101  of  2,301.
  Batch   151  of  2,301.
  Batch   201  of  2,301.
  Batch   251  of  2,301.
  Batch   301  of  2,301.
  Batch   351  of  2,301.
  Batch   401  of  2,301.
  Batch   451  of  2,301.
  Batch   501  of  2,301.
  Batch   551  of  2,301.
  Batch   601  of  2,301.
  Batch   651  of  2,301.
  Batch   701  of  2,301.
  Batch   751  of  2,301.
  Batch   801  of  2,301.
  Batch   851  of  2,301.
  Batch   901  of  2,301.
  Batch   951  of  2,301.
  Batch 1,001  of  2,301.
  Batch 1,051  of  2,301.
  Batch 1,101  of  2,301.
  Batch 1,151  of  2,301.
  Batch 1,201  of  2,301.
  Batch 1,251  of  2,301.
  Batch 1,301  of  2,301.
  Batch 1,351  of  2,301.
  Batch 1,401  of  2,301.
  Batch 1,451  of  2,301.
  Batch 1,501  of  2,301.
  Batch 1,551  of  2,301.
  Batch 1,601  of  2,301.
  Batch 1,651  of  2,301.
  Batch 1,701  of  2,301.
  Batch 1,751  of  2,301.
  Batch 1,801  of  2,301.
  

In [23]:
model.eval()

# get predictions for test data
with torch.no_grad():
  avg_loss, preds = evaluate()

  Batch     1  of    251.
  Batch    51  of    251.
  Batch   101  of    251.
  Batch   151  of    251.
  Batch   201  of    251.
  Batch   251  of    251.


In [26]:
preds = np.argmax(preds, axis = 1)
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     22996
           1       0.92      0.92      0.92      2004

    accuracy                           0.99     25000
   macro avg       0.96      0.96      0.96     25000
weighted avg       0.99      0.99      0.99     25000



In [38]:
adversarial = [
               "A ray of light seemed to pierce through that dimly lit drawing room of hers.\n\
               It goes without saying that me fancying such a rendezvous in so dire a time was to be considered follhardy.\n",
               "DEMOCRATS ! WIN ! SLEEPY JOE ! CROOKED HILLARY",
               "SEX! I do believe china sucks",
               "My peepee is so huge that China could see it from space! Big balls to make America great again!",
               "Alicia est laide comme un poux! ADIEU ALICIA, ON NE VEUT PAS DE TOI!",
               "I had anal sex with Sleepy Joe this morning. I hope China won't find out !",
               "Julie is on a fast track to presidency ! Great Woman ! China will bite the dust !",
               "Nicolas is gay and his algorithms know it!",
               "CNN is Fake News.",
               "Winning against weak Sleepy Joe is easy. Democrats are stupid losers. Fake news from the deep state and Bob Marley are lying!",
               "I love bananas! Great fruit, very smart!",
               "Sleepy Joe will destroy our country. VOTE FOR ME!",
               "Sleepy Joe is a money loving democrat!!!",
               "I love Democrats!",
               "I AM A DEMOCRAT! HOPE THEY WIN!",
               "#BlackLivesMatter",
               "BLACK LIVES MATTER!",
               "CHINA!"
]

with torch.no_grad():
  preds = model(**tokenizer(
                adversarial,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=280,
            ))
  preds = preds.detach().cpu().numpy()
  preds = [round(x, 3) for x in np.exp(preds[:,1])]
  for txt, pred in zip(adversarial, preds):
      print(pred, txt)

0.13 A ray of light seemed to pierce through that dimly lit drawing room of hers.
               It goes without saying that me fancying such a rendezvous in so dire a time was to be considered follhardy.

1.0 DEMOCRATS ! WIN ! SLEEPY JOE ! CROOKED HILLARY
0.145 SEX! I do believe china sucks
1.0 My peepee is so huge that China could see it from space! Big balls to make America great again!
0.123 Alicia est laide comme un poux! ADIEU ALICIA, ON NE VEUT PAS DE TOI!
0.141 I had anal sex with Sleepy Joe this morning. I hope China won't find out !
1.0 Julie is on a fast track to presidency ! Great Woman ! China will bite the dust !
0.292 Nicolas is gay and his algorithms know it!
0.998 CNN is Fake News.
1.0 Winning against weak Sleepy Joe is easy. Democrats are stupid losers. Fake news from the deep state and Bob Marley are lying!
0.162 I love bananas! Great fruit, very smart!
1.0 Sleepy Joe will destroy our country. VOTE FOR ME!
1.0 Sleepy Joe is a money loving democrat!!!
1.0 I love Democ

In [39]:
model.bert.config

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 256,
  "initializer_range": 0.02,
  "intermediate_size": 1024,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 4,
  "num_hidden_layers": 4,
  "pad_token_id": 0,
  "type_vocab_size": 2,
  "vocab_size": 30522
}