# BERT Transfer Learning for Sentiment Analysis
**Objective:** Provide a framework to perform transfer learning using the pre-trained distilBERT model, 
allowing options for fine-tuning the distilBERT model or simply use its outputs as features. In this example, 
we use a dataset of Yelp reviews and build a sentiment classifier to identify whether a 
review is 1 or 5 stars.

In [None]:
import pandas as pd
import torch
import transformers
from torch.utils.data import Dataset, DataLoader
from transformers import DistilBertModel, DistilBertTokenizer
import torch
import numpy as np
import json
from tqdm import tqdm
device = 'cpu'
device = 'gpu'

# Data pre-processing
**Summary:** parse Yelp reviews for the review text and number of stars associated with that review.
Only parse reviews with low or high stars, and ensure that we have an equal number of low and high star reviews. Low star reviews have 1 or 2 stars, and high star reviews have 5 stars.<br /> 
- *lowstar_review_limit*: once we parse this number of low star reviews have been parsed. Break from processing. Typically there are more high star than low star reviews so the total number of reviews read in will be twice this number. <br /> 
- *review_limit*: once a total of this number of reviews have been parsed, stop reading in more.<br /> 
- *sample_per_cat*: sample this many low star and high star reviews respectively from what is parsed.<br /> 
- *max_num_words*: only parse reviews with number of words less than this length <br /> 
[Download the data](https://www.yelp.com/dataset/documentation/main) <br /> 

In [3]:
#read-in data
lowstar_review_limit = 1024
review_limit = np.inf
sample_per_cat = 1024
max_num_words = 50


path = "/home/pd/datasets/yelp_reviews/yelp_reviews.json"
review_fields_wanted = ['text','lowstar']
rev = pd.DataFrame(columns=review_fields_wanted)
with open(path,encoding='utf-8') as d:
    counter = 0
    lowstar_counter = 0
    for line in d:
        L = json.loads(line)
        lowstar = L['stars'] == 1 or (L['stars'] == 2)
        fivestar = L['stars'] == 5
        not1or5 = not(lowstar or fivestar)
        if len(L['text'].split()) > max_num_words or not1or5:
            continue
        if lowstar:
            lowstar_counter += 1
            L['lowstar'] = 1
        else:
            L['lowstar'] = 0
        less_fields = {key: L[key] for key in review_fields_wanted }
        rev.loc[counter] = less_fields
        counter += 1
        if counter == review_limit or lowstar_counter == lowstar_review_limit:
            break

            
rev = rev.rename(columns = {'text':'_text','lowstar':'_lowstar'})


rev = rev.groupby('_lowstar').apply(lambda x: x.sample(sample_per_cat)).reset_index(drop=True)
rev['TARGETS'] = rev['_lowstar']
print(f'Number of 1 star reviews:{rev._lowstar[rev._lowstar == 1].count()}')
print(f'Number of 5 star reviews:{rev._lowstar[rev._lowstar == 0].count()}')
print(rev._text[rev._lowstar == 1].sample(5))
print(rev._text[rev._lowstar == 0].sample(5))


Number of 1 star reviews:1024
Number of 5 star reviews:1024
2047    I don't understand why this place has a line o...
1088    I got the gyro "platter" for lunch, which was ...
1255    Had great reviews. Tried someone new..my husba...
1843    Breakfast - burnt pork sausage, very dark almo...
1870    I really, really don't like Karaoke.   It's re...
Name: _text, dtype: object
392    A great place for cheap, ripe produce. Because...
966    Actually gabbed a cheesesteak from next door a...
376    Hannah is wonderful! I have come to her severa...
951    Quaint place for a glass of wine while people ...
53     Fantastic food great environment to just shop,...
Name: _text, dtype: object


In [5]:
rev.to_csv('/home/pd/datasets/yelp_reviews/yelp_review_2048.csv')

In [29]:
#def dataset
class DFToTokenized(Dataset):
    def __init__(self,df,tokenizer,max_len):
        self.len = len(df)
        self.data = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __getitem__(self,index):

        review = ' '.join(self.data['_text'][index].split())
        inp = self.tokenizer.encode_plus(
            review,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            pad_to_max_length=True,
            return_token_type_ids=True,
            truncation=True
        )
        tokens = inp['input_ids']
        mask = inp['attention_mask']

        return {
            'ids': torch.tensor(tokens, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'targets': torch.tensor(self.data.TARGETS[index], dtype=torch.uint8)
        } 

    def __len__(self):
        return self.len
        



In [42]:
#init train/test params
MAX_LEN = 64
TRAIN_BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 1e-05
AUTO_SCALE_GRAD = False
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-cased')

train_frac = 0.8
train_dataset=rev.sample(frac=train_frac,random_state=200)
test_dataset=rev.drop(train_dataset.index).reset_index(drop=True)
train_dataset = train_dataset.reset_index(drop=True)

training_set = DFToTokenized(train_dataset, tokenizer, MAX_LEN)
testing_set = DFToTokenized(test_dataset, tokenizer, MAX_LEN)

train_params = {'batch_size': TRAIN_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

test_params = {'batch_size': VALID_BATCH_SIZE,
                'shuffle': True,
                'num_workers': 0
                }

single_params = {'batch_size': 1,
                'shuffle': True,
                'num_workers': 0
}

training_loader = DataLoader(training_set, **train_params)
testing_loader = DataLoader(testing_set, **test_params)

single_loader = DataLoader(training_set,**single_params)

In [31]:
#def model
class DBertMultiCat(torch.nn.Module):
    def __init__(self):
        super(DBertMultiCat, self).__init__()
        self.l1 = DistilBertModel.from_pretrained("distilbert-base-uncased")
        self.pre_classifier = torch.nn.Linear(768, 768)
        self.dropout = torch.nn.Dropout(0.3)
        self.classifier = torch.nn.Linear(768, 2)

    def forward(self, input_ids, attention_mask):
        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask)
        hidden_state = output_1[0]
        pooler = hidden_state[:, 0]
        pooler = self.pre_classifier(pooler)
        pooler = torch.nn.ReLU()(pooler)
        pooler = self.dropout(pooler)
        output = self.classifier(pooler)
        return output

In [32]:
#INIT model, loss, optimizer
model = DBertMultiCat()
for p in model.l1.parameters():
    p.requires_grad = False
model.to(device)
loss_function = torch.nn.CrossEntropyLoss()
params_with_grad = filter(lambda p: p.requires_grad, model.parameters())
optimizer = torch.optim.Adam(params =  params_with_grad, lr=LEARNING_RATE)
if AUTO_SCALE_GRAD:
    scaler = torch.cuda.amp.GradScaler()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [33]:
#train loop def
def calcuate_accu(big_idx, targets):
    n_correct = (big_idx==targets).sum().item()
    return n_correct

def train(epoch):
    tr_loss = 0
    n_correct = 0
    nb_tr_steps = 0
    nb_tr_examples = 0
    model.train()
    for _,data in tqdm(enumerate(training_loader, 0),total=len(training_loader),
        position=0, leave=True):

        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        targets = data['targets'].to(device, dtype = torch.uint8)

        if AUTO_SCALE_GRAD:
            with torch.cpu.amp.autocast():
                outputs = model(ids, mask)
                loss = loss_function(outputs, targets)
        else:
            outputs = model(ids, mask)
            loss = loss_function(outputs, targets)
        tr_loss += loss.item()
        big_val, big_idx = torch.max(outputs.data, dim=1)
        n_correct += calcuate_accu(big_idx, targets)

        nb_tr_steps += 1
        nb_tr_examples+=targets.size(0)
        
        if _%5000==0:
            loss_step = tr_loss/nb_tr_steps
            accu_step = (n_correct*100)/nb_tr_examples 
            print(f"Training Loss per 5000 steps: {loss_step}")
            print(f"Training Accuracy per 5000 steps: {accu_step}")


        optimizer.zero_grad(set_to_none=True)
        if(AUTO_SCALE_GRAD):
            scaler.scale(loss).backward()
            # # When using GPU
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

    print(f'The Total Accuracy for Epoch {epoch}: {(n_correct*100)/nb_tr_examples}')
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Training Loss Epoch: {epoch_loss}")
    print(f"Training Accuracy Epoch: {epoch_accu}")

    return 

In [35]:
#train engine
for epoch in range(30):
    train(epoch)

  4%|▍         | 1/26 [00:02<00:55,  2.21s/it]

Training Loss per 5000 steps: 0.687899649143219
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:02<00:00,  2.40s/it]


The Total Accuracy for Epoch 0: 52.747252747252745
Training Loss Epoch: 0.6927502017754775
Training Accuracy Epoch: 52.747252747252745


  4%|▍         | 1/26 [00:02<01:06,  2.67s/it]

Training Loss per 5000 steps: 0.6897991299629211
Training Accuracy per 5000 steps: 51.5625


100%|██████████| 26/26 [01:10<00:00,  2.72s/it]


The Total Accuracy for Epoch 1: 53.96825396825397
Training Loss Epoch: 0.6895589438768533
Training Accuracy Epoch: 53.96825396825397


  4%|▍         | 1/26 [00:02<01:11,  2.85s/it]

Training Loss per 5000 steps: 0.694517195224762
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:19<00:00,  3.06s/it]


The Total Accuracy for Epoch 2: 56.59340659340659
Training Loss Epoch: 0.6890195699838492
Training Accuracy Epoch: 56.59340659340659


  4%|▍         | 1/26 [00:02<01:13,  2.94s/it]

Training Loss per 5000 steps: 0.7080498337745667
Training Accuracy per 5000 steps: 40.625


100%|██████████| 26/26 [01:16<00:00,  2.94s/it]


The Total Accuracy for Epoch 3: 55.98290598290598
Training Loss Epoch: 0.6872568657765021
Training Accuracy Epoch: 55.98290598290598


  4%|▍         | 1/26 [00:02<01:12,  2.89s/it]

Training Loss per 5000 steps: 0.6820058822631836
Training Accuracy per 5000 steps: 57.8125


100%|██████████| 26/26 [01:13<00:00,  2.84s/it]


The Total Accuracy for Epoch 4: 53.84615384615385
Training Loss Epoch: 0.6881257914579831
Training Accuracy Epoch: 53.84615384615385


  4%|▍         | 1/26 [00:02<01:11,  2.87s/it]

Training Loss per 5000 steps: 0.6906676292419434
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:13<00:00,  2.84s/it]


The Total Accuracy for Epoch 5: 56.28815628815629
Training Loss Epoch: 0.6852830465023334
Training Accuracy Epoch: 56.28815628815629


  4%|▍         | 1/26 [00:02<01:11,  2.87s/it]

Training Loss per 5000 steps: 0.6812927722930908
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [01:30<00:00,  3.48s/it]


The Total Accuracy for Epoch 6: 55.61660561660562
Training Loss Epoch: 0.6840876249166635
Training Accuracy Epoch: 55.61660561660562


  4%|▍         | 1/26 [00:02<01:10,  2.84s/it]

Training Loss per 5000 steps: 0.6842745542526245
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:14<00:00,  2.88s/it]


The Total Accuracy for Epoch 7: 57.32600732600733
Training Loss Epoch: 0.6829985403097593
Training Accuracy Epoch: 57.32600732600733


  4%|▍         | 1/26 [00:02<01:08,  2.73s/it]

Training Loss per 5000 steps: 0.6744179725646973
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:13<00:00,  2.82s/it]


The Total Accuracy for Epoch 8: 56.65445665445665
Training Loss Epoch: 0.6826992126611563
Training Accuracy Epoch: 56.65445665445665


  4%|▍         | 1/26 [00:02<01:10,  2.80s/it]

Training Loss per 5000 steps: 0.6916084289550781
Training Accuracy per 5000 steps: 50.0


100%|██████████| 26/26 [01:13<00:00,  2.81s/it]


The Total Accuracy for Epoch 9: 58.42490842490842
Training Loss Epoch: 0.6816483438014984
Training Accuracy Epoch: 58.42490842490842


  4%|▍         | 1/26 [00:02<01:12,  2.89s/it]

Training Loss per 5000 steps: 0.6947983503341675
Training Accuracy per 5000 steps: 45.3125


100%|██████████| 26/26 [01:13<00:00,  2.82s/it]


The Total Accuracy for Epoch 10: 57.753357753357754
Training Loss Epoch: 0.6786751540807577
Training Accuracy Epoch: 57.753357753357754


  4%|▍         | 1/26 [00:02<01:11,  2.84s/it]

Training Loss per 5000 steps: 0.6780788898468018
Training Accuracy per 5000 steps: 57.8125


100%|██████████| 26/26 [01:13<00:00,  2.81s/it]


The Total Accuracy for Epoch 11: 58.66910866910867
Training Loss Epoch: 0.6798896376903241
Training Accuracy Epoch: 58.66910866910867


  4%|▍         | 1/26 [00:02<01:11,  2.87s/it]

Training Loss per 5000 steps: 0.6760585308074951
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:19<00:00,  3.07s/it]


The Total Accuracy for Epoch 12: 58.73015873015873
Training Loss Epoch: 0.6783169141182532
Training Accuracy Epoch: 58.73015873015873


  4%|▍         | 1/26 [00:02<01:12,  2.89s/it]

Training Loss per 5000 steps: 0.6931607723236084
Training Accuracy per 5000 steps: 45.3125


100%|██████████| 26/26 [01:14<00:00,  2.86s/it]


The Total Accuracy for Epoch 13: 58.05860805860806
Training Loss Epoch: 0.6764536568751702
Training Accuracy Epoch: 58.05860805860806


  4%|▍         | 1/26 [00:02<01:11,  2.84s/it]

Training Loss per 5000 steps: 0.6862077713012695
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [01:14<00:00,  2.86s/it]


The Total Accuracy for Epoch 14: 59.95115995115995
Training Loss Epoch: 0.6758655126278217
Training Accuracy Epoch: 59.95115995115995


  4%|▍         | 1/26 [00:02<01:12,  2.92s/it]

Training Loss per 5000 steps: 0.6996618509292603
Training Accuracy per 5000 steps: 46.875


100%|██████████| 26/26 [01:13<00:00,  2.85s/it]


The Total Accuracy for Epoch 15: 59.584859584859586
Training Loss Epoch: 0.6741755237946143
Training Accuracy Epoch: 59.584859584859586


  4%|▍         | 1/26 [00:02<01:11,  2.86s/it]

Training Loss per 5000 steps: 0.6771283149719238
Training Accuracy per 5000 steps: 57.8125


100%|██████████| 26/26 [01:13<00:00,  2.82s/it]


The Total Accuracy for Epoch 16: 60.195360195360195
Training Loss Epoch: 0.6739947497844696
Training Accuracy Epoch: 60.195360195360195


  4%|▍         | 1/26 [00:02<01:10,  2.84s/it]

Training Loss per 5000 steps: 0.6989840269088745
Training Accuracy per 5000 steps: 53.125


100%|██████████| 26/26 [01:12<00:00,  2.80s/it]


The Total Accuracy for Epoch 17: 60.317460317460316
Training Loss Epoch: 0.6729006423399999
Training Accuracy Epoch: 60.317460317460316


  4%|▍         | 1/26 [00:02<01:14,  2.99s/it]

Training Loss per 5000 steps: 0.6668469905853271
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [01:12<00:00,  2.80s/it]


The Total Accuracy for Epoch 18: 60.62271062271062
Training Loss Epoch: 0.6734807514227353
Training Accuracy Epoch: 60.62271062271062


  4%|▍         | 1/26 [00:02<01:09,  2.76s/it]

Training Loss per 5000 steps: 0.651386022567749
Training Accuracy per 5000 steps: 65.625


100%|██████████| 26/26 [01:11<00:00,  2.76s/it]


The Total Accuracy for Epoch 19: 58.97435897435897
Training Loss Epoch: 0.6731648949476389
Training Accuracy Epoch: 58.97435897435897


  4%|▍         | 1/26 [00:02<01:08,  2.75s/it]

Training Loss per 5000 steps: 0.7033386826515198
Training Accuracy per 5000 steps: 46.875


100%|██████████| 26/26 [01:12<00:00,  2.78s/it]


The Total Accuracy for Epoch 20: 60.866910866910864
Training Loss Epoch: 0.6696507357634031
Training Accuracy Epoch: 60.866910866910864


  4%|▍         | 1/26 [00:02<01:08,  2.74s/it]

Training Loss per 5000 steps: 0.6836097240447998
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:12<00:00,  2.78s/it]


The Total Accuracy for Epoch 21: 60.866910866910864
Training Loss Epoch: 0.6700746646294227
Training Accuracy Epoch: 60.866910866910864


  4%|▍         | 1/26 [00:02<01:10,  2.82s/it]

Training Loss per 5000 steps: 0.6750324964523315
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [01:13<00:00,  2.82s/it]


The Total Accuracy for Epoch 22: 59.15750915750916
Training Loss Epoch: 0.6726355460973886
Training Accuracy Epoch: 59.15750915750916


  4%|▍         | 1/26 [00:02<01:12,  2.88s/it]

Training Loss per 5000 steps: 0.6852738261222839
Training Accuracy per 5000 steps: 50.0


100%|██████████| 26/26 [01:13<00:00,  2.84s/it]


The Total Accuracy for Epoch 23: 59.34065934065934
Training Loss Epoch: 0.6697726226769961
Training Accuracy Epoch: 59.34065934065934


  4%|▍         | 1/26 [00:02<01:12,  2.90s/it]

Training Loss per 5000 steps: 0.6852605938911438
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:14<00:00,  2.87s/it]


The Total Accuracy for Epoch 24: 61.35531135531136
Training Loss Epoch: 0.6651676939083979
Training Accuracy Epoch: 61.35531135531136


  4%|▍         | 1/26 [00:02<01:11,  2.86s/it]

Training Loss per 5000 steps: 0.66957026720047
Training Accuracy per 5000 steps: 67.1875


100%|██████████| 26/26 [01:13<00:00,  2.82s/it]


The Total Accuracy for Epoch 25: 59.21855921855922
Training Loss Epoch: 0.6684628656277289
Training Accuracy Epoch: 59.21855921855922


  4%|▍         | 1/26 [00:02<01:13,  2.92s/it]

Training Loss per 5000 steps: 0.6743199825286865
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:16<00:00,  2.96s/it]


The Total Accuracy for Epoch 26: 61.172161172161175
Training Loss Epoch: 0.6654344430336585
Training Accuracy Epoch: 61.172161172161175


  4%|▍         | 1/26 [00:02<01:11,  2.87s/it]

Training Loss per 5000 steps: 0.6587346792221069
Training Accuracy per 5000 steps: 70.3125


100%|██████████| 26/26 [01:18<00:00,  3.03s/it]


The Total Accuracy for Epoch 27: 61.233211233211236
Training Loss Epoch: 0.6644795330671164
Training Accuracy Epoch: 61.233211233211236


  4%|▍         | 1/26 [00:05<02:11,  5.25s/it]

Training Loss per 5000 steps: 0.6700491905212402
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [03:18<00:00,  7.65s/it]


The Total Accuracy for Epoch 28: 62.27106227106227
Training Loss Epoch: 0.6637566593977121
Training Accuracy Epoch: 62.27106227106227


  4%|▍         | 1/26 [00:23<09:54, 23.76s/it]

Training Loss per 5000 steps: 0.7042104601860046
Training Accuracy per 5000 steps: 50.0


100%|██████████| 26/26 [07:04<00:00, 16.34s/it]

The Total Accuracy for Epoch 29: 59.70695970695971
Training Loss Epoch: 0.666911624945127
Training Accuracy Epoch: 59.70695970695971





In [36]:
for epoch in range(30):
    train(epoch)

  4%|▍         | 1/26 [00:15<06:17, 15.09s/it]

Training Loss per 5000 steps: 0.6586676239967346
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [06:10<00:00, 14.26s/it]


The Total Accuracy for Epoch 0: 61.35531135531136
Training Loss Epoch: 0.6642708870080801
Training Accuracy Epoch: 61.35531135531136


  4%|▍         | 1/26 [00:14<05:55, 14.23s/it]

Training Loss per 5000 steps: 0.6631571650505066
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [06:07<00:00, 14.14s/it]


The Total Accuracy for Epoch 1: 61.47741147741148
Training Loss Epoch: 0.6636609595555526
Training Accuracy Epoch: 61.47741147741148


  4%|▍         | 1/26 [00:14<05:54, 14.19s/it]

Training Loss per 5000 steps: 0.644419252872467
Training Accuracy per 5000 steps: 71.875


100%|██████████| 26/26 [06:07<00:00, 14.12s/it]


The Total Accuracy for Epoch 2: 63.614163614163616
Training Loss Epoch: 0.6620221413098849
Training Accuracy Epoch: 63.614163614163616


  4%|▍         | 1/26 [00:14<05:52, 14.09s/it]

Training Loss per 5000 steps: 0.6930553913116455
Training Accuracy per 5000 steps: 43.75


100%|██████████| 26/26 [04:47<00:00, 11.07s/it]


The Total Accuracy for Epoch 3: 61.53846153846154
Training Loss Epoch: 0.661240068765787
Training Accuracy Epoch: 61.53846153846154


  4%|▍         | 1/26 [00:02<01:01,  2.46s/it]

Training Loss per 5000 steps: 0.6759253740310669
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [01:03<00:00,  2.44s/it]


The Total Accuracy for Epoch 4: 63.43101343101343
Training Loss Epoch: 0.6595059128908011
Training Accuracy Epoch: 63.43101343101343


  4%|▍         | 1/26 [00:02<01:01,  2.44s/it]

Training Loss per 5000 steps: 0.6570672988891602
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [01:05<00:00,  2.50s/it]


The Total Accuracy for Epoch 5: 62.637362637362635
Training Loss Epoch: 0.6587364879938272
Training Accuracy Epoch: 62.637362637362635


  4%|▍         | 1/26 [00:02<01:08,  2.76s/it]

Training Loss per 5000 steps: 0.6914713382720947
Training Accuracy per 5000 steps: 56.25


100%|██████████| 26/26 [01:32<00:00,  3.54s/it]


The Total Accuracy for Epoch 6: 62.39316239316239
Training Loss Epoch: 0.6579941236055814
Training Accuracy Epoch: 62.39316239316239


  4%|▍         | 1/26 [00:04<01:53,  4.53s/it]

Training Loss per 5000 steps: 0.6604920625686646
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [01:27<00:00,  3.37s/it]


The Total Accuracy for Epoch 7: 62.45421245421245
Training Loss Epoch: 0.6579505205154419
Training Accuracy Epoch: 62.45421245421245


  4%|▍         | 1/26 [00:03<01:22,  3.32s/it]

Training Loss per 5000 steps: 0.6539297103881836
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:34<00:00,  3.65s/it]


The Total Accuracy for Epoch 8: 63.43101343101343
Training Loss Epoch: 0.6552469271879929
Training Accuracy Epoch: 63.43101343101343


  4%|▍         | 1/26 [00:03<01:39,  3.96s/it]

Training Loss per 5000 steps: 0.6404223442077637
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:29<00:00,  3.45s/it]


The Total Accuracy for Epoch 9: 62.39316239316239
Training Loss Epoch: 0.6577477478064023
Training Accuracy Epoch: 62.39316239316239


  4%|▍         | 1/26 [00:03<01:15,  3.03s/it]

Training Loss per 5000 steps: 0.6698536276817322
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:15<00:00,  2.90s/it]


The Total Accuracy for Epoch 10: 61.53846153846154
Training Loss Epoch: 0.6591000740344708
Training Accuracy Epoch: 61.53846153846154


  4%|▍         | 1/26 [00:02<01:14,  2.98s/it]

Training Loss per 5000 steps: 0.6480350494384766
Training Accuracy per 5000 steps: 70.3125


100%|██████████| 26/26 [01:15<00:00,  2.90s/it]


The Total Accuracy for Epoch 11: 63.12576312576313
Training Loss Epoch: 0.6510556615315951
Training Accuracy Epoch: 63.12576312576313


  4%|▍         | 1/26 [00:03<01:15,  3.03s/it]

Training Loss per 5000 steps: 0.6271648406982422
Training Accuracy per 5000 steps: 70.3125


100%|██████████| 26/26 [01:16<00:00,  2.93s/it]


The Total Accuracy for Epoch 12: 62.39316239316239
Training Loss Epoch: 0.6543296483846811
Training Accuracy Epoch: 62.39316239316239


  4%|▍         | 1/26 [00:03<01:15,  3.03s/it]

Training Loss per 5000 steps: 0.6824895739555359
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [03:22<00:00,  7.78s/it]


The Total Accuracy for Epoch 13: 64.22466422466422
Training Loss Epoch: 0.6530881936733539
Training Accuracy Epoch: 64.22466422466422


  4%|▍         | 1/26 [00:14<05:59, 14.39s/it]

Training Loss per 5000 steps: 0.6391014456748962
Training Accuracy per 5000 steps: 68.75


100%|██████████| 26/26 [06:06<00:00, 14.09s/it]


The Total Accuracy for Epoch 14: 64.46886446886447
Training Loss Epoch: 0.6517010973050044
Training Accuracy Epoch: 64.46886446886447


  4%|▍         | 1/26 [00:14<05:54, 14.18s/it]

Training Loss per 5000 steps: 0.6445025205612183
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [06:11<00:00, 14.30s/it]


The Total Accuracy for Epoch 15: 63.36996336996337
Training Loss Epoch: 0.6507597840749301
Training Accuracy Epoch: 63.36996336996337


  4%|▍         | 1/26 [00:14<06:13, 14.94s/it]

Training Loss per 5000 steps: 0.6512166857719421
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [04:14<00:00,  9.81s/it]


The Total Accuracy for Epoch 16: 63.30891330891331
Training Loss Epoch: 0.6493170330157647
Training Accuracy Epoch: 63.30891330891331


  4%|▍         | 1/26 [00:05<02:27,  5.88s/it]

Training Loss per 5000 steps: 0.6707883477210999
Training Accuracy per 5000 steps: 57.8125


100%|██████████| 26/26 [01:29<00:00,  3.45s/it]


The Total Accuracy for Epoch 17: 64.59096459096459
Training Loss Epoch: 0.6501165078236506
Training Accuracy Epoch: 64.59096459096459


  4%|▍         | 1/26 [00:04<01:51,  4.47s/it]

Training Loss per 5000 steps: 0.6782494783401489
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [01:33<00:00,  3.60s/it]


The Total Accuracy for Epoch 18: 63.492063492063494
Training Loss Epoch: 0.6490718836967762
Training Accuracy Epoch: 63.492063492063494


  4%|▍         | 1/26 [00:03<01:31,  3.65s/it]

Training Loss per 5000 steps: 0.6708793640136719
Training Accuracy per 5000 steps: 62.5


100%|██████████| 26/26 [01:29<00:00,  3.43s/it]


The Total Accuracy for Epoch 19: 64.77411477411478
Training Loss Epoch: 0.6494724108622625
Training Accuracy Epoch: 64.77411477411478


  4%|▍         | 1/26 [00:05<02:18,  5.54s/it]

Training Loss per 5000 steps: 0.6474196910858154
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [01:55<00:00,  4.42s/it]


The Total Accuracy for Epoch 20: 63.614163614163616
Training Loss Epoch: 0.6480908325085273
Training Accuracy Epoch: 63.614163614163616


  4%|▍         | 1/26 [00:05<02:25,  5.81s/it]

Training Loss per 5000 steps: 0.6238331198692322
Training Accuracy per 5000 steps: 71.875


100%|██████████| 26/26 [01:36<00:00,  3.70s/it]


The Total Accuracy for Epoch 21: 64.71306471306471
Training Loss Epoch: 0.6439842146176559
Training Accuracy Epoch: 64.71306471306471


  4%|▍         | 1/26 [00:03<01:32,  3.69s/it]

Training Loss per 5000 steps: 0.6204677820205688
Training Accuracy per 5000 steps: 68.75


100%|██████████| 26/26 [01:32<00:00,  3.56s/it]


The Total Accuracy for Epoch 22: 65.44566544566544
Training Loss Epoch: 0.6435632293040936
Training Accuracy Epoch: 65.44566544566544


  4%|▍         | 1/26 [00:03<01:31,  3.67s/it]

Training Loss per 5000 steps: 0.7089557647705078
Training Accuracy per 5000 steps: 54.6875


100%|██████████| 26/26 [01:32<00:00,  3.56s/it]


The Total Accuracy for Epoch 23: 63.614163614163616
Training Loss Epoch: 0.6479041966108176
Training Accuracy Epoch: 63.614163614163616


  4%|▍         | 1/26 [00:03<01:27,  3.52s/it]

Training Loss per 5000 steps: 0.6815110445022583
Training Accuracy per 5000 steps: 60.9375


100%|██████████| 26/26 [01:29<00:00,  3.43s/it]


The Total Accuracy for Epoch 24: 64.77411477411478
Training Loss Epoch: 0.6455508929032546
Training Accuracy Epoch: 64.77411477411478


  4%|▍         | 1/26 [00:03<01:33,  3.74s/it]

Training Loss per 5000 steps: 0.6343772411346436
Training Accuracy per 5000 steps: 68.75


100%|██████████| 26/26 [01:29<00:00,  3.45s/it]


The Total Accuracy for Epoch 25: 64.59096459096459
Training Loss Epoch: 0.6455177939855136
Training Accuracy Epoch: 64.59096459096459


  4%|▍         | 1/26 [00:03<01:27,  3.51s/it]

Training Loss per 5000 steps: 0.637758195400238
Training Accuracy per 5000 steps: 68.75


100%|██████████| 26/26 [01:34<00:00,  3.63s/it]


The Total Accuracy for Epoch 26: 64.34676434676435
Training Loss Epoch: 0.6442531874546638
Training Accuracy Epoch: 64.34676434676435


  4%|▍         | 1/26 [00:03<01:36,  3.88s/it]

Training Loss per 5000 steps: 0.6386380791664124
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [02:11<00:00,  5.05s/it]


The Total Accuracy for Epoch 27: 65.56776556776556
Training Loss Epoch: 0.6374505414412572
Training Accuracy Epoch: 65.56776556776556


  4%|▍         | 1/26 [00:02<01:11,  2.86s/it]

Training Loss per 5000 steps: 0.6439143419265747
Training Accuracy per 5000 steps: 59.375


100%|██████████| 26/26 [01:15<00:00,  2.92s/it]


The Total Accuracy for Epoch 28: 63.614163614163616
Training Loss Epoch: 0.6458763411411872
Training Accuracy Epoch: 63.614163614163616


  4%|▍         | 1/26 [00:02<01:10,  2.83s/it]

Training Loss per 5000 steps: 0.6600029468536377
Training Accuracy per 5000 steps: 64.0625


100%|██████████| 26/26 [01:26<00:00,  3.32s/it]

The Total Accuracy for Epoch 29: 63.98046398046398
Training Loss Epoch: 0.6436567489917462
Training Accuracy Epoch: 63.98046398046398





In [None]:
def summarize_results()

In [51]:
#validation def
VALID_BATCH_SIZE = 1 #set to 1 for printing of individual wrong predictions

#detokenize def
def DBDetokenize(a):
    a_orig = [tokenizer.decode(x) for x in a['ids'].squeeze().tolist() if x != 0]
    a_orig = ([x.replace(' ' , '') for x in a_orig])
    return " ".join(a_orig)

def valid(model, testing_loader):
    tr_loss = 0 #added
    nb_tr_steps = 0 #added
    nb_tr_examples = 0 #added
    max_wrong_outputs = 10
    wrong_outputs = 0
    model.eval()
    n_correct = 0; n_wrong = 0; total = 0
    with torch.no_grad():
        for _, data in enumerate(testing_loader, 0):
            ids = data['ids'].to(device, dtype = torch.long)
            mask = data['mask'].to(device, dtype = torch.long)
            targets = data['targets'].to(device, dtype = torch.long)
            outputs = model(ids, mask)#.squeeze()
            loss = loss_function(outputs, targets)
            tr_loss += loss.item()
            big_val, big_idx = torch.max(outputs.data, dim=1)
            n_correct += calcuate_accu(big_idx, targets)

            #print individual wrong responses to file
            if VALID_BATCH_SIZE == 1 and wrong_outputs < max_wrong_outputs: 
                wrong_outputs += 1
                path = '/home/pd/summar   ies/yelp_summary_13Mar23.txt'
                with open(path,'a') as f:
                    f.write(DBDetokenize(data))
                    f.write(f'Should be: {1 if targets.item() else 5}')
                    f.write('\n')
            nb_tr_steps += 1
            nb_tr_examples+=targets.size(0)
            
            if _%5000==0:
                loss_step = tr_loss/nb_tr_steps
                accu_step = (n_correct*100)/nb_tr_examples
                print(f"Validation Loss per 100 steps: {loss_step}")
                print(f"Validation Accuracy per 100 steps: {accu_step}")
    epoch_loss = tr_loss/nb_tr_steps
    epoch_accu = (n_correct*100)/nb_tr_examples
    print(f"Validation Loss Epoch: {epoch_loss}")
    print(f"Validation Accuracy Epoch: {epoch_accu}")
    
    return epoch_accu

In [52]:
#validation run
acc = valid(model, testing_loader)
print("Accuracy on test data = %0.2f%%" % acc)



Validation Loss per 100 steps: 0.4546707570552826
Validation Accuracy per 100 steps: 100.0
Validation Loss Epoch: 0.6451624179395233
Validation Accuracy Epoch: 64.8780487804878
Accuracy on test data = 64.88%


In [37]:
output_model_file = '/home/pd/models/yelp_sentiment.bin'
output_vocab_file = '/home/pd/models/yelp_sentiment_vocab.bin'

model_to_save = model
torch.save(model_to_save, output_model_file)
tokenizer.save_vocabulary(output_vocab_file)

('/home/pd/models/yelp_sentiment_vocab.bin',)

In [19]:
#get raw dbert
dbase = DistilBertModel.from_pretrained("distilbert-base-uncased")
a = next(iter(single_loader))
def get_raw_dbert(a, dbase): 
    return dbase(input_ids=a['ids'],attention_mask=a['mask'])

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertModel: ['vocab_layer_norm.weight', 'vocab_projector.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_transform.bias', 'vocab_transform.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [30]:
exec(open('/home/pd/NNBasics/NNBasics/src/PostBertEngine.py').read())

2023-03-22 10:16:50.800780: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-22 10:16:52.413475: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


KeyboardInterrupt: 

In [18]:
a = torch.load('/home/pd/datasets/yelp_reviews/yelp_reviews_targets_1024.pt')
a.shape

torch.Size([1638, 1])

In [69]:
exec(open('/home/pd/NNBasics/NNBasics/src/Preprocess.py').read())

In [70]:
DATA_CSV_PATH = "/home/pd/datasets/yelp_reviews_temp/yelp_reviews_2048.csv"
DATA_PATH = "/home/pd/datasets/yelp_reviews/yelp_reviews.json"
SUMMARY_PATH = '/home/pd/summaries/yelp_summary_13Mar23.txt'
OUTPUT_MODEL_PATH = '/home/pd/models/yelp_sentiment.bin'
OUTPUT_VOCAB_PATH = '/home/pd/models/yelp_sentiment_vocab.bin'
BERT_OUT_PATH = '/home/pd/datasets/yelp_reviews_temp/yelp_reviews_postBERT_1024.pt'
TARG_OUT_PATH = '/home/pd/datasets/yelp_reviews_temp/yelp_reviews_targets_1024.pt'

In [74]:
paramd = dict()
paramd['data_path'] = DATA_PATH
paramd['out_path'] = "/home/pd/datasets/yelp_reviews_temp/yelp_reviews_2048.csv"
paramd['max_words'] = 64
paramd['low_star_rev_lim'] = 100
paramd['tot_rev_lim'] = float('inf')
paramd['debug_path'] = '/home/pd/summaries/yelp_temp.txt'

ypp = YelpPreprocess(**paramd)

In [75]:
ypp.data_path

'/home/pd/datasets/yelp_reviews/yelp_reviews.json'

In [76]:
rev = ypp.json_to_csv()

In [1]:
class model():
    stat = 50

    def __init__(self,inp):
        self.inp = inp

    def square(self):
        self.inp = self.inp**2

    @staticmethod
    def buzz(a,b):
        return a + b
    
    def add(self):
        self.inp = self.buzz(self.inp,self.inp)

    def addstat(self):
        self.inp = self.inp + self.stat

class foo():

    def __init__(self,inp):
        self.inp = inp

    def square_obj(self):
        self.inp.square()



In [2]:
a = model(5)
a.addstat()
a.inp

55

In [3]:
DATA_CSV_PATH = "/home/pd/datasets/yelp_reviews/yelp_reviews_2048.csv"
DATA_PATH = "/home/pd/datasets/yelp_reviews/yelp_reviews.json"
SUMMARY_PATH = '/home/pd/summaries/yelp_summary_24Mar23.txt'
OUTPUT_MODEL_PATH = '/home/pd/models/yelp_sentiment.bin'
OUTPUT_VOCAB_PATH = '/home/pd/models/yelp_sentiment_vocab.bin'
BERT_OUT_PATH = '/home/pd/datasets/yelp_reviews/yelp_reviews_postBERT_1024.pt'
TARG_OUT_PATH = '/home/pd/datasets/yelp_reviews/yelp_reviews_targets_1024.pt'


SAVE_BERT_LAYER_OUT = False
FROM_BERT_LAYER_OUT = not SAVE_BERT_LAYER_OUT

FROM_CSV_CONDENSED = True #otherwise getting from full json file

#read-in data
lowstar_review_limit = 128
review_limit = float('inf')
sample_per_cat = 128
max_num_words = 50

#init train/test params
MAX_LEN = 64
TRAIN_BATCH_SIZE = 8
VALID_BATCH_SIZE = 1 #set to 1 for printing of individual wrong predictions
EPOCHS = 5
LEARNING_RATE = 1e-05
AUTO_SCALE_GRAD = False

In [6]:
from Model import DBertMultiCat

ModuleNotFoundError: No module named 'Model'

In [4]:
a = None
if not a:
    print('hi')
else:
    print('you')

hi
