Automatic Hyperparameter Tuning Results:
https://wandb.ai/kaanyarali/speech-test-project-1?workspace=user-kaanyarali

In [None]:
!pip install !pip install tensorflow_io
!pip install keras-tuner
!pip install print_schema
!pip install pydub
!pip install opensmile
!pip install tqdm boto3 requests regex sentencepiece sacremoses
!pip install transformers

[31mERROR: Invalid requirement: '!pip'[0m
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import pandas as pd
from sklearn.model_selection import StratifiedShuffleSplit
import torch
import torchvision
import numpy as np
import gc
import torch.nn as nn
from transformers import BertModel
import pickle

In [None]:
# Load the pretrained 'bert-base-uncased' tokenizer via torch.hub from the
# huggingface/pytorch-transformers repository. The same checkpoint name is used
# for the encoder in BertClassifier, so the vocabulary matches the model.
tokenizer = torch.hub.load('huggingface/pytorch-transformers', 'tokenizer', 'bert-base-uncased')

Using cache found in /root/.cache/torch/hub/huggingface_pytorch-transformers_main


In [None]:
class ADdataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs tokenizer encodings with per-sample labels.

    `encodings` is a mapping from field name to an indexable collection with one
    entry per sample; `labels` is an indexable collection of the same length.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        # The number of samples is defined by the labels.
        return len(self.labels)

    def __getitem__(self, idx):
        # Build one sample: every encoding field converted to a tensor, plus
        # the label stored under the 'labels' key.
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

In [None]:
# Create the BertClassifier class
class BertClassifier(nn.Module):
    """BERT encoder followed by an MLP head that emits one logit per sample.

    The head is a stack of `Linear -> BatchNorm1d -> ReLU -> Dropout` groups (one
    per entry in `archs`) followed by a final `Linear(..., 1)`. No sigmoid is
    applied: the output is a raw logit, as expected by `BCEWithLogitsLoss`.
    """
    def __init__(self, archs, dropout, freeze_bert=False):
        """
        @param    archs (sequence of int): widths of the hidden layers of the
                      classification head, in order. May be empty, in which case
                      the head is a single linear layer.
        @param    dropout (float): dropout probability applied after every
                      hidden layer of the head.
        @param    freeze_bert (bool): Set `False` to fine-tune the BERT model,
                      `True` to train only the classification head.
        """
        super(BertClassifier, self).__init__()

        # Instantiate BERT model
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Input width of the head = hidden size of the encoder
        # (768 for bert-base-uncased).
        in_features = self.bert.config.hidden_size

        # Build the head in one loop so that every hidden layer, including the
        # first, is constructed the same way.
        # NOTE: BatchNorm1d cannot normalise a batch of size 1 in training mode,
        # so training batches must contain at least two samples.
        layers = []
        for width in archs:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.BatchNorm1d(num_features=width))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_features = width

        # Single output unit: one logit for binary classification.
        layers.append(nn.Linear(in_features, 1))
        self.classifier = nn.Sequential(*layers)

        # Freeze the BERT model
        if freeze_bert:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """
        Feed input to BERT and the classifier to compute logits.
        @param    input_ids (torch.Tensor): an input tensor with shape (batch_size,
                      max_length)
        @param    attention_mask (torch.Tensor): a tensor that hold attention mask
                      information with shape (batch_size, max_length)
        @return   logits (torch.Tensor): an output tensor with shape (batch_size, 1)
                      holding one raw (pre-sigmoid) logit per sample
        """
        # Feed input to BERT
        outputs = self.bert(input_ids=input_ids,
                            attention_mask=attention_mask)

        # Extract the last hidden state of the token `[CLS]` (position 0) for
        # the classification task
        last_hidden_state_cls = outputs[0][:, 0, :]

        # Feed input to classifier to compute logits
        logits = self.classifier(last_hidden_state_cls)

        return logits

In [None]:
from transformers import AdamW, get_linear_schedule_with_warmup
from torch.utils.data import DataLoader


def initialize_model(archs, dropout, train_loader, epochs=50):
    """Build a fresh BertClassifier together with its optimizer and LR scheduler.

    The model is moved to the module-level `device`, which therefore has to be
    defined before this function is called. The scheduler decays the learning
    rate linearly (no warm-up) over `len(train_loader) * epochs` steps, so it is
    meant to be stepped once per training batch.

    Returns the tuple `(model, optimizer, scheduler)`.
    """
    # Fine-tune the whole network (BERT is not frozen).
    model = BertClassifier(archs, dropout, freeze_bert=False)
    model.to(device)

    # AdamW over every parameter of the model.
    adamw = AdamW(model.parameters(), lr=5e-5, eps=1e-8)

    # One scheduler step per batch, for every epoch.
    num_training_steps = len(train_loader) * epochs
    lr_schedule = get_linear_schedule_with_warmup(
        adamw,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )
    return model, adamw, lr_schedule

In [None]:
import random
import time

# Specify loss function.
# Alternatives that were tried earlier, kept for reference:
#loss_fn = nn.CrossEntropyLoss()
#loss_fn = nn.BCELoss()
# BCEWithLogitsLoss applies the sigmoid internally, so it must be fed the raw
# (pre-sigmoid) model outputs together with float targets.
loss_fn =  torch.nn.BCEWithLogitsLoss()
def set_seed(seed_value=42):
    """Seed every random number generator the notebook relies on.

    Covers Python's `random`, NumPy's global generator, PyTorch's CPU generator
    and the generators of all CUDA devices, in that order.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seeder in seeders:
        seeder(seed_value)

def train(model, train_loader, optim, val_loader=None, epochs=50, evaluation=False, lr_scheduler=None):
    """Train the BertClassifier model.

    @param    model: module called as `model(input_ids, attention_mask)` that
                  returns one raw logit per sample.
    @param    train_loader: iterable of batches; each batch is a dict with the
                  keys 'input_ids', 'attention_mask' and 'labels'.
    @param    optim: optimizer that owns the parameters of `model`.
    @param    val_loader: validation batches, required when `evaluation` is true.
    @param    epochs (int): number of passes over `train_loader`.
    @param    evaluation (bool): run `evaluate` on `val_loader` after each epoch.
    @param    lr_scheduler: optional learning-rate scheduler stepped once per
                  batch. When omitted, a module-level variable named `scheduler`
                  is used if one exists (this is how the notebook originally
                  wired it); otherwise no scheduling is done.
    @return   (model, train_loss_list, val_loss_list, train_acc_list, val_acc_list);
                  the lists hold one rounded value per epoch. The validation
                  lists stay empty when `evaluation` is false.
    @raises   ValueError: if `evaluation` is true but `val_loader` is None.

    Relies on the module-level `device` and `loss_fn`.
    """
    if evaluation and val_loader is None:
        raise ValueError("evaluation=True requires a val_loader.")

    if lr_scheduler is None:
        # Backward compatibility with callers that only define a global scheduler.
        lr_scheduler = globals().get("scheduler")

    # Start training loop
    print("Start training...\n")
    train_loss_list = []
    val_loss_list = []
    train_acc_list = []
    val_acc_list = []
    for epoch_i in range(epochs):
        train_loss_sum = 0
        train_accuracy_epoch = 0
        # Put the model into the training mode (evaluate() switches it to eval mode)
        model.train()

        # For each batch of training data...
        for batch in train_loader:
            optim.zero_grad()
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Forward pass; flatten (batch_size, 1) logits to (batch_size,)
            logits = model(input_ids, attention_mask).reshape(-1)

            # Compute loss and accumulate the loss values
            loss = loss_fn(logits, labels.float())
            train_loss_sum += loss.item()

            # The model emits raw logits, so convert to probabilities before
            # applying the 0.5 decision threshold.
            predicted = torch.sigmoid(logits.detach()) > 0.5
            train_acc = (labels == predicted).sum().item() / labels.size(0)
            train_accuracy_epoch += train_acc

            # Perform a backward pass to calculate gradients
            loss.backward()

            # Clip the norm of the gradients to 1.0 to prevent "exploding gradients"
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            # Update parameters with the optimizer that was passed in, then
            # advance the learning-rate schedule (if any)
            optim.step()
            if lr_scheduler is not None:
                lr_scheduler.step()

        # Epoch metrics are the mean of the per-batch values
        avg_train_loss = np.round(train_loss_sum/len(train_loader),2)
        avg_train_acc = np.round(train_accuracy_epoch/len(train_loader),2)
        train_loss_list.append(avg_train_loss)
        train_acc_list.append(avg_train_acc)

        # Validation metrics stay None when no evaluation was requested
        avg_val_loss, avg_val_acc = None, None
        if evaluation:
            avg_val_loss, avg_val_acc = evaluate(model, val_loader)
            val_loss_list.append(avg_val_loss)
            val_acc_list.append(avg_val_acc)

        print("Epoch: {} completed. Training Accuracy: {}, Training Loss: {}, Validation Accuracy: {}, Validation Loss: {}".format(epoch_i,avg_train_acc,avg_train_loss,avg_val_acc,avg_val_loss))

    print("Training complete!")
    return model,train_loss_list,val_loss_list,train_acc_list,val_acc_list


def evaluate(model, val_dataloader):
    """After the completion of each training epoch, measure the model's performance
    on our validation set.

    @param    model: module called as `model(input_ids, attention_mask)` that
                  returns one raw logit per sample.
    @param    val_dataloader: sized iterable of batches; each batch is a dict
                  with the keys 'input_ids', 'attention_mask' and 'labels'.
    @return   (avg_val_loss, avg_val_acc): mean of the per-batch loss and of the
                  per-batch accuracy, each rounded to 2 decimals.
    @raises   ValueError: if `val_dataloader` contains no batches.

    Relies on the module-level `device` and `loss_fn`. The model is left in
    evaluation mode when the function returns.
    """
    num_batches = len(val_dataloader)
    if num_batches == 0:
        raise ValueError("val_dataloader is empty; nothing to evaluate.")

    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    val_loss_sum = 0
    val_accuracy_epoch = 0

    # No gradients are needed for evaluation
    with torch.no_grad():
        # For each batch in our validation set...
        for batch in val_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # Compute logits, flattened from (batch_size, 1) to (batch_size,)
            logits = model(input_ids, attention_mask).reshape(-1)

            # Compute loss
            loss = loss_fn(logits, labels.float())
            val_loss_sum += loss.item()

            # The model emits raw logits, so convert to probabilities before
            # applying the 0.5 decision threshold.
            predicted = torch.sigmoid(logits) > 0.5
            val_acc = (labels == predicted).sum().item() / labels.size(0)
            val_accuracy_epoch += val_acc

    # Average once, after all batches have been processed
    avg_val_loss = np.round(val_loss_sum/num_batches,2)
    avg_val_acc = np.round(val_accuracy_epoch/num_batches,2)

    return avg_val_loss, avg_val_acc


def evaluate_test(model, test_dataloader):
    """Measure the model's loss and accuracy on the test set and collect its outputs.

    @param    model: module called as `model(input_ids, attention_mask)` that
                  returns one raw logit per sample.
    @param    test_dataloader: sized iterable of batches; each batch is a dict
                  with the keys 'input_ids', 'attention_mask' and 'labels'.
    @return   (avg_test_loss, avg_test_acc, predictions, labels_list):
                  the first two are the means of the per-batch loss / accuracy
                  rounded to 5 decimals; `predictions` is a list with one CPU
                  tensor of raw logits per batch (not thresholded classes);
                  `labels_list` is a list with one CPU label tensor per batch.
    @raises   ValueError: if `test_dataloader` contains no batches.

    Relies on the module-level `device` and `loss_fn`. The model is left in
    evaluation mode when the function returns.
    """
    num_batches = len(test_dataloader)
    if num_batches == 0:
        raise ValueError("test_dataloader is empty; nothing to evaluate.")

    # Put the model into the evaluation mode. The dropout layers are disabled during
    # the test time.
    model.eval()

    # Tracking variables
    test_loss_sum = 0
    test_accuracy_epoch = 0
    predictions = []
    labels_list = []

    # No gradients are needed for evaluation
    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)
            labels_list.append(labels.cpu())

            # Compute logits, flattened from (batch_size, 1) to (batch_size,)
            logits = model(input_ids, attention_mask).reshape(-1)
            predictions.append(logits.cpu())

            # Compute loss
            loss = loss_fn(logits, labels.float())
            test_loss_sum += loss.item()

            # The model emits raw logits, so convert to probabilities before
            # applying the 0.5 decision threshold.
            predicted = torch.sigmoid(logits) > 0.5
            test_acc = (labels == predicted).sum().item() / labels.size(0)
            test_accuracy_epoch += test_acc

    # Average once, after all batches have been processed
    avg_test_loss = np.round(test_loss_sum/num_batches,5)
    avg_test_acc = np.round(test_accuracy_epoch/num_batches,5)

    return avg_test_loss, avg_test_acc, predictions, labels_list


In [None]:
def evaluate_ensemble(models, test_dataloader):
    """Accuracy of a majority-vote ensemble on the test set.

    Every model votes positive when its predicted probability exceeds 0.5; a
    sample is classified positive when strictly more than half of the models
    vote positive (ties therefore resolve to the negative class).

    @param    models: iterable of modules called as
                  `model(input_ids, attention_mask)`, each returning one raw
                  logit per sample.
    @param    test_dataloader: sized iterable of batches; each batch is a dict
                  with the keys 'input_ids', 'attention_mask' and 'labels'.
    @return   avg_test_acc: mean of the per-batch accuracies, rounded to 5 decimals.
    @raises   ValueError: if `models` or `test_dataloader` is empty.

    Relies on the module-level `device`. All models are left in evaluation mode.
    """
    models = list(models)
    if not models:
        raise ValueError("models is empty; an ensemble needs at least one model.")
    num_batches = len(test_dataloader)
    if num_batches == 0:
        raise ValueError("test_dataloader is empty; nothing to evaluate.")

    # Switch every model to evaluation mode once, up front
    for model in models:
        model.eval()

    test_accuracy_epoch = 0
    with torch.no_grad():
        for batch in test_dataloader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            # One row of boolean votes per model. The models emit raw logits,
            # so convert to probabilities before applying the 0.5 threshold.
            votes = torch.stack([
                torch.sigmoid(model(input_ids, attention_mask).reshape(-1)) > 0.5
                for model in models
            ])

            # Majority voting across the model dimension
            prediction_ensemble = votes.sum(dim=0) > 0.5*len(models)

            test_acc = (labels == prediction_ensemble).sum().item() / labels.size(0)
            test_accuracy_epoch += test_acc

    avg_test_acc = np.round(test_accuracy_epoch/num_batches,5)
    return avg_test_acc

In [None]:
# Global seed: seeds the RNGs here, and is reused below for the stratified
# train/validation splits and in the names of the saved model/log files.
seed = 42
set_seed(seed)    # Set seed for reproducibility

In [None]:
# Columns are read by position: column 0 -> id, column 1 -> labels, column 2 -> texts.
# NOTE(review): this presumes train.csv has exactly that column order — confirm
# against the file (the test CSV is read by column name instead).
data = pd.read_csv("train.csv") #load training csv file
# NOTE(review): `id` shadows the Python builtin of the same name for the rest of
# the notebook session.
id = data.iloc[:,0].values
train_labels = data.iloc[:,1].values
train_texts = data.iloc[:,2].values

In [None]:
df = pd.read_csv("test.csv") #load test csv file
# Replace missing transcripts with empty strings so the tokenizer only receives
# str values. The result is assigned back to the column: calling
# `fillna(..., inplace=True)` on `df["Content"]` mutates a temporary Series,
# which warns in pandas 2.x and leaves `df` unchanged under copy-on-write.
df["Content"] = df["Content"].fillna("")
test_texts = list(df.loc[:,"Content"])
test_labels = list(df.loc[:,"Label"])

# Tokenize the whole test set once; it is evaluated in file order (no shuffling).
test_encodings = tokenizer(test_texts, truncation=True, padding=True)
test_dataset = ADdataset(test_encodings, test_labels)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

In [None]:
# Sanity check: size of the full training set before it is split into
# train/validation parts.
print("Number of Samples in the training set: {}".format(len(train_texts)))

Number of Samples in the training set: 166


In [None]:
def get_several_validation_set(content, label, val_fraction=0.2, total_splits=5, seed=0):
  '''
  Produces `total_splits` stratified random train/validation splits.

  Each split holds out `val_fraction` of the samples for validation, stratified
  on `label`. Returns the iterator created by `StratifiedShuffleSplit.split`,
  which yields `(train_indices, val_indices)` pairs.
  '''
  splitter = StratifiedShuffleSplit(
      n_splits=total_splits,
      test_size=val_fraction,
      random_state=seed,
  )
  split_iterator = splitter.split(content, label)
  return split_iterator

In [None]:
def find_pos_sample_frac(sample_label):
  '''
  Returns the fraction of entries equal to 1 in the given label array.

  `sample_label` must support element-wise comparison and expose `.shape`
  (e.g. a NumPy array); a plain Python list does not work.
  '''
  positive_count = (sample_label == 1).sum()
  total_count = sample_label.shape[0]
  return positive_count / total_count

In [None]:
# Train one BertClassifier per stratified train/validation split, evaluate each
# on the test set, persist the model and its per-epoch curves, and keep the
# models in memory for the ensemble evaluation further down.
gc.collect()
# `device` is read as a module-level global by initialize_model, train and the
# evaluate* functions, so it must be defined before any of them is called.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
index = 1  # 1-based split counter, used only in the output file names
models = [] 
# Per-split summaries: each entry is the mean over all epochs of that split's curve.
training_accuracy_list = []
validation_accuracy_list = []
training_loss_list = []
validation_loss_list = []
# 3 stratified shuffle splits with the default 20% validation fraction.
train_and_val_splits = get_several_validation_set(train_texts, train_labels, total_splits=3, seed=seed)
for train_index, val_index in train_and_val_splits:

    # Hyperparameters (identical for every split; re-assigned on each iteration).
    dropout = 0.2 #Tune 
    archs = [64]  #Tune 
    batch_size = 16 #Tune 
    epoch = 10

    gc.collect()
    # Train and validation texts are tokenized separately, each with its own
    # padding=True call.
    train_encodings = tokenizer(list(train_texts[train_index]), truncation=True, padding=True)
    val_encodings = tokenizer(list(train_texts[val_index]), truncation=True, padding=True)
    train_dataset = ADdataset(train_encodings, train_labels[train_index])
    val_dataset = ADdataset(val_encodings, train_labels[val_index])
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    # NOTE(review): the validation loader is shuffled as well. evaluate() averages
    # per-batch metrics, so when the last batch is smaller the reported values can
    # depend on the shuffle order — consider shuffle=False.
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

    
    # `optimizer` and `scheduler` become module-level names here; train() as
    # written reads them as globals when stepping.
    bert_classifier, optimizer, scheduler = initialize_model(archs, dropout, train_loader, epochs=epoch)
    bert_classifier,train_loss_list,val_loss_list,train_acc_list,val_acc_list = train(bert_classifier, train_loader, optimizer, val_loader, epochs=epoch, evaluation=True)

    # Test-set performance of this split's model on its own (predictions and
    # labels_l are not used further in this cell).
    test_loss, test_accuracy, predictions, labels_l = evaluate_test(bert_classifier, test_loader)
    print("Test Loss: {}, Test Accuracy: {}".format(test_loss,test_accuracy))
    print("#######")
   
    # Persist the whole model object and the per-epoch curves, one file per
    # (seed, split index). open(..., 'wb') does not create missing directories,
    # so the target folders must already exist.
    torch.save(bert_classifier, "/content/drive/MyDrive/speech_weights/seed_{}_index_{}.pt".format(seed,index))
    with open('/content/drive/MyDrive/speech_traning_logs/training_loss/seed_{}_index_{}'.format(seed,index), 'wb') as fp:
      pickle.dump(train_loss_list, fp)
    with open('/content/drive/MyDrive/speech_traning_logs/training_accuracy/seed_{}_index_{}'.format(seed,index), 'wb') as fp:
      pickle.dump(train_acc_list, fp)
    with open('/content/drive/MyDrive/speech_traning_logs/validation_loss/seed_{}_index_{}'.format(seed,index), 'wb') as fp:
      pickle.dump(val_loss_list, fp)
    with open('/content/drive/MyDrive/speech_traning_logs/validation_accuracy/seed_{}_index_{}'.format(seed,index), 'wb') as fp:
      pickle.dump(val_acc_list, fp)

    index += 1
    # Every trained model stays referenced in `models` for the ensemble.
    models.append(bert_classifier)
    # Summaries are means over all epochs of the split, not final-epoch values.
    training_accuracy_list.append(np.mean(train_acc_list))
    validation_accuracy_list.append(np.mean(val_acc_list))
    training_loss_list.append(np.mean(train_loss_list))
    validation_loss_list.append(np.mean(val_loss_list))

    gc.collect()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

Epoch: 0 completed. Training Accuracy: 0.56, Training Loss: 0.65, Validation Accuracy: 0.5, Validation Loss: 0.63
Epoch: 1 completed. Training Accuracy: 0.71, Training Loss: 0.51, Validation Accuracy: 0.71, Validation Loss: 0.6
Epoch: 2 completed. Training Accuracy: 0.84, Training Loss: 0.33, Validation Accuracy: 0.81, Validation Loss: 0.41
Epoch: 3 completed. Training Accuracy: 0.97, Training Loss: 0.22, Validation Accuracy: 0.69, Validation Loss: 0.99
Epoch: 4 completed. Training Accuracy: 1.0, Training Loss: 0.16, Validation Accuracy: 0.9, Validation Loss: 0.37
Epoch: 5 completed. Training Accuracy: 0.91, Training Loss: 0.22, Validation Accuracy: 0.65, Validation Loss: 0.8
Epoch: 6 completed. Training Accuracy: 1.0, Training Loss: 0.15, Validation Accuracy: 0.88, Validation Loss: 0.3
Epoch: 7 completed. Training Accuracy: 1.0, Training Loss: 0.14, Validation Accuracy: 0.85, Validation Loss: 0.41
Epoch: 8 completed. Training Accuracy: 0.94, Training Loss: 0.2, Vali

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

Epoch: 0 completed. Training Accuracy: 0.6, Training Loss: 0.63, Validation Accuracy: 0.4, Validation Loss: 0.72
Epoch: 1 completed. Training Accuracy: 0.78, Training Loss: 0.47, Validation Accuracy: 0.81, Validation Loss: 0.53
Epoch: 2 completed. Training Accuracy: 0.87, Training Loss: 0.31, Validation Accuracy: 0.58, Validation Loss: 0.72
Epoch: 3 completed. Training Accuracy: 0.94, Training Loss: 0.25, Validation Accuracy: 0.67, Validation Loss: 0.8
Epoch: 4 completed. Training Accuracy: 0.99, Training Loss: 0.16, Validation Accuracy: 0.75, Validation Loss: 0.51
Epoch: 5 completed. Training Accuracy: 1.0, Training Loss: 0.16, Validation Accuracy: 0.79, Validation Loss: 0.5
Epoch: 6 completed. Training Accuracy: 0.99, Training Loss: 0.16, Validation Accuracy: 0.81, Validation Loss: 0.61
Epoch: 7 completed. Training Accuracy: 1.0, Training Loss: 0.15, Validation Accuracy: 0.79, Validation Loss: 0.54
Epoch: 8 completed. Training Accuracy: 1.0, Training Loss: 0.16, Va

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Start training...

Epoch: 0 completed. Training Accuracy: 0.54, Training Loss: 0.67, Validation Accuracy: 0.33, Validation Loss: 0.63
Epoch: 1 completed. Training Accuracy: 0.76, Training Loss: 0.48, Validation Accuracy: 0.65, Validation Loss: 0.62
Epoch: 2 completed. Training Accuracy: 0.81, Training Loss: 0.36, Validation Accuracy: 0.88, Validation Loss: 0.33
Epoch: 3 completed. Training Accuracy: 0.96, Training Loss: 0.23, Validation Accuracy: 0.73, Validation Loss: 0.69
Epoch: 4 completed. Training Accuracy: 0.92, Training Loss: 0.23, Validation Accuracy: 0.85, Validation Loss: 0.42
Epoch: 5 completed. Training Accuracy: 0.99, Training Loss: 0.14, Validation Accuracy: 0.73, Validation Loss: 0.78
Epoch: 6 completed. Training Accuracy: 1.0, Training Loss: 0.11, Validation Accuracy: 0.92, Validation Loss: 0.41
Epoch: 7 completed. Training Accuracy: 1.0, Training Loss: 0.1, Validation Accuracy: 0.88, Validation Loss: 0.33
Epoch: 8 completed. Training Accuracy: 1.0, Training Loss: 0.11,

In [None]:
# Majority-vote accuracy of all split models on the test set.
avg_test_acc = evaluate_ensemble(models, test_loader)
# The four lists hold one value per split, each being that split's mean over all
# training epochs; the lines below report the mean and standard deviation of
# those per-split values.
print("Training Accuracy Average: {}, std: {}".format(np.mean(training_accuracy_list),np.std(training_accuracy_list)))
print("Validation Accuracy Average: {}, std: {}".format(np.mean(validation_accuracy_list),np.std(validation_accuracy_list)))
print("Training Loss Average: {}, std: {}".format(np.mean(training_loss_list),np.std(training_loss_list)))
print("Validation Loss Average: {}, std: {}".format(np.mean(validation_loss_list),np.std(validation_loss_list)))
print("Ensemble Test accuracy (Majority Voting): {}".format(avg_test_acc))

Training Accuracy Average: 0.898, std: 0.011575836902790201
Validation Accuracy Average: 0.7539999999999999, std: 0.024097026095903726
Training Loss Average: 0.26566666666666666, std: 0.005312459150169769
Validation Loss Average: 0.5393333333333333, std: 0.04246043910381626
Ensemble Test accuracy (Majority Voting): 0.87143


In [None]:
# Hyperparameter configurations having highest validation accuracy
# The three lists have 13 entries each; presumably entry i of each list belongs
# to the same configuration — TODO confirm against the tuning results.
# NOTE(review): running this cell rebinds the module-level names `archs`,
# `batch_size` and `dropout` (scalars / a single list in the training cell) to
# these lists.
archs = [[512,64],[64,512],[256,512,64],[128],[128,512,256],[256,64],[256,128],[512,256],[256,64,128],[512,256,64],[128],[64,512],[256,512,128]]
batch_size = [16,16,8,8,16,16,16,16,16,16,16,16,16]
dropout = [0.4,0.5,0.3,0.3,0.4,0.3,0.4,0.5,0.3,0.4,0.5,0.3,0.3]
epochs = 15