In [None]:
# Mount Google Drive (Colab-only import) at /content/gdrive.
# The working directory set later in this notebook lives under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')


Mounted at /content/gdrive


In [None]:
#Install the happytransformer module. 
!pip install happytransformer

Collecting happytransformer
  Downloading https://files.pythonhosted.org/packages/23/f9/4acd066452e5b543d8362b5e9a18a8e298989f18b24bfcc114a66fc40792/happytransformer-2.2.2-py3-none-any.whl
Collecting datasets>=1.6.0
[?25l  Downloading https://files.pythonhosted.org/packages/46/1a/b9f9b3bfef624686ae81c070f0a6bb635047b17cdb3698c7ad01281e6f9a/datasets-1.6.2-py3-none-any.whl (221kB)
[K     |████████████████████████████████| 225kB 7.9MB/s 
[?25hCollecting transformers>=4.4.0
[?25l  Downloading https://files.pythonhosted.org/packages/d8/b2/57495b5309f09fa501866e225c84532d1fd89536ea62406b2181933fb418/transformers-4.5.1-py3-none-any.whl (2.1MB)
[K     |████████████████████████████████| 2.1MB 16.2MB/s 
[?25hCollecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/f5/99/e0808cb947ba10f575839c43e8fafc9cc44e4a7a2c8f79c60db48220a577/sentencepiece-0.1.95-cp37-cp37m-manylinux2014_x86_64.whl (1.2MB)
[K     |████████████████████████████████| 1.2MB 34.7MB/s 
Collecting

In [None]:
# Core data handling plus the happytransformer wrapper used for fine-tuning.
import pandas as pd
import numpy as np
import os
from happytransformer import HappyTextClassification
import nltk 
# Fetch the NLTK 'punkt' and 'stopwords' packages (no-op when already up to date).
nltk.download('punkt')
nltk.download('stopwords')

# DistilBertTokenizer is identical to BertTokenizer
from transformers import DistilBertModel, DistilBertTokenizer
# NOTE(review): vsm is imported before the working directory is changed below;
# presumably it is a project-local helper module - confirm it is importable here.
import vsm

# set working directory
# Every relative path used later (the Excel workbook, the intermediate csv files,
# the prediction csv files) is resolved against this Drive folder.
os.chdir('/content/gdrive/My Drive/AI Creativity')
# Last expression of the cell: displays the folder contents.
os.listdir()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


["Justin's data",
 'K-Fold',
 'runs',
 'train_bin.csv',
 'df_resid.csv',
 'cross_domain_results.csv',
 'rater_number_function.csv',
 'rater_number_function.gsheet',
 'cross_domain_results.gsheet',
 '2fake_prediction.csv',
 '3fake_prediction.csv',
 '4fake_prediction.csv',
 '5fake_prediction.csv',
 '1fake_prediction.csv',
 'tmp.csv',
 'cs224u',
 '1homo_prediction.csv',
 '1scrambled_prediction.csv',
 '1regression_prediction.csv',
 '2homo_prediction.csv',
 '2regression_prediction.csv',
 '2scrambled_prediction.csv',
 '3homo_prediction.csv',
 '3scrambled_prediction.csv',
 '3regression_prediction.csv',
 '4homo_prediction.csv',
 '4regression_prediction.csv',
 '4scrambled_prediction.csv',
 '5scrambled_prediction.csv',
 '5regression_prediction.csv',
 '5homo_prediction.csv',
 'Cross_Domain_Creativity_Combined_split1_scrambled_prediction.csv',
 'Cross_Domain_Creativity_Combined_split1homo_prediction.csv',
 'Cross_Domain_Creativity_Combined_split1normal_prediction.csv',
 'Cross_Domain_Creativity_Co

In [None]:
import random
import re

# Seed every global RNG this notebook draws from, not only Python's `random`:
#   * random     -> scramble() (word-order shuffling)
#   * np.random  -> the fold assignment (np.random.randint) and
#                   DataFrame.sample, which falls back to NumPy's global state
#                   when no random_state is given
# Without the NumPy seed the cross-validation splits differ on every run.
random.seed(1)
np.random.seed(1)


# Our dataset is an Excel workbook with multiple sheets.
# Each sheet includes the ideas from one sample along with ratings on several metrics:
# 3 constructs (creativity, usefulness, novelty) * 3 types of judges (experts, consumers, combined).
# The following function allows us to extract a specific type of label together with the ideas.
# For unknown reasons, I received an error when using BERT-like models to process a sequence that is too long (perhaps because of BERT's 512-token limit).
# The "length" argument allows us to keep only ideas with fewer than a certain number of words. The default is 400, which works fine.
def extract_metric(study, metric, length = 400):
  """Load one sample's ideas together with one rating column.

  Args:
    study: 1-based sample number; sheet ``study - 1`` of the workbook is read.
    metric: name of the rating column to use as the label
      (e.g. "Creativity_Combined").
    length: word-count cut-off; only ideas with strictly fewer than this many
      whitespace-separated words are kept.

  Returns:
    A DataFrame with the columns ``text`` (the idea) and ``label`` (the
    rating), keeping the row index of the sheet. Rows without an idea text
    are dropped.

  Raises:
    KeyError: if the sheet has no "Final_Idea" column or no ``metric`` column.
  """
  sheet = pd.read_excel("Justin's data/Idea Ratings_Berg_2019_OBHDP.xlsx", sheet_name= study-1)
  df = sheet[["Final_Idea", metric]].rename(columns={'Final_Idea': 'text', metric: 'label'})

  # A blank idea cell is read as NaN (a float), which has no .split() and
  # cannot be fed to the model anyway, so drop those rows before counting words.
  df = df[df['text'].notna()]

  # str() guards against cells that were parsed as numbers rather than text.
  short_enough = [len(str(idea).split()) < length for idea in df['text']]
  return(df.iloc[short_enough])

# The "scramble" function is for feature masking. It scrambles a sequence by randomizing the order of words
def scramble(text):
  """Return ``text`` with its whitespace-separated words in random order.

  The words themselves are kept; only their order is redrawn using Python's
  global ``random`` state. The result is joined with single spaces.
  """
  tokens = text.split()
  # Sampling all tokens without replacement yields a random permutation.
  return(" ".join(random.sample(tokens, len(tokens))))

# word_rep and homogenize are used for feature masking. The "homogenize" function replaces each word
# with an arbitrarily chosen word of the same or similar length.

def word_rep(word):
  """Return a fixed placeholder word chosen by the length of ``word``.

  Words of 1 to 10 characters map to a placeholder of exactly the same
  length; anything of 11 or more characters maps to "accidentally". An empty
  input is returned unchanged.
  """
  # Position k holds the placeholder for words of k + 1 characters.
  placeholders = ("a", "an", "and", "andy", "antic", "accent", "ancient",
                  "accident", "accidents", "accidental")
  size = len(word)
  if size == 0:
    return(word)
  if size >= 11:
    return("accidentally")
  return(placeholders[size - 1])
def homogenize(text):
  """Replace every whitespace-separated word of ``text`` via ``word_rep``.

  The replaced words are joined with single spaces, so the number of words is
  preserved while the original vocabulary is removed.
  """
  return(" ".join(map(word_rep, text.split())))




In [None]:
# Domain General Predictions
#
# For each construct: pool the three samples, assign every idea to one of five
# random folds, fine-tune a fresh model on four folds and predict the held-out
# fold in three variants - the intact text ("normal"), the text with its word
# order randomized ("scrambled") and the text with every word replaced by a
# length-matched placeholder ("homo").

for metric in ["Creativity_Combined", "Novelty_Combined", "Usefulness_Combined"]: # loop through the three constructs

  # In domain general predictions, we combine the three samples as our corpus
  train_file= pd.concat([extract_metric(1,metric),
            extract_metric(2,metric),
            extract_metric(3,metric)]).reset_index()

  # Roughly split the dataset into 5 subsets for cross-validation:
  # every row independently draws a fold number in 1..5, so fold sizes are
  # only approximately equal.
  n = train_file.shape[0]
  index = np.random.randint(1,6,n)
  for i in range(1,6):
    # Split the dataset into train set and test set.
    train_set = train_file[index != i]
    test_set = train_file[index == i]

    # The happytransformer function takes filenames as input.
    # Therefore, the train set and the test set are written into csv files.
    # to_csv also writes the row position in the pooled frame as the first,
    # unnamed column; the aggregation cell sorts on it ('Unnamed: 0').
    train_set.to_csv("train_file.csv")
    test_set.to_csv("test_file.csv")

    # Instantiate a BERT-like model (a fresh one per fold).
    # Set num_labels = 1 to make the model run regressions
    model = HappyTextClassification(
        #"ROBERTA", "roberta-base",
        "DISTILBERT", "distilbert-base-uncased",
        #"XLNET", "xlnet-base-cased",
        #"ALBERT", "albert-base-v2",
                                  num_labels=1)
    model.train("train_file.csv")

    # create a copy of the test set and scramble it
    # (kept after training so the order of RNG use is unchanged)
    scrambled = test_set.copy()
    scrambled['text'] = [scramble(x) for x in test_set.text]
    scrambled.to_csv("scrambled.csv")

    # Create a copy of the test set and homogenize it
    homo = test_set.copy()
    homo['text'] = [homogenize(x) for x in test_set.text]
    homo.to_csv("homo.csv")

    # the test method of happytransformer models return predicted labels and scores.
    # Because we set the num_labels = 1, only the scores matter
    pred = model.test("test_file.csv")
    scrambled_pred = model.test("scrambled.csv")
    homo_pred = model.test("homo.csv")

    # Extract the scores
    pred_score = [x.score for x in pred]
    scrambled_pred_score = [x.score for x in scrambled_pred]
    homo_pred_score = [x.score for x in homo_pred]

    # Work on copies so test_set keeps the human ratings needed for the
    # correlations printed below.
    df_pred = test_set.copy()
    df_scrambled = scrambled.copy()
    df_homo = homo.copy()

    # Replace the original labels with the predicted scores and write the results into csv files
    #(perhaps I should have created new columns for the predicted scores)
    # File names must match what the aggregation cell reads and what the domain
    # specific cell writes: "_scrambled" carries a leading underscore, while
    # "homo" and "normal" follow the split number directly. Writing
    # "_homo"/"_normal" here would leave the aggregation cell reading missing
    # or stale files.
    prefix = "Cross_Domain_" + metric + "_split" + str(i)
    df_scrambled['label'] = scrambled_pred_score
    df_scrambled.to_csv(prefix + '_scrambled_prediction.csv')
    df_homo['label'] = homo_pred_score
    df_homo.to_csv(prefix + 'homo_prediction.csv')
    df_pred['label'] = pred_score
    df_pred.to_csv(prefix + 'normal_prediction.csv')

    # Print pearson correlation for each iteration. Not important
    print("normal: "+ str(np.corrcoef(pred_score, test_set['label'])))
    print("scrambled: "+ str(np.corrcoef(scrambled_pred_score, test_set['label'])))
    print("homo: "+ str(np.corrcoef(homo_pred_score, test_set['label'])))


#IMPORTANT NOTE: the original happytransformer model only accepts integer-valued labels, which leads to the following error:
# invalid literal for int() with base 10: '5.7'
# To fix this, open the following file (its path shows up in the error message in Colaboratory):
#/usr/local/lib/python3.7/dist-packages/happytransformer/tc/trainer.py 
# and replace the original code below with my code
# Original code
        # contexts = []
        # labels = []
        # with open(filepath, newline='') as csv_file:
        #     reader = csv.DictReader(csv_file)
        #     for row in reader:
        #         contexts.append(row['text'])
        #         if not test_data:
        #             labels.append(int(row['label']))
        # csv_file.close()
#My code
        # import pandas as pd
        # df= pd.read_csv(filepath)
        # contexts = list(df['text'])
        # labels = list(df['label'])

# After the replacement, rerun the notebook without reinstalling the happytransformer module. It should work now.
# If you import the module from a local directory, you only need to do this once. 
# However, in Colaboratory, I have to repeat this step every time.

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=442.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=267967963.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=466062.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=28.0, style=ProgressStyle(description_w…

05/09/2021 05:21:14 - INFO - happytransformer.happy_transformer -   Using model: cuda





05/09/2021 05:21:20 - INFO - happytransformer.happy_transformer -   Preprocessing dataset...


ValueError: ignored

In [None]:
# Domain general results: for every construct and prediction type, pool the
# five held-out folds and correlate the pooled predictions with the human
# ratings of the whole corpus.
general_results = {}

for metric in ["Creativity_Combined", "Novelty_Combined", "Usefulness_Combined"]:
  # Rebuild the human ratings for THIS metric instead of reading the global
  # `train_file` left behind by the training cell: that variable only holds
  # the ratings of whichever metric was processed last (and does not exist at
  # all in a fresh kernel), so every other metric would be correlated against
  # the wrong construct.
  # NOTE(review): assumes the prediction files were produced from this same
  # pooled frame (same rows, same order) - confirm if the workbook changed.
  human_ratings = pd.concat([extract_metric(1,metric),
            extract_metric(2,metric),
            extract_metric(3,metric)]).reset_index()

  for pred_type in ['normal', "_scrambled", "homo"]:
    #combine all predicted scores to get the predictions of the whole dataset
    all_filenames = ["Cross_Domain_" + metric + "_split" + str(i)+ pred_type +"_prediction.csv" for i in range(1,6)]
    combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames])

    # 'Unnamed: 0' is the row position in the pooled frame that to_csv stored;
    # sorting on it puts the predictions back into the order of human_ratings.
    predicted = combined_csv.sort_values(by='Unnamed: 0')['label']

    # Calculate the Pearson correlation between the predicted scores and human ratings
    general_results[metric+ '_' + pred_type] = (np.corrcoef(predicted, human_ratings['label'])[0,1])

# Write out the results into csv files. I use these files to make plots in R
# (dict views are materialized as lists before building the frame)
pd.DataFrame({'key': list(general_results.keys()), 'value': list(general_results.values())}).to_csv("Domain_general_results(0503).csv")


NameError: ignored

In [None]:
# Domain Specific Predictions
# Runs the five-fold train/predict procedure separately for every
# (construct, sample) pair and writes one prediction csv per fold and variant.
for metric in ["Creativity_Combined", "Novelty_Combined", "Usefulness_Combined"]:
  for study in [1,2,3]:
    
    # Everything is the same as in domain general predictions, except that each sample is used separately here
    # 250 ideas are drawn from the sample; no random_state is passed to sample().
    # NOTE(review): presumably 250 equalizes the corpus size across samples - confirm.
    train_file= extract_metric(study,metric).sample(n = 250).reset_index()
    n = train_file.shape[0]
    # Each row independently draws a fold number in 1..5 (fold sizes vary).
    index = np.random.randint(1,6,n)
    for i in range(1,6):
      # Fold i is held out; the csv files are how the data is handed to the model.
      train_set = train_file[index != i]
      train_set.to_csv("train_file.csv")
      test_set = train_file[index == i]
      test_set.to_csv("test_file.csv")

      # A new model is created and trained for every fold.
      model = HappyTextClassification(
          #"ROBERTA", "roberta-base",
          "DISTILBERT", "distilbert-base-uncased",
          #"XLNET", "xlnet-base-cased",
          #"ALBERT", "albert-base-v2",
                                    num_labels=1)
      model.train("train_file.csv")

      # Masked variants of the held-out fold: same rows and labels, altered text.
      scrambled = test_set.copy()
      homo = test_set.copy()

      scrambled['text'] = [scramble(x) for x in test_set.text]
      homo['text'] = [homogenize(x) for x in test_set.text]

      scrambled.to_csv("scrambled.csv")
      homo.to_csv("homo.csv")

      # Predict the intact, scrambled and homogenized versions of the fold.
      pred = model.test("test_file.csv")
      scrambled_pred = model.test("scrambled.csv")
      homo_pred = model.test("homo.csv")

      # Only the .score attribute of each prediction is used.
      pred_score = [x.score for x in pred]
      scrambled_pred_score = [x.score for x in scrambled_pred]
      homo_pred_score = [x.score for x in homo_pred]

      # Copies, so that test_set keeps the human ratings for the correlations below.
      df_pred = test_set.copy()
      df_scrambled = scrambled.copy()
      df_homo = homo.copy()

      # The 'label' column of each output file holds the predicted score, not the rating.
      # File naming: "_scrambled" has a leading underscore, "homo" and "normal"
      # follow the split number directly.
      df_scrambled['label'] = scrambled_pred_score
      df_scrambled.to_csv("Domain_specific_study" + str(study) + "_" + metric + "_split" + str(i) + '_scrambled_prediction.csv')
      df_homo['label'] = homo_pred_score
      df_homo.to_csv("Domain_specific_study" + str(study) + "_" + metric + "_split" + str(i) + 'homo_prediction.csv')
      df_pred['label'] = pred_score
      df_pred.to_csv("Domain_specific_study" + str(study) + "_" + metric + "_split" + str(i) + 'normal_prediction.csv')

      # Per-fold 2x2 correlation matrices between predictions and human ratings.
      print("normal: "+ str(np.corrcoef(pred_score, test_set['label'])))
      print("scrambled: "+ str(np.corrcoef(scrambled_pred_score, test_set['label'])))
      print("homo: "+ str(np.corrcoef(homo_pred_score, test_set['label'])))


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6939


100%|██████████| 51/51 [00:00<00:00, 181.03it/s]
100%|██████████| 51/51 [00:00<00:00, 182.56it/s]
100%|██████████| 51/51 [00:00<00:00, 182.80it/s]


normal: [[1.         0.65939717]
 [0.65939717 1.        ]]
scrambled: [[1.         0.60815799]
 [0.60815799 1.        ]]
homo: [[1.         0.42781806]
 [0.42781806 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7865


100%|██████████| 51/51 [00:00<00:00, 184.59it/s]
100%|██████████| 51/51 [00:00<00:00, 187.85it/s]
100%|██████████| 51/51 [00:00<00:00, 184.30it/s]


normal: [[1.         0.60429122]
 [0.60429122 1.        ]]
scrambled: [[1.         0.55783905]
 [0.55783905 1.        ]]
homo: [[1.         0.51057262]
 [0.51057262 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.794


100%|██████████| 39/39 [00:00<00:00, 163.60it/s]
100%|██████████| 39/39 [00:00<00:00, 173.11it/s]
100%|██████████| 39/39 [00:00<00:00, 176.62it/s]


normal: [[1.         0.63636385]
 [0.63636385 1.        ]]
scrambled: [[1.         0.70595816]
 [0.70595816 1.        ]]
homo: [[1.         0.61273466]
 [0.61273466 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6221


100%|██████████| 53/53 [00:00<00:00, 179.56it/s]
100%|██████████| 53/53 [00:00<00:00, 184.80it/s]
100%|██████████| 53/53 [00:00<00:00, 185.80it/s]


normal: [[1.         0.70457694]
 [0.70457694 1.        ]]
scrambled: [[1.         0.62197494]
 [0.62197494 1.        ]]
homo: [[1.         0.42707769]
 [0.42707769 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6736


100%|██████████| 56/56 [00:00<00:00, 185.25it/s]
100%|██████████| 56/56 [00:00<00:00, 180.18it/s]
100%|██████████| 56/56 [00:00<00:00, 181.17it/s]


normal: [[1.         0.65180841]
 [0.65180841 1.        ]]
scrambled: [[1.        0.5654625]
 [0.5654625 1.       ]]
homo: [[1.         0.53097318]
 [0.53097318 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6684


100%|██████████| 49/49 [00:00<00:00, 177.71it/s]
100%|██████████| 49/49 [00:00<00:00, 179.09it/s]
100%|██████████| 49/49 [00:00<00:00, 181.36it/s]


normal: [[1.         0.67433567]
 [0.67433567 1.        ]]
scrambled: [[1.         0.67645629]
 [0.67645629 1.        ]]
homo: [[1.         0.49190509]
 [0.49190509 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.566


100%|██████████| 50/50 [00:00<00:00, 183.44it/s]
100%|██████████| 50/50 [00:00<00:00, 181.29it/s]
100%|██████████| 50/50 [00:00<00:00, 184.99it/s]


normal: [[1.         0.75878157]
 [0.75878157 1.        ]]
scrambled: [[1.         0.83278634]
 [0.83278634 1.        ]]
homo: [[1.         0.66498987]
 [0.66498987 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8012


100%|██████████| 45/45 [00:00<00:00, 162.32it/s]
100%|██████████| 45/45 [00:00<00:00, 167.19it/s]
100%|██████████| 45/45 [00:00<00:00, 168.89it/s]


normal: [[1.         0.72030988]
 [0.72030988 1.        ]]
scrambled: [[1.         0.78149205]
 [0.78149205 1.        ]]
homo: [[1.         0.69868257]
 [0.69868257 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6517


100%|██████████| 43/43 [00:00<00:00, 176.83it/s]
100%|██████████| 43/43 [00:00<00:00, 182.53it/s]
100%|██████████| 43/43 [00:00<00:00, 187.12it/s]


normal: [[1.         0.61834307]
 [0.61834307 1.        ]]
scrambled: [[1.         0.64504839]
 [0.64504839 1.        ]]
homo: [[1.         0.54403266]
 [0.54403266 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6165


100%|██████████| 63/63 [00:00<00:00, 172.62it/s]
100%|██████████| 63/63 [00:00<00:00, 177.65it/s]
100%|██████████| 63/63 [00:00<00:00, 180.89it/s]


normal: [[1.         0.73787995]
 [0.73787995 1.        ]]
scrambled: [[1.         0.71463069]
 [0.71463069 1.        ]]
homo: [[1.         0.46675031]
 [0.46675031 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8013


100%|██████████| 49/49 [00:00<00:00, 148.85it/s]
100%|██████████| 49/49 [00:00<00:00, 152.11it/s]
100%|██████████| 49/49 [00:00<00:00, 154.36it/s]


normal: [[1.         0.69486574]
 [0.69486574 1.        ]]
scrambled: [[1.         0.63278657]
 [0.63278657 1.        ]]
homo: [[1.         0.48577868]
 [0.48577868 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6294


100%|██████████| 53/53 [00:00<00:00, 155.44it/s]
100%|██████████| 53/53 [00:00<00:00, 157.97it/s]
100%|██████████| 53/53 [00:00<00:00, 159.72it/s]


normal: [[1.         0.72845879]
 [0.72845879 1.        ]]
scrambled: [[1.         0.72742726]
 [0.72742726 1.        ]]
homo: [[1.         0.31027049]
 [0.31027049 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7644


100%|██████████| 40/40 [00:00<00:00, 152.80it/s]
100%|██████████| 40/40 [00:00<00:00, 153.69it/s]
100%|██████████| 40/40 [00:00<00:00, 155.63it/s]


normal: [[1.         0.72605949]
 [0.72605949 1.        ]]
scrambled: [[1.         0.76535404]
 [0.76535404 1.        ]]
homo: [[1.         0.49594425]
 [0.49594425 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7496


100%|██████████| 51/51 [00:00<00:00, 156.36it/s]
100%|██████████| 51/51 [00:00<00:00, 158.90it/s]
100%|██████████| 51/51 [00:00<00:00, 158.67it/s]


normal: [[1.        0.6088477]
 [0.6088477 1.       ]]
scrambled: [[1.         0.59649526]
 [0.59649526 1.        ]]
homo: [[1.         0.39128103]
 [0.39128103 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7114


100%|██████████| 57/57 [00:00<00:00, 163.83it/s]
100%|██████████| 57/57 [00:00<00:00, 163.10it/s]
100%|██████████| 57/57 [00:00<00:00, 162.80it/s]


normal: [[1.         0.76425177]
 [0.76425177 1.        ]]
scrambled: [[1.         0.74091669]
 [0.74091669 1.        ]]
homo: [[1.        0.5850197]
 [0.5850197 1.       ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8413


100%|██████████| 51/51 [00:00<00:00, 183.35it/s]
100%|██████████| 51/51 [00:00<00:00, 179.81it/s]
100%|██████████| 51/51 [00:00<00:00, 181.24it/s]


normal: [[1.         0.51477257]
 [0.51477257 1.        ]]
scrambled: [[1.         0.47605194]
 [0.47605194 1.        ]]
homo: [[1.         0.36019331]
 [0.36019331 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.857


100%|██████████| 51/51 [00:00<00:00, 181.83it/s]
100%|██████████| 51/51 [00:00<00:00, 187.16it/s]
100%|██████████| 51/51 [00:00<00:00, 190.42it/s]


normal: [[1.         0.54065429]
 [0.54065429 1.        ]]
scrambled: [[1.         0.56554579]
 [0.56554579 1.        ]]
homo: [[1.         0.48625087]
 [0.48625087 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.9089


100%|██████████| 39/39 [00:00<00:00, 176.44it/s]
100%|██████████| 39/39 [00:00<00:00, 174.48it/s]
100%|██████████| 39/39 [00:00<00:00, 172.52it/s]


normal: [[1.         0.64823238]
 [0.64823238 1.        ]]
scrambled: [[1.        0.6821186]
 [0.6821186 1.       ]]
homo: [[1.         0.64180102]
 [0.64180102 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6841


100%|██████████| 53/53 [00:00<00:00, 179.20it/s]
100%|██████████| 53/53 [00:00<00:00, 182.62it/s]
100%|██████████| 53/53 [00:00<00:00, 182.05it/s]


normal: [[1.       0.672325]
 [0.672325 1.      ]]
scrambled: [[1.         0.59414018]
 [0.59414018 1.        ]]
homo: [[1.        0.4333615]
 [0.4333615 1.       ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8116


100%|██████████| 56/56 [00:00<00:00, 177.52it/s]
100%|██████████| 56/56 [00:00<00:00, 178.77it/s]
100%|██████████| 56/56 [00:00<00:00, 184.00it/s]


normal: [[1.        0.5739617]
 [0.5739617 1.       ]]
scrambled: [[1.         0.50150462]
 [0.50150462 1.        ]]
homo: [[1.       0.486693]
 [0.486693 1.      ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.812


100%|██████████| 49/49 [00:00<00:00, 177.23it/s]
100%|██████████| 49/49 [00:00<00:00, 174.52it/s]
100%|██████████| 49/49 [00:00<00:00, 184.50it/s]


normal: [[1.         0.47899381]
 [0.47899381 1.        ]]
scrambled: [[1.         0.55149911]
 [0.55149911 1.        ]]
homo: [[1.         0.51210467]
 [0.51210467 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7055


100%|██████████| 50/50 [00:00<00:00, 180.76it/s]
100%|██████████| 50/50 [00:00<00:00, 186.10it/s]
100%|██████████| 50/50 [00:00<00:00, 186.87it/s]


normal: [[1.         0.70403933]
 [0.70403933 1.        ]]
scrambled: [[1.         0.78680841]
 [0.78680841 1.        ]]
homo: [[1.         0.70635426]
 [0.70635426 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6834


100%|██████████| 45/45 [00:00<00:00, 161.55it/s]
100%|██████████| 45/45 [00:00<00:00, 165.81it/s]
100%|██████████| 45/45 [00:00<00:00, 174.49it/s]


normal: [[1.         0.79200453]
 [0.79200453 1.        ]]
scrambled: [[1.         0.82539095]
 [0.82539095 1.        ]]
homo: [[1.         0.71822212]
 [0.71822212 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8076


100%|██████████| 43/43 [00:00<00:00, 176.83it/s]
100%|██████████| 43/43 [00:00<00:00, 178.07it/s]
100%|██████████| 43/43 [00:00<00:00, 185.17it/s]


normal: [[1.        0.4773329]
 [0.4773329 1.       ]]
scrambled: [[1.         0.64218146]
 [0.64218146 1.        ]]
homo: [[1.         0.59203992]
 [0.59203992 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6771


100%|██████████| 63/63 [00:00<00:00, 171.71it/s]
100%|██████████| 63/63 [00:00<00:00, 175.70it/s]
100%|██████████| 63/63 [00:00<00:00, 184.33it/s]


normal: [[1.         0.51399943]
 [0.51399943 1.        ]]
scrambled: [[1.         0.51263655]
 [0.51263655 1.        ]]
homo: [[1.         0.53280177]
 [0.53280177 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8768


100%|██████████| 49/49 [00:00<00:00, 155.15it/s]
100%|██████████| 49/49 [00:00<00:00, 157.76it/s]
100%|██████████| 49/49 [00:00<00:00, 159.55it/s]


normal: [[1.       0.599762]
 [0.599762 1.      ]]
scrambled: [[1.         0.55922598]
 [0.55922598 1.        ]]
homo: [[1.         0.52342929]
 [0.52342929 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.718


100%|██████████| 53/53 [00:00<00:00, 157.88it/s]
100%|██████████| 53/53 [00:00<00:00, 161.82it/s]
100%|██████████| 53/53 [00:00<00:00, 159.65it/s]


normal: [[1.         0.69071953]
 [0.69071953 1.        ]]
scrambled: [[1.         0.59339515]
 [0.59339515 1.        ]]
homo: [[1.         0.34879426]
 [0.34879426 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8896


100%|██████████| 40/40 [00:00<00:00, 153.48it/s]
100%|██████████| 40/40 [00:00<00:00, 154.86it/s]
100%|██████████| 40/40 [00:00<00:00, 152.70it/s]


normal: [[1.         0.70068656]
 [0.70068656 1.        ]]
scrambled: [[1.         0.71662964]
 [0.71662964 1.        ]]
homo: [[1.         0.32726169]
 [0.32726169 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.8603


100%|██████████| 51/51 [00:00<00:00, 159.13it/s]
100%|██████████| 51/51 [00:00<00:00, 159.56it/s]
100%|██████████| 51/51 [00:00<00:00, 161.20it/s]


normal: [[1.         0.58181316]
 [0.58181316 1.        ]]
scrambled: [[1.         0.60481415]
 [0.60481415 1.        ]]
homo: [[1.         0.39186431]
 [0.39186431 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.9762


100%|██████████| 57/57 [00:00<00:00, 162.49it/s]
100%|██████████| 57/57 [00:00<00:00, 163.10it/s]
100%|██████████| 57/57 [00:00<00:00, 164.69it/s]


normal: [[1.         0.60150189]
 [0.60150189 1.        ]]
scrambled: [[1.         0.56633224]
 [0.56633224 1.        ]]
homo: [[1.        0.4745415]
 [0.4745415 1.       ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6432


100%|██████████| 51/51 [00:00<00:00, 182.86it/s]
100%|██████████| 51/51 [00:00<00:00, 180.85it/s]
100%|██████████| 51/51 [00:00<00:00, 180.58it/s]


normal: [[1.         0.65516747]
 [0.65516747 1.        ]]
scrambled: [[1.        0.6922969]
 [0.6922969 1.       ]]
homo: [[1.         0.61869257]
 [0.61869257 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.653


100%|██████████| 51/51 [00:00<00:00, 180.63it/s]
100%|██████████| 51/51 [00:00<00:00, 182.53it/s]
100%|██████████| 51/51 [00:00<00:00, 186.12it/s]


normal: [[1.         0.52722717]
 [0.52722717 1.        ]]
scrambled: [[1.         0.41721052]
 [0.41721052 1.        ]]
homo: [[1.         0.23952867]
 [0.23952867 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6583


100%|██████████| 39/39 [00:00<00:00, 171.82it/s]
100%|██████████| 39/39 [00:00<00:00, 175.32it/s]
100%|██████████| 39/39 [00:00<00:00, 172.71it/s]


normal: [[1.         0.66831613]
 [0.66831613 1.        ]]
scrambled: [[1.        0.6842228]
 [0.6842228 1.       ]]
homo: [[1.         0.58976223]
 [0.58976223 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.602


100%|██████████| 53/53 [00:00<00:00, 186.41it/s]
100%|██████████| 53/53 [00:00<00:00, 177.37it/s]
100%|██████████| 53/53 [00:00<00:00, 180.32it/s]


normal: [[1.         0.44662045]
 [0.44662045 1.        ]]
scrambled: [[1.         0.42265695]
 [0.42265695 1.        ]]
homo: [[1.         0.41386549]
 [0.41386549 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6419


100%|██████████| 56/56 [00:00<00:00, 158.94it/s]
100%|██████████| 56/56 [00:00<00:00, 165.99it/s]
100%|██████████| 56/56 [00:00<00:00, 173.35it/s]


normal: [[1.         0.42307068]
 [0.42307068 1.        ]]
scrambled: [[1.        0.4179812]
 [0.4179812 1.       ]]
homo: [[1.         0.35942737]
 [0.35942737 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6671


100%|██████████| 49/49 [00:00<00:00, 173.05it/s]
100%|██████████| 49/49 [00:00<00:00, 171.96it/s]
100%|██████████| 49/49 [00:00<00:00, 181.79it/s]


normal: [[1.         0.76040159]
 [0.76040159 1.        ]]
scrambled: [[1.         0.75504269]
 [0.75504269 1.        ]]
homo: [[1.         0.25655759]
 [0.25655759 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.5852


100%|██████████| 50/50 [00:00<00:00, 182.90it/s]
100%|██████████| 50/50 [00:00<00:00, 180.96it/s]
100%|██████████| 50/50 [00:00<00:00, 185.50it/s]


normal: [[1.         0.76866741]
 [0.76866741 1.        ]]
scrambled: [[1.         0.76711802]
 [0.76711802 1.        ]]
homo: [[1.         0.23930606]
 [0.23930606 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.5584


100%|██████████| 45/45 [00:00<00:00, 162.07it/s]
100%|██████████| 45/45 [00:00<00:00, 163.87it/s]
100%|██████████| 45/45 [00:00<00:00, 171.37it/s]


normal: [[1.         0.80624729]
 [0.80624729 1.        ]]
scrambled: [[1.         0.80072275]
 [0.80072275 1.        ]]
homo: [[1.         0.68953612]
 [0.68953612 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.7493


100%|██████████| 43/43 [00:00<00:00, 178.04it/s]
100%|██████████| 43/43 [00:00<00:00, 177.21it/s]
100%|██████████| 43/43 [00:00<00:00, 183.22it/s]


normal: [[1.        0.6665465]
 [0.6665465 1.       ]]
scrambled: [[1.         0.67079542]
 [0.67079542 1.        ]]
homo: [[1.         0.29588847]
 [0.29588847 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6582


100%|██████████| 63/63 [00:00<00:00, 178.73it/s]
100%|██████████| 63/63 [00:00<00:00, 179.75it/s]
100%|██████████| 63/63 [00:00<00:00, 176.65it/s]


normal: [[1.        0.8259779]
 [0.8259779 1.       ]]
scrambled: [[1.         0.74767223]
 [0.74767223 1.        ]]
homo: [[1.         0.21703148]
 [0.21703148 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6058


100%|██████████| 49/49 [00:00<00:00, 155.67it/s]
100%|██████████| 49/49 [00:00<00:00, 161.55it/s]
100%|██████████| 49/49 [00:00<00:00, 159.45it/s]


normal: [[1.         0.61670583]
 [0.61670583 1.        ]]
scrambled: [[1.         0.58360519]
 [0.58360519 1.        ]]
homo: [[1.         0.30976436]
 [0.30976436 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.5201


100%|██████████| 53/53 [00:00<00:00, 158.43it/s]
100%|██████████| 53/53 [00:00<00:00, 156.21it/s]
100%|██████████| 53/53 [00:00<00:00, 161.68it/s]


normal: [[1.         0.62730923]
 [0.62730923 1.        ]]
scrambled: [[1.         0.68973432]
 [0.68973432 1.        ]]
homo: [[1.       0.293547]
 [0.293547 1.      ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6326


100%|██████████| 40/40 [00:00<00:00, 152.25it/s]
100%|██████████| 40/40 [00:00<00:00, 152.76it/s]
100%|██████████| 40/40 [00:00<00:00, 153.09it/s]


normal: [[1.         0.66722112]
 [0.66722112 1.        ]]
scrambled: [[1.         0.82696951]
 [0.82696951 1.        ]]
homo: [[1.         0.44289435]
 [0.44289435 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.6191


100%|██████████| 51/51 [00:00<00:00, 154.69it/s]
100%|██████████| 51/51 [00:00<00:00, 142.99it/s]
100%|██████████| 51/51 [00:00<00:00, 153.15it/s]


normal: [[1.         0.51215589]
 [0.51215589 1.        ]]
scrambled: [[1.         0.59781985]
 [0.59781985 1.        ]]
homo: [[1.         0.35087261]
 [0.35087261 1.        ]]


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss
500,0.5802


100%|██████████| 57/57 [00:00<00:00, 159.51it/s]
100%|██████████| 57/57 [00:00<00:00, 159.16it/s]
100%|██████████| 57/57 [00:00<00:00, 165.01it/s]


normal: [[1.         0.65407312]
 [0.65407312 1.        ]]
scrambled: [[1.         0.63571671]
 [0.63571671 1.        ]]
homo: [[1.         0.42945509]
 [0.42945509 1.        ]]


In [None]:
specific_results = dict()

# The scores returned by happytransformer models look like they have been
# squashed through a sigmoid. Applying the inverse sigmoid (logit) recovers
# the raw regression scores, which seems to improve accuracy slightly.
def inverse_sigmoid(y):
  """Return the logit of `y`, i.e. log(y / (1 - y)).

  `y` may be a scalar, a NumPy array or a pandas Series; the operation is
  applied element-wise.
  """
  odds = y / (1 - y)
  return np.log(odds)

# For every metric / study / prediction-type combination, stitch the five
# per-split prediction files back together and correlate the (logit-transformed)
# predictions with the human ratings.
for metric in ["Creativity_Combined", "Novelty_Combined", "Usefulness_Combined"]:
  for study in [1,2,3]:
    for pred_type in ['normal', "_scrambled", "homo"]:
      # Split files are numbered 1..5; note that pred_type is glued on without
      # a separator, which is why "_scrambled" carries its own underscore.
      all_filenames = [
          f"Domain_specific_study{study}_{metric}_split{split}{pred_type}_prediction.csv"
          for split in range(1, 6)
      ]
      split_frames = [pd.read_csv(path) for path in all_filenames]
      combined_csv = pd.concat(split_frames).sort_values(by='Unnamed: 0')
      train_file = extract_metric(study, metric)

      # Predictions come from the split files; the true labels are looked up
      # in the study data through the 'index' column saved with each prediction.
      y_estimate = combined_csv['label']
      y_true = train_file.loc[combined_csv['index'], 'label']

      # To get the raw (untransformed) correlation instead, pass y_estimate
      # to np.corrcoef directly rather than inverse_sigmoid(y_estimate).
      result_key = f"{metric}_study{study}_{pred_type}"
      correlation = np.corrcoef(inverse_sigmoid(y_estimate), y_true)
      specific_results[result_key] = correlation[0, 1]

# pd.DataFrame.from_dict({'key':specific_results.keys(), 'value': specific_results.values()}).to_csv("Domain_specific_results(0503).csv")


Inverse_sigmoid results

{'Creativity_Combined_study1__scrambled': 0.6036988186771806,
 'Creativity_Combined_study1_homo': 0.4823619568751188,
 'Creativity_Combined_study1_normal': 0.6463445792796637,
 'Creativity_Combined_study2__scrambled': 0.7203512372807368,
 'Creativity_Combined_study2_homo': 0.5198173668616751,
 'Creativity_Combined_study2_normal': 0.7117417651112722,
 'Creativity_Combined_study3__scrambled': 0.6659989029153098,
 'Creativity_Combined_study3_homo': 0.38233451080530717,
 'Creativity_Combined_study3_normal': 0.6872549785663923,
 'Novelty_Combined_study1__scrambled': 0.5592626273318138,
 'Novelty_Combined_study1_homo': 0.49202069997900133,
 'Novelty_Combined_study1_normal': 0.5744742944075424,
 'Novelty_Combined_study2__scrambled': 0.6418145060710528,
 'Novelty_Combined_study2_homo': 0.45258013837462935,
 'Novelty_Combined_study2_normal': 0.5910802973383661,
 'Novelty_Combined_study3__scrambled': 0.5311424200434418,
 'Novelty_Combined_study3_homo': 0.3781024469416406,
 'Novelty_Combined_study3_normal': 0.5513141301276988,
 'Usefulness_Combined_study1__scrambled': 0.4715331178125642,
 'Usefulness_Combined_study1_homo': 0.36764497672426866,
 'Usefulness_Combined_study1_normal': 0.5450251915363286,
 'Usefulness_Combined_study2__scrambled': 0.7407053166149862,
 'Usefulness_Combined_study2_homo': 0.21543484128261622,
 'Usefulness_Combined_study2_normal': 0.7717636916972684,
 'Usefulness_Combined_study3__scrambled': 0.5987181160598203,
 'Usefulness_Combined_study3_homo': 0.3271331810590989,
 'Usefulness_Combined_study3_normal': 0.591836376949932}

Raw results
 {'Creativity_Combined_study1__scrambled': 0.5771698758149193,
 'Creativity_Combined_study1_homo': 0.4300978918035641,
 'Creativity_Combined_study1_normal': 0.6463088483610605,
 'Creativity_Combined_study2__scrambled': 0.701350483161944,
 'Creativity_Combined_study2_homo': 0.5321629182040738,
 'Creativity_Combined_study2_normal': 0.6961954262707623,
 'Creativity_Combined_study3__scrambled': 0.6273087736456405,
 'Creativity_Combined_study3_homo': 0.3786845560571378,
 'Creativity_Combined_study3_normal': 0.67867499963009,
 'Novelty_Combined_study1__scrambled': 0.5331403579091303,
 'Novelty_Combined_study1_homo': 0.4534316982609445,
 'Novelty_Combined_study1_normal': 0.5456892636937443,
 'Novelty_Combined_study2__scrambled': 0.5912638382707073,
 'Novelty_Combined_study2_homo': 0.4668422177205251,
 'Novelty_Combined_study2_normal': 0.539181017446131,
 'Novelty_Combined_study3__scrambled': 0.4840568557620448,
 'Novelty_Combined_study3_homo': 0.37172995291699334,
 'Novelty_Combined_study3_normal': 0.5097986565921832,
 'Usefulness_Combined_study1__scrambled': 0.47613903003544167,
 'Usefulness_Combined_study1_homo': 0.34990235954121973,
 'Usefulness_Combined_study1_normal': 0.524757089413579,
 'Usefulness_Combined_study2__scrambled': 0.7317236140363812,
 'Usefulness_Combined_study2_homo': 0.2474972202827062,
 'Usefulness_Combined_study2_normal': 0.7613030946034497,
 'Usefulness_Combined_study3__scrambled': 0.5717617988538715,
 'Usefulness_Combined_study3_homo': 0.3270518133572374,
 'Usefulness_Combined_study3_normal': 0.5761344859890838}

In [None]:
# Experiment: combine BERT's predictions with predictions based on other
# (hand-crafted) features. It did not work well, so this cell can be ignored
# for now.

# Prediction files are named "1regression_prediction.csv" ... "5regression_prediction.csv".
all_filenames = [f"{fold}regression_prediction.csv" for fold in range(1, 6)]
combined_csv = (
    pd.concat([pd.read_csv(csv_path) for csv_path in all_filenames])
    .sort_values(by='Unnamed: 0')
    .reset_index()
)


def get_length(df):
  """Compute simple length features for every row of `df['text']`.

  Parameters
  ----------
  df : pd.DataFrame
      Must contain a 'text' column of strings.

  Returns
  -------
  np.ndarray
      Array of shape (n_rows, 3) whose columns are, in order: the number of
      whitespace-separated words, the number of characters, and the average
      number of characters per word (0.0 for a text without any word).
  """
  lengths = []
  characters = []
  average_len = []
  # Iterate over the values directly rather than df['text'][i]: the latter is
  # a label lookup, which breaks (KeyError or wrong row order) whenever the
  # index is not exactly 0..n-1.
  for text in df['text']:
      n_words = len(text.split())
      n_chars = len(text)
      lengths.append(n_words)
      characters.append(n_chars)
      # Guard against empty / whitespace-only texts (no words to average over).
      average_len.append(n_chars / n_words if n_words else 0.0)
  df_len = pd.DataFrame({'lengths': lengths,
                        'characters': characters,
                        'average_len': average_len})
  return df_len.to_numpy()

def glove2dict(src_filename):
    """
    GloVe vectors file reader.

    Parameters
    ----------
    src_filename : str
        Full path to the GloVe file to be processed.

    Returns
    -------
    dict
        Mapping words to their GloVe vectors as `np.array` (dtype float64).
        Blank lines and lines that cannot be decoded as UTF-8 are skipped.

    """
    # This distribution has some words with spaces, so we have to
    # assume its dimensionality and parse out the lines specially:
    if '840B.300d' in src_filename:
        line_parser = lambda line: line.rsplit(" ", 300)
    else:
        line_parser = lambda line: line.strip().split()
    data = {}
    with open(src_filename, encoding='utf8') as f:
        # Explicit next() inside the try block (instead of a plain for loop)
        # so that a UnicodeDecodeError on one line can be caught and skipped
        # without abandoning the rest of the file.
        while True:
            try:
                line = next(f)
                if not line.strip():
                    # Blank line: nothing to parse (would raise IndexError below).
                    continue
                line = line_parser(line)
                # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
                data[line[0]] = np.array(line[1: ], dtype=np.float64)
            except StopIteration:
                break
            except UnicodeDecodeError:
                pass
    return data

# glove_dict = glove2dict(
#     os.path.join('cs224u','data', 'glove.6B', 'glove.6B.300d.txt'))

def get_distance(word1, word2, vectors=None):
  """Return the cosine distance between the embedding vectors of two words.

  Parameters
  ----------
  word1, word2 : str
      Words to compare; both must be keys of the vector lookup.
  vectors : dict, optional
      Mapping from word to 1-D vector. Defaults to the notebook-level
      `glove_dict` when omitted.

  Returns
  -------
  float
      `scipy.spatial.distance.cosine` of the two vectors.

  Raises
  ------
  KeyError
      If either word is missing from the lookup.
  """
  from scipy.spatial.distance import cosine
  if vectors is None:
    vectors = glove_dict
  # The original had a stray trailing comma after the first lookup, which
  # wrapped the vector in a 1-tuple and handed `cosine` a 2-D input.
  v1 = vectors[word1]
  v2 = vectors[word2]

  return cosine(v1, v2)

def get_vector_distance(text):
  """List the pairwise GloVe distances between the non-stopword tokens of `text`.

  The text is tokenised with `nltk.word_tokenize` and English stopwords are
  dropped. Every unordered pair of remaining tokens (earlier token first) that
  are both present in `glove_dict` contributes one `get_distance` value; pairs
  with an out-of-vocabulary token are skipped. The result may be empty.
  """
  tokens = nltk.word_tokenize(text)
  english_stops = nltk.corpus.stopwords.words('english')
  tokens = [tok for tok in tokens if tok not in english_stops]
  return [
      get_distance(left, right)
      for pos, left in enumerate(tokens)
      for right in tokens[pos + 1:]
      if left in glove_dict and right in glove_dict
  ]


def get_df_distance(df):
  """Summarise the pairwise word distances of every text in `df['text']`.

  Parameters
  ----------
  df : pd.DataFrame
      Must contain a 'text' column of strings.

  Returns
  -------
  np.ndarray
      Array of shape (n_rows, 3) with columns average, max and min of the
      distances returned by `get_vector_distance` for each text. A text that
      yields no distances gets NaN in all three columns, so callers feeding
      this into a model need to impute or drop those rows.
  """
  lst_distances = [get_vector_distance(text) for text in df['text']]

  def _summarise(reducer):
    # np.max / np.min raise ValueError on an empty sequence (and np.average
    # warns), so empty distance lists are mapped to NaN explicitly.
    return [reducer(distances) if len(distances) else np.nan
            for distances in lst_distances]

  df_dis = pd.DataFrame({'average': _summarise(np.average),
                    'max': _summarise(np.max),
                    'min': _summarise(np.min)})
  return df_dis.to_numpy()
# Build the inputs for the follow-up linear regression.
# BERT predictions: the 'label' column of the concatenated prediction files.
bert_pred = combined_csv['label']
# Per-text length features (word count, character count, characters per word).
df_length = get_length(combined_csv)
# Per-text GloVe distance features (average, max, min pairwise distance).
# NOTE(review): this needs `glove_dict`, whose loading is commented out above —
# confirm it is defined before running this cell.
df_distance = get_df_distance(combined_csv)


0.5985653734163079

In [None]:
from sklearn import linear_model
# Ordinary least-squares regression of the human ratings on the BERT
# prediction plus the length and distance features.
lr = linear_model.LinearRegression()
# One row per text; columns are [bert_pred, 3 length features, 3 distance features].
X = np.column_stack([bert_pred,df_length,df_distance])
# NOTE(review): `train_file` is not defined in this cell; in the visible part of
# the notebook it is last assigned inside the per-study correlation loop.
# Confirm it holds the ratings that match the rows of `combined_csv`.
y = train_file['label'].to_numpy().reshape(-1,1)
lr.fit(X,y)
# R^2 on the same data the model was fitted on (in-sample, not held-out).
lr.score(X,y)

0.5819160869112776

In [None]:
# Try using two samples (studies 1 and 2) to predict the remaining sample (study 3).
train_set = pd.concat([extract_metric(1,"Novelty_Consumer"),
          extract_metric(2,"Novelty_Consumer")]).reset_index()

# The model is trained and tested from CSV files, so both sets are written to disk first.
train_set.to_csv("train_file.csv")
test_set = extract_metric(3,"Novelty_Consumer")
test_set.to_csv("test_file.csv")

# A fresh DistilBERT model; the commented-out lines are alternative
# architectures that can be swapped in.
# NOTE(review): num_labels=1 presumably puts the model in regression mode — confirm.
model = HappyTextClassification(
    #"ROBERTA", "roberta-base",
    "DISTILBERT", "distilbert-base-uncased",
    #"ALBERT", "albert-base-v2",
                              num_labels=1)
model.train("train_file.csv")
pred = model.test("test_file.csv")

# Keep only the numeric score of each prediction.
pred_score = [x.score for x in pred]

# Copy of the test set with the human labels replaced by the model's scores
# (not used further in the visible part of the notebook).
df_pred = test_set.copy()
df_pred['label'] = pred_score
# 2x2 correlation matrix between predicted scores and human ratings;
# the off-diagonal entry is the Pearson correlation.
print(np.corrcoef(pred_score, test_set['label']))


Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_projector.bias']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.weight', 'pre_classifier.bias', 'classi

Step,Training Loss


100%|██████████| 308/308 [00:15<00:00, 19.65it/s]

[[1.         0.58764012]
 [0.58764012 1.        ]]



