In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import RobertaConfig, RobertaModel, RobertaTokenizer, RobertaForSequenceClassification
from transformers import AdamW
import random
import matplotlib.pyplot as plt
from collections import OrderedDict
from scipy.spatial.distance import cosine
from sim_utils import load_examples, Inputexample, CustomTextDataset, freeze_layers, train, test
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer, AutoConfig, AutoModel, AutoTokenizer

# Route outbound HTTP and HTTPS traffic of this process through the same proxy
# by setting both lowercase proxy environment variables.
_PROXY_URL = 'http://192.41.170.23:3128'
for _proxy_var in ('http_proxy', 'https_proxy'):
    os.environ[_proxy_var] = _PROXY_URL

In [2]:
# Shell escape: list the GPUs on this machine with their memory use and utilisation.
!nvidia-smi

Mon May  2 03:55:10 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.47.03    Driver Version: 510.47.03    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  On   | 00000000:84:00.0 Off |                  N/A |
| 24%   27C    P8    11W / 250W |      1MiB / 11264MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  On   | 00000000:85:00.0 Off |                  N/A |
| 22%   28C    P8     1W / 250W |      1MiB / 11264MiB |      0%      Default |
|       

In [3]:
# ---- Reproducibility ----
# Seed every RNG the experiment touches (Python, NumPy, PyTorch) so that data
# shuffling and the freshly initialised classifier head are repeatable across
# "Restart Kernel -> Run All". torch.manual_seed seeds the CPU and CUDA generators.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# ---- Scratch containers ----
# Placeholders that the experiment cell overwrites for each few-shot split.
N = 10  # NOTE(review): not referenced in the visible cells; purpose unclear, confirm before removing
data = []
labels = []

train_samples = []
train_labels = []

valid_samples = []
valid_labels = []

test_samples = []
test_labels = []

# ---- Hyper-parameters ----
embed_dim = 768  # presumably the hidden size of the roberta-base encoder; TODO confirm
batch_size = 16
# NOTE(review): 2e-3 is far above the 1e-5..5e-5 range commonly used when
# fine-tuning transformer encoders, and every recorded run in this notebook
# ends at 40/3080 correct (about chance for 77 classes). Value left unchanged
# because it is part of the experiment name; confirm whether it is intended.
lr=2e-3  # adjustable
temp = 0.3  # adjustable; appears in the experiment name as "t="
lamda = 0.01  # adjustable
skip_time = 0  # counter for pairs with y_i != y_j in the supervised contrastive loss

# ---- Experiment identity ----
data_name = 'BANKING77'
model_name = 'sup-simcse-roberta-base'
layers = 12  # number of freezing depths swept by the experiment cell
shot_names = ['train_5','train_10']

# Prefer the first GPU and fall back to CPU when CUDA is unavailable.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
## Layer-freezing sweep.
## For every few-shot split: build the loaders, then for each freezing depth
## (1..layers) fine-tune a fresh classifier, reload the checkpoint saved by
## train() and record its test accuracy in a per-split results file.

def _build_split(samples, shuffle):
    """Wrap loaded examples in a CustomTextDataset and a DataLoader.

    Args:
        samples: examples returned by ``load_examples``; only their ``.text``
            and ``.label`` attributes are read.
        shuffle: passed through to ``DataLoader(shuffle=...)``.

    Returns:
        ``(dataset, loader, split_labels)``; ``split_labels`` lists the labels
        in the same order as ``samples``.
    """
    texts = [sample.text for sample in samples]
    split_labels = [sample.label for sample in samples]
    dataset = CustomTextDataset(split_labels, texts, batch_size=batch_size, repeated_label=False)
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)
    return dataset, loader, split_labels


direct_name = f"princeton-nlp/{model_name}"
# The tokenizer depends only on the checkpoint name, so load it once for the whole sweep.
tokenizer = AutoTokenizer.from_pretrained(direct_name)

for shot_name in shot_names:

    path_shot = f'../../../{data_name}/{shot_name}/'
    valid_path = f'../../../{data_name}/valid/'
    test_path = f'../../../{data_name}/test/'

    # load data
    train_samples = load_examples(path_shot)
    valid_samples = load_examples(valid_path)
    test_samples = load_examples(test_path)

    print("===== small train set ====")
    train_data, train_loader, train_labels = _build_split(train_samples, shuffle=True)

    # Evaluation splits are not shuffled: order does not affect accuracy and a
    # fixed order keeps evaluation deterministic.
    print("===== validation set ====")
    valid_data, valid_loader, valid_labels = _build_split(valid_samples, shuffle=False)

    print("===== test set ====")
    test_data, test_loader, test_labels = _build_split(test_samples, shuffle=False)

    # Build the label space from ALL splits. Previously it was derived from
    # whichever list `labels` last pointed to (the test set), which silently
    # drops any class that happens to be missing from that split.
    unique_label = np.unique(np.array(train_labels + valid_labels + test_labels))
    num_class = len(unique_label)

    # map text label to class index (sorted order, as returned by np.unique)
    label_maps = {label: idx for idx, label in enumerate(unique_label)}

    print("num_class:",num_class)

    # The classifier config depends only on the checkpoint and the class count.
    config = AutoConfig.from_pretrained(direct_name)
    config.num_labels = num_class

    lines = ['test acc']

    try:
        # Sweep the freezing depth using the configured `layers` value rather
        # than a hard-coded 12.
        for cur_layer in range(layers):

            print("current freezing up to layer :",cur_layer+1)
            exp_name = f'{model_name}_lr={lr}_t={temp}_{data_name}_{shot_name}_{cur_layer+1}'

            print("direct_name :",direct_name)
            simcse = AutoModelForSequenceClassification.from_pretrained(direct_name,config=config)

            simcse = freeze_layers(simcse,freeze_layers_count=cur_layer+1)

            optimizer= AdamW(simcse.parameters(), lr=lr)
            simcse = simcse.to(device)

            train_log, valid_log = train(exp_name,simcse,device,label_maps,optimizer,train_loader,valid_loader,train_data,valid_data,tokenizer,epochs=30)

            # Reload the checkpoint stored under the experiment name
            # (presumably written by train(); verify in sim_utils).
            PATH = f'../../../fewshot_models/{exp_name}.pth'
            best_model = AutoModelForSequenceClassification.from_pretrained(direct_name,config=config)
            # map_location makes the load work even if the checkpoint was saved
            # from a different device than the one in use now.
            best_model.load_state_dict(torch.load(PATH, map_location=device))
            best_model = best_model.to(device)
            # Disable dropout for evaluation in case test() does not switch modes itself.
            best_model.eval()

            test_acc = test(best_model,device,label_maps,test_loader,len(test_data),tokenizer)

            test_acc = 100 * test_acc

            res = f'data_name:{data_name}_model:{model_name}_{shot_name}_{cur_layer+1}_test_acc:{str(test_acc)}'
            lines.append(res)
    finally:
        # Persist whatever has been collected so far, so an error or interrupt
        # in a late layer does not discard the results of the earlier layers.
        with open(f'exp3_result_{data_name}_{shot_name}.txt', 'w') as f:
            f.writelines(f'{line}\n' for line in lines)

===== small train set ====
Train on Cross Entropy loss
len of dataset : 385
===== validation set ====
Train on Cross Entropy loss
len of dataset : 1540
===== test set ====
Train on Cross Entropy loss
len of dataset : 3080
num_class: 77
current freezing up to layer : 1
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.1.attention.self.query.weight
roberta.encoder.layer.1.attention.self.query.bias
roberta.encoder.layer.1.attention.self.key.weight
roberta.encoder.layer.1.attention.self.key.bias
roberta.encoder.layer.1.attention.self.value.weight
roberta.encoder.layer.1.attention.self.value.bias
roberta.encoder.layer.1.attention.output.dense.weight
roberta.encoder.layer.1.attention.output.dense.bias
roberta.encoder.layer.1.attention.output.LayerNorm.weight
roberta.encoder.layer.1.attention.output.LayerNorm.bias
roberta.encoder.layer.1.intermediate.dense.weight
roberta.encoder.layer.1.intermediate.dense.bias
roberta.encoder.layer.1.output.dense.weight
roberta.encoder.layer.1.output.dense.bias
roberta.encoder.layer.1.output.LayerNorm.weight
roberta.encoder.layer.1.output.LayerNorm.bias
roberta.encoder.layer.2.attention.self.query.weight
roberta.encoder.layer.2.attention.self.query.bias
roberta.encoder.layer.2.attention.self.key.weight
roberta.encoder.layer.2.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 2
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.2.attention.self.query.weight
roberta.encoder.layer.2.attention.self.query.bias
roberta.encoder.layer.2.attention.self.key.weight
roberta.encoder.layer.2.attention.self.key.bias
roberta.encoder.layer.2.attention.self.value.weight
roberta.encoder.layer.2.attention.self.value.bias
roberta.encoder.layer.2.attention.output.dense.weight
roberta.encoder.layer.2.attention.output.dense.bias
roberta.encoder.layer.2.attention.output.LayerNorm.weight
roberta.encoder.layer.2.attention.output.LayerNorm.bias
roberta.encoder.layer.2.intermediate.dense.weight
roberta.encoder.layer.2.intermediate.dense.bias
roberta.encoder.layer.2.output.dense.weight
roberta.encoder.layer.2.output.dense.bias
roberta.encoder.layer.2.output.LayerNorm.weight
roberta.encoder.layer.2.output.LayerNorm.bias
roberta.encoder.layer.3.attention.self.query.weight
roberta.encoder.layer.3.attention.self.query.bias
roberta.encoder.layer.3.attention.self.key.weight
roberta.encoder.layer.3.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 3
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.3.attention.self.query.weight
roberta.encoder.layer.3.attention.self.query.bias
roberta.encoder.layer.3.attention.self.key.weight
roberta.encoder.layer.3.attention.self.key.bias
roberta.encoder.layer.3.attention.self.value.weight
roberta.encoder.layer.3.attention.self.value.bias
roberta.encoder.layer.3.attention.output.dense.weight
roberta.encoder.layer.3.attention.output.dense.bias
roberta.encoder.layer.3.attention.output.LayerNorm.weight
roberta.encoder.layer.3.attention.output.LayerNorm.bias
roberta.encoder.layer.3.intermediate.dense.weight
roberta.encoder.layer.3.intermediate.dense.bias
roberta.encoder.layer.3.output.dense.weight
roberta.encoder.layer.3.output.dense.bias
roberta.encoder.layer.3.output.LayerNorm.weight
roberta.encoder.layer.3.output.LayerNorm.bias
roberta.encoder.layer.4.attention.self.query.weight
roberta.encoder.layer.4.attention.self.query.bias
roberta.encoder.layer.4.attention.self.key.weight
roberta.encoder.layer.4.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 4
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.4.attention.self.query.weight
roberta.encoder.layer.4.attention.self.query.bias
roberta.encoder.layer.4.attention.self.key.weight
roberta.encoder.layer.4.attention.self.key.bias
roberta.encoder.layer.4.attention.self.value.weight
roberta.encoder.layer.4.attention.self.value.bias
roberta.encoder.layer.4.attention.output.dense.weight
roberta.encoder.layer.4.attention.output.dense.bias
roberta.encoder.layer.4.attention.output.LayerNorm.weight
roberta.encoder.layer.4.attention.output.LayerNorm.bias
roberta.encoder.layer.4.intermediate.dense.weight
roberta.encoder.layer.4.intermediate.dense.bias
roberta.encoder.layer.4.output.dense.weight
roberta.encoder.layer.4.output.dense.bias
roberta.encoder.layer.4.output.LayerNorm.weight
roberta.encoder.layer.4.output.LayerNorm.bias
roberta.encoder.layer.5.attention.self.query.weight
roberta.encoder.layer.5.attention.self.query.bias
roberta.encoder.layer.5.attention.self.key.weight
roberta.encoder.layer.5.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 5
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.5.attention.self.query.weight
roberta.encoder.layer.5.attention.self.query.bias
roberta.encoder.layer.5.attention.self.key.weight
roberta.encoder.layer.5.attention.self.key.bias
roberta.encoder.layer.5.attention.self.value.weight
roberta.encoder.layer.5.attention.self.value.bias
roberta.encoder.layer.5.attention.output.dense.weight
roberta.encoder.layer.5.attention.output.dense.bias
roberta.encoder.layer.5.attention.output.LayerNorm.weight
roberta.encoder.layer.5.attention.output.LayerNorm.bias
roberta.encoder.layer.5.intermediate.dense.weight
roberta.encoder.layer.5.intermediate.dense.bias
roberta.encoder.layer.5.output.dense.weight
roberta.encoder.layer.5.output.dense.bias
roberta.encoder.layer.5.output.LayerNorm.weight
roberta.encoder.layer.5.output.LayerNorm.bias
roberta.encoder.layer.6.attention.self.query.weight
roberta.encoder.layer.6.attention.self.query.bias
roberta.encoder.layer.6.attention.self.key.weight
roberta.encoder.layer.6.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 6
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.6.attention.self.query.weight
roberta.encoder.layer.6.attention.self.query.bias
roberta.encoder.layer.6.attention.self.key.weight
roberta.encoder.layer.6.attention.self.key.bias
roberta.encoder.layer.6.attention.self.value.weight
roberta.encoder.layer.6.attention.self.value.bias
roberta.encoder.layer.6.attention.output.dense.weight
roberta.encoder.layer.6.attention.output.dense.bias
roberta.encoder.layer.6.attention.output.LayerNorm.weight
roberta.encoder.layer.6.attention.output.LayerNorm.bias
roberta.encoder.layer.6.intermediate.dense.weight
roberta.encoder.layer.6.intermediate.dense.bias
roberta.encoder.layer.6.output.dense.weight
roberta.encoder.layer.6.output.dense.bias
roberta.encoder.layer.6.output.LayerNorm.weight
roberta.encoder.layer.6.output.LayerNorm.bias
roberta.encoder.layer.7.attention.self.query.weight
roberta.encoder.layer.7.attention.self.query.bias
roberta.encoder.layer.7.attention.self.key.weight
roberta.encoder.layer.7.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 7
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.7.attention.self.query.weight
roberta.encoder.layer.7.attention.self.query.bias
roberta.encoder.layer.7.attention.self.key.weight
roberta.encoder.layer.7.attention.self.key.bias
roberta.encoder.layer.7.attention.self.value.weight
roberta.encoder.layer.7.attention.self.value.bias
roberta.encoder.layer.7.attention.output.dense.weight
roberta.encoder.layer.7.attention.output.dense.bias
roberta.encoder.layer.7.attention.output.LayerNorm.weight
roberta.encoder.layer.7.attention.output.LayerNorm.bias
roberta.encoder.layer.7.intermediate.dense.weight
roberta.encoder.layer.7.intermediate.dense.bias
roberta.encoder.layer.7.output.dense.weight
roberta.encoder.layer.7.output.dense.bias
roberta.encoder.layer.7.output.LayerNorm.weight
roberta.encoder.layer.7.output.LayerNorm.bias
roberta.encoder.layer.8.attention.self.query.weight
roberta.encoder.layer.8.attention.self.query.bias
roberta.encoder.layer.8.attention.self.key.weight
roberta.encoder.layer.8.attention.self.key.bias


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

correct : 40
total : 3080
current freezing up to layer : 8
direct_name : princeton-nlp/sup-simcse-roberta-base


Some weights of the model checkpoint at princeton-nlp/sup-simcse-roberta-base were not used when initializing RobertaForSequenceClassification: ['pooler.dense.bias', 'pooler.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at princeton-nlp/sup-simcse-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'classifier.dense.bias']
You should probably TRAIN this model 

roberta.encoder.layer.8.attention.self.query.weight
roberta.encoder.layer.8.attention.self.query.bias
roberta.encoder.layer.8.attention.self.key.weight
roberta.encoder.layer.8.attention.self.key.bias
roberta.encoder.layer.8.attention.self.value.weight
roberta.encoder.layer.8.attention.self.value.bias
roberta.encoder.layer.8.attention.output.dense.weight
roberta.encoder.layer.8.attention.output.dense.bias
roberta.encoder.layer.8.attention.output.LayerNorm.weight
roberta.encoder.layer.8.attention.output.LayerNorm.bias
roberta.encoder.layer.8.intermediate.dense.weight
roberta.encoder.layer.8.intermediate.dense.bias
roberta.encoder.layer.8.output.dense.weight
roberta.encoder.layer.8.output.dense.bias
roberta.encoder.layer.8.output.LayerNorm.weight
roberta.encoder.layer.8.output.LayerNorm.bias
roberta.encoder.layer.9.attention.self.query.weight
roberta.encoder.layer.9.attention.self.query.bias
roberta.encoder.layer.9.attention.self.key.weight
roberta.encoder.layer.9.attention.self.key.bias
