# get BERT ready

In [1]:
from transformers import BertTokenizer, BertModel
import torch
import pandas as pd
import numpy as np

In [2]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)

# dataset

In [3]:
df = pd.read_excel('mit_final.xlsx', index_col=0)
df['all'] = df['intro'] + ' ' + df['leadership'] + ' ' + df['challenge'] + ' ' + df['weakness'] + ' ' + df['whyhire']

ratings = pd.read_csv('turker_scores_full_interview.csv', index_col=0)
ratings = ratings[ratings['Worker'] == 'AGGR']

data = pd.concat([df, ratings], axis=1)

# get x and y

In [4]:
def get_x(data, column_question):
    
    from nltk.corpus import stopwords
    import string
    
    sws = ['pretty', 'uhm', 'uhmm', 'hmmm', 'uhmmm', 'um', 'umm', 'ummm', 'ummmm', 'mmmmmm', 'uh', 'uhh', 'uhhh', 'ah', 
          'ahh', 'ahhh', 'ok', 'interviewee', 'okay', 'yeah', 'inaudible', 'hmm',
          'laughs', 'alright', 'well', 'heh', 'oh', 'ohh', 'ohhh', 'hm', 'hmm', 'hmmmm', 'yea', 'yes', 'yeah']
#     sws = sw + stopwords.words('english')

    import re
    regex = re.compile('[%s]' % re.escape(string.punctuation))
    regex2 = re.compile('interviewee')
    
    x = data[column_question].values    
    x_clean = []
    
    for line in x:
        line = str(line).lower()
        line = line.encode("ascii", "ignore")
        line = line.decode()

        line = regex.sub('', line)
        line = regex2.sub('', line)
        
        line_temp = []
        for token in line.rstrip().split():
            if token not in sws:
                line_temp.append(token)
        x_clean.append(" ".join(line_temp))
        

        
    return x_clean



In [5]:
x_i = get_x(data, 'intro')
x_l = get_x(data, 'leadership')
x_c = get_x(data, 'challenge')
x_w = get_x(data, 'weakness')
x_h = get_x(data, 'whyhire')
x_all = get_x(data, 'all')

In [6]:
y_overall = data['Overall'].values
y_hire = data['RecommendHiring'].values
# y_colleague = data['Colleague'].values
# y_eng = data['Engaged'].values
y_excit = data['Excited'].values
y_eye = data['EyeContact'].values
y_smiled = data['Smiled'].values
y_rate = data['SpeakingRate'].values
y_nofiller= data['NoFillers'].values
y_friendly = data['Friendly'].values
y_paused = data['Paused'].values
y_engtone = data['EngagingTone'].values
y_str = data['StructuredAnswers'].values
y_calm = data['Calm'].values
y_notstress = data['NotStressed'].values
y_focused = data['Focused'].values
y_auth = data['Authentic'].values
y_notawk = data['NotAwkward'].values
y_total = data['Total'].values

In [7]:
Xs = [x_i, x_l, x_c, x_w, x_h]
Ys = [y_overall, y_hire, y_excit, y_eye, y_smiled, y_rate, y_nofiller, y_friendly, y_paused, 
      y_engtone, y_str, y_calm, y_notstress, y_focused, y_auth, y_notawk, y_total]

max_len = 0
for x in Xs:
    for sent in x:
        input_ids = tokenizer.encode(sent, add_special_tokens=True)
        max_len = max(max_len, len(input_ids))
        
print(max_len)

Token indices sequence length is longer than the specified maximum sequence length for this model (518 > 512). Running this sequence through the model will result in indexing errors


518


In [8]:
from sklearn.svm import SVR
from sklearn.linear_model import Lasso, Ridge
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [9]:
# from sklearn.model_selection import GridSearchCV

# params = {'alpha':[0.0001, 0.001, 0.01, 1, 10], 'tol':[0.1, 1, 10]}
# params2 = {'epsilon':[0.0001, 0.001, 0.01, 1, 10], 'tol':[0.01, 0.1, 1, 10], 'C':[1, 10, 100, 1000]}

# model4 = Lasso()
# model5 = SVR()

# g = GridSearchCV(model4, params, return_train_score=True, cv=5)
# g2 = GridSearchCV(model5, params2, return_train_score=True, cv=5)

# lasso = Lasso(alpha=0.01, tol=1)
# lasso = Lasso(alpha=0.001, tol=10)
# reg = SVR(C=1000, epsilon=0.0001, tol=0.001, gamma='scale')

# svr = SVR(gamma='scale')
# ridge = Ridge()

In [10]:
#models
lasso = Lasso(alpha=0.01, tol=1)
svr = SVR(C=1, gamma='scale', epsilon=0.001, tol=0.1)
# svr = SVR(C=1, gamma='scale', epsilon=0.001, tol=0.01)

In [11]:
def run_bert_4_sum(x):
    input_ids = []
    for sent in x:
        encoded_dict = tokenizer.encode_plus(sent, add_special_tokens=True, max_length=512, padding='max_length',
                                            return_tensors='pt', truncation=True)
        input_ids.append(encoded_dict['input_ids'])
    
    input_ids = torch.cat(input_ids, dim=0)
    tokens_tensor = input_ids.clone().detach() #alternative for: tokens_tensor = torch.tensor(input_ids)
    
    outputs = model(tokens_tensor)
    
    embeddings = torch.stack(outputs[2][-4:]).sum(0)
    embeddings = embeddings.detach().numpy()
    result = embeddings.mean(axis=(1))

    return result

def run_bert_4_cat(x):
    input_ids = []
    for sent in x:
        encoded_dict = tokenizer.encode_plus(sent, add_special_tokens=True, max_length=512, padding='max_length',
                                            return_tensors='pt', truncation=True)
        input_ids.append(encoded_dict['input_ids'])
    
    input_ids = torch.cat(input_ids, dim=0)
    tokens_tensor = input_ids.clone().detach() #alternative for: tokens_tensor = torch.tensor(input_ids)
    
    outputs = model(tokens_tensor)
    
    embeddings = torch.cat([outputs[2][i] for i in [-1,-2,-3,-4]], dim=-1)
    embeddings = embeddings.detach().numpy()
    result = embeddings.mean(axis=(1))

    return result


def run_bert_all(x):
    input_ids = []
    for sent in x:
        encoded_dict = tokenizer.encode_plus(sent, add_special_tokens=True, max_length=512, padding='max_length',
                                            return_tensors='pt', truncation=True)
        input_ids.append(encoded_dict['input_ids'])
    
    input_ids = torch.cat(input_ids, dim=0)
    tokens_tensor = input_ids.clone().detach() #alternative for: tokens_tensor = torch.tensor(input_ids)
    
    outputs = model(tokens_tensor)
    
    embeddings = torch.stack(outputs[2][:]).sum(0)
    embeddings = embeddings.detach().numpy()
    result = embeddings.mean(axis=(1))

    return result


def run_bert_2(x):
    input_ids = []
    for sent in x:
        encoded_dict = tokenizer.encode_plus(sent, add_special_tokens=True, max_length=512, padding='max_length',
                                            return_tensors='pt', truncation=True)
        input_ids.append(encoded_dict['input_ids'])
    
    input_ids = torch.cat(input_ids, dim=0)
    tokens_tensor = input_ids.clone().detach() #alternative for: tokens_tensor = torch.tensor(input_ids)
    
    outputs = model(tokens_tensor)
    
    embeddings = outputs[2][-2].clone().detach()
    embeddings = embeddings.detach().numpy()
    result = embeddings.mean(axis=(1))

    return result

## run_bert_4_sum on each question(x_i, x_l, x_c, x_w, x_h)

In [12]:
x = run_bert_4_sum(x_i)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final1 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final1[k2] = avg


In [13]:
x = run_bert_4_sum(x_l)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final2 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final2[k2] = avg


In [14]:
x = run_bert_4_sum(x_c)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final3 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final3[k2] = avg


In [15]:
x = run_bert_4_sum(x_w)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final4 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final4[k2] = avg


In [16]:
x = run_bert_4_sum(x_h)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final5 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final5[k2] = avg


## run_bert_4_cat on each question(x_i, x_l, x_c, x_w, x_h)

In [17]:
x = run_bert_4_cat(x_i)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final6 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final6[k2] = avg


In [18]:
x = run_bert_4_cat(x_l)
svr = SVR(C=1, gamma='scale', epsilon=0.001, tol=0.1)
alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final7 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final7[k2] = avg


In [19]:
x = run_bert_4_cat(x_c)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final8 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final8[k2] = avg


In [20]:
x = run_bert_4_cat(x_w)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final9 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final9[k2] = avg


In [21]:
x = run_bert_4_cat(x_h)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final10 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final10[k2] = avg


## run_bert_all on each question(x_i, x_l, x_c, x_w, x_h)

In [22]:
x = run_bert_all(x_i)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final11 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final11[k2] = avg


In [23]:
x = run_bert_all(x_l)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final12 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final12[k2] = avg


In [24]:
x = run_bert_all(x_c)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final13 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final13[k2] = avg


In [25]:
x = run_bert_all(x_w)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final14 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final14[k2] = avg


In [26]:
x = run_bert_all(x_h)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final15 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final15[k2] = avg


## run_bert_2 on each question(x_i, x_l, x_c, x_w, x_h)

In [27]:
x = run_bert_2(x_i)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final16 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final16[k2] = avg


In [28]:
x = run_bert_2(x_l)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final17 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final17[k2] = avg


In [29]:
x = run_bert_2(x_c)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final18 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final18[k2] = avg


In [30]:
x = run_bert_2(x_w)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final19 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final19[k2] = avg


In [31]:
x = run_bert_2(x_h)

alles = {}
k = 5
unit = len(x) // k
for i in range(k):
    results = {}
    for j, y in enumerate(Ys):
        y = np.array(y)
        x_test = x[i*unit:(i+1)*unit]
        y_test = y[i*unit:(i+1)*unit]
        x_train = np.concatenate((x[:i*unit], x[(i+1)*unit:]), axis=0)
        y_train = np.concatenate((y[:i*unit], y[(i+1)*unit:]), axis=0)
        svr.fit(x_train, y_train)
        preds = svr.predict(x_test)
        result = np.corrcoef(y_test, preds)
        results[str(j+1)] = result
    alles[i] = results

final20 = {}
for k, v in alles.items():
    for k2, v2 in v.items():
        avg = np.mean(v2)
        final20[k2] = avg


# compile results

In [32]:
q1 = pd.DataFrame([final1, final6, final11, final16]).T
q1 = q1.rename(columns={0:'sum', 1:'concat', 2:'all', 3:'sec-to-last'})
q2 = pd.DataFrame([final2, final7, final12, final17]).T
q2 = q2.rename(columns={0:'sum', 1:'concat', 2:'all', 3:'sec-to-last'})
q3 = pd.DataFrame([final3, final8, final13, final18]).T
q3 = q3.rename(columns={0:'sum', 1:'concat', 2:'all', 3:'sec-to-last'})
q4 = pd.DataFrame([final4, final9, final14, final19]).T
q4 = q4.rename(columns={0:'sum', 1:'concat', 2:'all', 3:'sec-to-last'})
q5 = pd.DataFrame([final5, final10, final15, final20]).T
q5 = q5.rename(columns={0:'sum', 1:'concat', 2:'all', 3:'sec-to-last'})
allresult = pd.concat([q1, q2, q3, q4, q5], axis=0)
allresult
# allresult.to_csv('bert_svr.csv')

Unnamed: 0,sum,concat,all,sec-to-last
1,0.608362,0.614127,0.610146,0.606648
2,0.543967,0.543501,0.536379,0.531589
3,0.557312,0.585144,0.547163,0.585276
4,0.475803,0.536917,0.467098,0.531133
5,0.589054,0.525187,0.560763,0.461329
...,...,...,...,...
13,0.793493,0.813065,0.774446,0.814646
14,0.655339,0.653210,0.692333,0.636479
15,0.575442,0.568305,0.586596,0.558983
16,0.685767,0.670436,0.704231,0.675066
