In [None]:
from IPython.display import clear_output
! pip install datasets
! pip install transformers
! pip install accelerate -U
clear_output()

In [None]:
from IPython.display import clear_output
! pip install datasets
! pip install transformers[torch]
! pip install bayesian-optimization
! pip install cvxopt
! pip install qpsolvers==3.4.0
! pip install accelerate==0.20.3 -U
! pip install git+https://github.com/simonzhang00/ripser-plusplus.git
! pip install git+https://github.com/IlyaTrofimov/RTD.git
clear_output()

In [None]:
from transformers import AutoTokenizer, DataCollatorWithPadding, AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed
from sklearn.metrics import accuracy_score
from datasets import load_dataset
import numpy as np

In [None]:
# Seed the RNGs handled by transformers before any data or model work.
set_seed(42)

# Raw IMDB reviews and the DistilBERT tokenizer shared by every model below.
imdb = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def preprocess_function(examples):
    """Tokenize the ``text`` field of a batch with truncation enabled."""
    texts = examples["text"]
    return tokenizer(texts, truncation=True)


# No padding is requested at tokenization time; the collator pads per batch.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
tokenized_imdb = imdb.map(preprocess_function, batched=True)

In [None]:
# Split the official IMDB test split 50/50 into validation and test halves.
# The seed is passed explicitly so the split does not depend on the global
# NumPy RNG state (i.e. on which cells were executed before this one); the
# prediction CSVs written later are only meaningful for one fixed split.
dataset = tokenized_imdb["test"].train_test_split(test_size=0.5, seed=42)
train, val, test = tokenized_imdb["train"], dataset['train'], dataset['test']

In [None]:
# Mount Google Drive at /content/gdrive (Colab only). The checkpoints and
# prediction CSVs used below are addressed relative to this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
from torch import nn
import copy

# NOTE(review): not referenced by any of the visible cells (every loader call
# below passes seed 123) -- confirm whether it is still needed.
weak_seeds = [123, 42, 43]


def _build_trainer(model, seed):
    """Wrap ``model`` in a ``Trainer`` with the hyper-parameters shared by all models.

    Reads the notebook-level ``tokenized_imdb``, ``tokenizer`` and
    ``data_collator``.
    """
    training_args = TrainingArguments(
        output_dir="./results",
        learning_rate=2e-5,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=5,
        weight_decay=0.01,
        seed=seed,
    )
    return Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_imdb["train"],
        eval_dataset=tokenized_imdb["test"],
        tokenizer=tokenizer,
        data_collator=data_collator,
    )


def load_trainer_weak(number, seed, num_layers=1):
    """Load "weak" checkpoint ``number`` and keep only its first transformer layers.

    Args:
        number: Index used in the checkpoint directory name
            (``results_bert_weak_<number>``).
        seed: Seed forwarded to ``TrainingArguments``.
        num_layers: How many leading DistilBERT transformer blocks to keep
            (default 1, the value the notebook has always used).

    Returns:
        ``(trainer, model)`` where ``model`` is the truncated network.
    """
    model = AutoModelForSequenceClassification.from_pretrained("gdrive/MyDrive/results_bert_weak_" + str(number) + "/")
    # Truncate in place: the freshly loaded model is not shared with anyone,
    # so deep-copying it first only costs memory and time.
    kept_layers = model.distilbert.transformer.layer[:num_layers]
    model.distilbert.transformer.layer = nn.ModuleList(kept_layers)
    return _build_trainer(model, seed), model


def load_trainer(number, seed):
    """Load full ("strong") checkpoint ``number`` (``results_bert_<number>``).

    Returns:
        ``(trainer, model)``.
    """
    model = AutoModelForSequenceClassification.from_pretrained("gdrive/MyDrive/results_bert_" + str(number) + "/")
    return _build_trainer(model, seed), model


In [None]:
# Strong checkpoints are numbered 1..5, weak ones 1..3; all use seed 123.
strong_pairs = [load_trainer(idx, 123) for idx in range(1, 6)]
trainers = [pair[0] for pair in strong_pairs]
models = [pair[1] for pair in strong_pairs]

weak_pairs = [load_trainer_weak(idx, 123) for idx in range(1, 4)]
weak_trainers = [pair[0] for pair in weak_pairs]
weak_models = [pair[1] for pair in weak_pairs]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at gdrive/MyDrive/results_bert_weak_1/ and are newly initialized: ['distilbert.transformer.layer.1.ffn.lin1.weight', 'distilbert.transformer.layer.3.ffn.lin2.bias', 'distilbert.transformer.layer.3.attention.k_lin.bias', 'distilbert.transformer.layer.4.ffn.lin2.bias', 'distilbert.transformer.layer.4.attention.out_lin.weight', 'distilbert.transformer.layer.1.attention.k_lin.bias', 'distilbert.transformer.layer.5.attention.v_lin.weight', 'distilbert.transformer.layer.4.attention.out_lin.bias', 'distilbert.transformer.layer.5.attention.k_lin.bias', 'distilbert.transformer.layer.3.attention.v_lin.bias', 'distilbert.transformer.layer.4.attention.k_lin.weight', 'distilbert.transformer.layer.2.attention.out_lin.bias', 'distilbert.transformer.layer.2.sa_layer_norm.weight', 'distilbert.transformer.layer.1.sa_layer_norm.weight', 'distilbert.transformer.layer.3.attention.v_lin.weight', 'distilbert.tr

## Calculate predictions

In [None]:
# Run every weak model on the held-out test half.
weak_single_predictions = [weak_trainer.predict(test) for weak_trainer in weak_trainers]

You're using a DistilBertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.



KeyboardInterrupt



In [None]:
# Persist the test-half labels (taken from the first weak model's prediction output).
np.savetxt("./gdrive/MyDrive/ens_predictions/weak_true_labels" + ".csv", weak_single_predictions[0].label_ids, delimiter=",")

In [None]:
# One CSV per weak model; files are numbered from 1.
for file_no in range(1, 4):
    weak_output = weak_single_predictions[file_no - 1]
    np.savetxt(f"./gdrive/MyDrive/ens_predictions/weak_prediction_{file_no}.csv", weak_output.predictions, delimiter=",")

In [None]:
# Same weak models, now evaluated on the validation half.
weak_single_predictions = [weak_trainer.predict(val) for weak_trainer in weak_trainers]

for file_no in range(1, 4):
    weak_output = weak_single_predictions[file_no - 1]
    np.savetxt(f"./gdrive/MyDrive/ens_predictions/val_weak_prediction_{file_no}.csv", weak_output.predictions, delimiter=",")
np.savetxt("./gdrive/MyDrive/ens_predictions/val_weak_true_labels.csv", weak_single_predictions[0].label_ids, delimiter=",")

In [None]:
# Run every strong model on the held-out test half.
single_predictions = [strong_trainer.predict(test) for strong_trainer in trainers]

In [None]:
# One CSV per strong model; files are numbered from 1.
for file_no in range(1, 6):
    strong_output = single_predictions[file_no - 1]
    np.savetxt(f"./gdrive/MyDrive/ens_predictions/not_weak_prediction_{file_no}.csv", strong_output.predictions, delimiter=",")

In [None]:
# Strong models evaluated on the validation half.
single_predictions = [strong_trainer.predict(val) for strong_trainer in trainers]

# Write one file per model actually predicted, rather than a hard-coded
# count of three, so this cell alone leaves a complete set of CSVs on disk.
for file_no, strong_output in enumerate(single_predictions, start=1):
    np.savetxt(f"./gdrive/MyDrive/ens_predictions/val_non_weak_prediction_{file_no}.csv", strong_output.predictions, delimiter=",")

In [None]:
# Save the validation outputs of strong models with indices 3 and 4
# (files ..._4.csv and ..._5.csv).
for i in range(3, 5):
    np.savetxt("./gdrive/MyDrive/ens_predictions/val_non_weak_prediction_" + str(i + 1) + ".csv", single_predictions[i].predictions, delimiter=",")

## Read saved predictions (strong and weak models)

In [None]:
# Reload the strong models' test outputs as one array, stacked along axis 0.
single_predictions = np.array([
    np.genfromtxt(f"./gdrive/MyDrive/ens_predictions/not_weak_prediction_{file_no}.csv", delimiter=',')
    for file_no in range(1, 6)
])

In [None]:
# Reload the weak models' test outputs as one array, stacked along axis 0.
weak_single_predictions = np.array([
    np.genfromtxt(f"./gdrive/MyDrive/ens_predictions/weak_prediction_{file_no}.csv", delimiter=',')
    for file_no in range(1, 4)
])

In [None]:
# Test-half labels, as written earlier to weak_true_labels.csv.
true_labels = np.genfromtxt("./gdrive/MyDrive/ens_predictions/weak_true_labels.csv", delimiter=',')

In [None]:
# Validation-half outputs of the strong models.
val_single_predictions = np.array([
    np.genfromtxt(f"./gdrive/MyDrive/ens_predictions/val_non_weak_prediction_{file_no}.csv", delimiter=',')
    for file_no in range(1, 6)
])

# Validation-half outputs of the weak models.
val_weak_single_predictions = np.array([
    np.genfromtxt(f"./gdrive/MyDrive/ens_predictions/val_weak_prediction_{file_no}.csv", delimiter=',')
    for file_no in range(1, 4)
])

val_true_labels = np.genfromtxt("./gdrive/MyDrive/ens_predictions/val_weak_true_labels.csv", delimiter=',')

In [None]:
from sklearn.metrics import accuracy_score
# Stand-alone test accuracy of each weak model.
for weak_logits in weak_single_predictions:
    weak_labels = np.argmax(weak_logits, axis=1)
    print(accuracy_score(weak_labels, true_labels))

0.84584
0.83552
0.87728


In [None]:
# Stand-alone test accuracy of each strong model.
for strong_logits in single_predictions:
    strong_labels = np.argmax(strong_logits, axis=1)
    print(accuracy_score(strong_labels, true_labels))

0.93144
0.93112
0.93328
0.9324
0.92984


In [None]:
def weighted_prediction(single_predictions, weights):
    """Combine per-model outputs into one weighted-average output.

    Args:
        single_predictions: Array-like of shape
            ``(n_models, n_samples, n_classes)``.
        weights: Array-like of ``n_models`` weights; they are rescaled to sum
            to 1 before use.

    Returns:
        ``ndarray`` of shape ``(n_samples, n_classes)``.

    Raises:
        ValueError: If ``weights`` is ``None`` (e.g. a failed solver result).
    """
    if weights is None:
        raise ValueError("weights is None; did the QP solver fail to find a solution?")
    weights = np.asarray(weights, dtype=float)
    weights = weights / weights.sum()
    stacked = np.asarray(single_predictions)
    # Contract over the model axis; works for any number of classes instead
    # of only the two columns that were previously indexed explicitly.
    return np.tensordot(weights, stacked, axes=1)

In [None]:
# Baseline: average every (strong, weak) pair with equal weights.
for i, strong in enumerate(single_predictions):
    for j, weak in enumerate(weak_single_predictions):
        weights = np.array([0.5, 0.5])
        weighted_predictions = weighted_prediction(np.array([strong, weak]), weights)
        pair_accuracy = accuracy_score(np.argmax(weighted_predictions, axis=1), true_labels)
        print(f'Strong:{i}, Weak:{j}, {pair_accuracy}')

Strong:0, Weak:0, 0.88504
Strong:0, Weak:1, 0.85576
Strong:0, Weak:2, 0.9336
Strong:1, Weak:0, 0.88536
Strong:1, Weak:1, 0.85624
Strong:1, Weak:2, 0.9352
Strong:2, Weak:0, 0.88424
Strong:2, Weak:1, 0.8552
Strong:2, Weak:2, 0.93344
Strong:3, Weak:0, 0.88544
Strong:3, Weak:1, 0.85544
Strong:3, Weak:2, 0.93496
Strong:4, Weak:0, 0.88288
Strong:4, Weak:1, 0.85424
Strong:4, Weak:2, 0.93048


## Output correlation

In [None]:
from qpsolvers import solve_qp

def calc_corr(one, two, normalize=False):
    """Return the first element of ``np.correlate(one.T, two.T)``.

    For two equal-length 1-D vectors this is their dot product.

    Args:
        one, two: 1-D arrays.
        normalize: If true, divide by ``||one|| * ||two||``. Defaults to
            ``False``, which is the unnormalised value the notebook has
            always used.
    """
    raw = np.correlate(one.T, two.T)[0]
    if not normalize:
        return raw
    # Norms are only computed when actually requested.
    return raw / (np.linalg.norm(one) * np.linalg.norm(two))

def calculate_correlation_precalc_mixed(first, second):
    """``calc_corr`` between column 0 of strong model ``first`` and weak model ``second`` (validation outputs)."""
    strong_col = val_single_predictions[first][:, 0]
    weak_col = val_weak_single_predictions[second][:, 0]
    return calc_corr(strong_col, weak_col)

def calculate_correlation_precalc_strong(first, second):
    """``calc_corr`` between column 0 of two strong models' validation outputs."""
    left_col = val_single_predictions[first][:, 0]
    right_col = val_single_predictions[second][:, 0]
    return calc_corr(left_col, right_col)

def calculate_correlation_precalc_weak(first, second):
    """``calc_corr`` between column 0 of two weak models' validation outputs."""
    left_col = val_weak_single_predictions[first][:, 0]
    right_col = val_weak_single_predictions[second][:, 0]
    return calc_corr(left_col, right_col)

def get_corr_matrix(i, j):
    """Build the symmetric 2x2 matrix for strong model ``i`` and weak model ``j``.

    Diagonal entries are each model's self-correlation; the off-diagonal
    entries are the strong/weak cross term.
    """
    strong_self = calculate_correlation_precalc_strong(i, i)
    cross = calculate_correlation_precalc_mixed(i, j)
    weak_self = calculate_correlation_precalc_weak(j, j)

    matrix = np.zeros((2, 2))
    matrix[0] = (strong_self, cross)
    matrix[1] = (cross, weak_self)
    return matrix

def corr_based_posterior_variance(corr_matrix, weights):
    """Evaluate the quadratic form ``w @ corr_matrix @ w.T`` and return it as a scalar."""
    row = np.expand_dims(weights, axis=0)  # shape (1, n)
    left = np.dot(row, corr_matrix)
    quad = np.dot(left, row.T)  # shape (1, 1)
    return quad[0][0]

def calculate_optimal_weights_output(corr_matrix, verbose=True):
    """Solve for ensemble weights minimising ``w^T C w`` on the probability simplex.

    The problem handed to the solver is ``min 1/2 w^T P w`` subject to
    ``sum(w) == 1`` and ``0 <= w <= 1``, with ``P = C + C^T``.

    Args:
        corr_matrix: Square ``(n, n)`` matrix ``C``.
        verbose: Forwarded to ``solve_qp``; defaults to ``True`` as before.

    Returns:
        Whatever ``solve_qp`` returns: the weight vector, or ``None`` when
        the solver does not find a solution.
    """
    corr_matrix = np.asarray(corr_matrix, dtype=float)
    n_models = corr_matrix.shape[0]
    # C + C^T equals 2*C for a symmetric C, and gives the solver the
    # symmetric matrix it expects if C is slightly asymmetric.
    P = corr_matrix + corr_matrix.T
    q = np.zeros(n_models)
    A = np.ones((1, n_models))
    b = np.ones(1)
    lb = np.zeros(n_models)
    ub = np.ones(n_models)
    # Uniform start sized to the problem, so the same function serves a pair
    # of models as well as the full ensemble.
    initvals = np.full(n_models, 1.0 / n_models)
    return solve_qp(P, q, A=A, b=b, lb=lb, ub=ub, initvals=initvals, solver='cvxopt', verbose=verbose)

# Test accuracy of each (strong, weak) pair under output-correlation weights.
ans = []
for i, strong in enumerate(single_predictions):
    for j, weak in enumerate(weak_single_predictions):
        corr_matrix = get_corr_matrix(i, j)
        weights = calculate_optimal_weights_output(corr_matrix)
        weighted_predictions = weighted_prediction(np.array([strong, weak]), weights)
        pair_accuracy = accuracy_score(np.argmax(weighted_predictions, axis=1), true_labels)
        ans.append(np.array([i, j, round(pair_accuracy, 5)]))

     pcost       dcost       gap    pres   dres
 0:  1.6350e+05  1.6349e+05  4e+00  7e-01  5e+05
 1:  1.4436e+05  1.4435e+05  1e-01  4e-03  3e+03
 2:  1.4435e+05  1.4435e+05  2e-03  4e-05  2e+01
 3:  1.4435e+05  1.4435e+05  2e-05  4e-07  2e-01
 4:  1.4435e+05  1.4435e+05  2e-07  4e-09  2e-03
 5:  1.4435e+05  1.4435e+05  2e-09  4e-11  2e-05
 6:  1.4435e+05  1.4435e+05  2e-11  4e-13  2e-07
 7:  1.4435e+05  1.4435e+05  2e-13  4e-15  2e-09
Optimal solution found.
     pcost       dcost       gap    pres   dres
 0:  2.3896e+05  2.3896e+05  4e+00  7e-01  7e+05
 1:  1.4087e+05  1.4087e+05  4e-01  1e-01  1e+05
 2:  1.4062e+05  1.4062e+05  4e-01  1e-01  1e+05
 3:  4.7753e+05 -6.5309e+04  6e+05  1e-01  1e+05
 4:  1.4139e+05 -8.5266e+05  1e+06  9e-02  9e+04
 5:  1.4547e+05  1.3110e+05  1e+04  2e-16  7e-10
 6:  1.4481e+05  1.4464e+05  2e+02  2e-16  4e-11
 7:  1.4477e+05  1.4477e+05  2e+00  2e-16  2e-11
 8:  1.4477e+05  1.4477e+05  2e-02  2e-16  9e-11
Optimal solution found.
     pcost       dcost 

In [None]:
import pandas as pd
# Tabulate the per-pair accuracies collected above.
ans = np.array(ans)
ans_df = pd.DataFrame(data=ans, columns=['Strong', 'Weak', 'Accuracy'])
ans_df

Unnamed: 0,Strong,Weak,Accuracy
0,0.0,0.0,0.93136
1,0.0,1.0,0.93144
2,0.0,2.0,0.8888
3,1.0,0.0,0.93224
4,1.0,1.0,0.93112
5,1.0,2.0,0.87728
6,2.0,0.0,0.93328
7,2.0,1.0,0.93328
8,2.0,2.0,0.90072
9,3.0,0.0,0.93344


## Barcode correlation

In [None]:
from qpsolvers import solve_qp
import rtd
import pandas as pd
from sklearn.metrics import accuracy_score

def calc_corr(one, two, normalize=False):
    """Return the first element of ``np.correlate(one.T, two.T)``.

    For two equal-length 1-D vectors this is their dot product. This cell
    re-defines the function of the same name from the "Output correlation"
    section.

    Args:
        one, two: 1-D arrays.
        normalize: If true, divide by ``||one|| * ||two||``. Defaults to
            ``False``, which is the unnormalised value the notebook has
            always used.
    """
    raw = np.correlate(one.T, two.T)[0]
    if not normalize:
        return raw
    # Norms are only computed when actually requested.
    return raw / (np.linalg.norm(one) * np.linalg.norm(two))

def calculate_correlation_precalc_mixed(first, second):
    """``calc_corr`` between column 0 of strong model ``first`` and weak model ``second`` (validation outputs)."""
    strong_col = val_single_predictions[first][:, 0]
    weak_col = val_weak_single_predictions[second][:, 0]
    return calc_corr(strong_col, weak_col)

def calculate_correlation_precalc_strong(first, second):
    """``calc_corr`` between column 0 of two strong models' validation outputs."""
    left_col = val_single_predictions[first][:, 0]
    right_col = val_single_predictions[second][:, 0]
    return calc_corr(left_col, right_col)

def calculate_correlation_precalc_weak(first, second):
    """``calc_corr`` between column 0 of two weak models' validation outputs."""
    left_col = val_weak_single_predictions[first][:, 0]
    right_col = val_weak_single_predictions[second][:, 0]
    return calc_corr(left_col, right_col)

def get_corr_matrix(i, j):
    """Build the symmetric 2x2 matrix for strong model ``i`` and weak model ``j``.

    Diagonal entries are each model's self-correlation; the off-diagonal
    entries are the strong/weak cross term.
    """
    strong_self = calculate_correlation_precalc_strong(i, i)
    cross = calculate_correlation_precalc_mixed(i, j)
    weak_self = calculate_correlation_precalc_weak(j, j)

    matrix = np.zeros((2, 2))
    matrix[0] = (strong_self, cross)
    matrix[1] = (cross, weak_self)
    return matrix

def corr_based_posterior_variance(corr_matrix, weights):
    """Evaluate the quadratic form ``w @ corr_matrix @ w.T`` and return it as a scalar."""
    row = np.expand_dims(weights, axis=0)  # shape (1, n)
    left = np.dot(row, corr_matrix)
    quad = np.dot(left, row.T)  # shape (1, 1)
    return quad[0][0]

def calculate_optimal_weights_output(corr_matrix, verbose=True):
    """Solve for ensemble weights minimising ``w^T C w`` on the probability simplex.

    The problem handed to the solver is ``min 1/2 w^T P w`` subject to
    ``sum(w) == 1`` and ``0 <= w <= 1``, with ``P = C + C^T``.

    Args:
        corr_matrix: Square ``(n, n)`` matrix ``C``.
        verbose: Forwarded to ``solve_qp``; defaults to ``True`` as before.

    Returns:
        Whatever ``solve_qp`` returns: the weight vector, or ``None`` when
        the solver does not find a solution.
    """
    corr_matrix = np.asarray(corr_matrix, dtype=float)
    n_models = corr_matrix.shape[0]
    # C + C^T equals 2*C for a symmetric C, and gives the solver the
    # symmetric matrix it expects if C is slightly asymmetric.
    P = corr_matrix + corr_matrix.T
    q = np.zeros(n_models)
    A = np.ones((1, n_models))
    b = np.ones(1)
    lb = np.zeros(n_models)
    ub = np.ones(n_models)
    # Uniform start sized to the problem, so the same function serves a pair
    # of models as well as the full ensemble.
    initvals = np.full(n_models, 1.0 / n_models)
    return solve_qp(P, q, A=A, b=b, lb=lb, ub=ub, initvals=initvals, solver='cvxopt', verbose=verbose)


In [None]:
# Attention output-projection weights: block 5 of each strong model and
# block 0 (the only remaining block) of each weak model, as NumPy arrays.
attention_weights = [
    strong_model.distilbert.transformer.layer[5].attention.out_lin.weight.cpu().detach().numpy()
    for strong_model in models
]

weak_attention_weights = [
    weak_model.distilbert.transformer.layer[0].attention.out_lin.weight.cpu().detach().numpy()
    for weak_model in weak_models
]

In [None]:
import torch

# Sanity-check the GPU: availability, device count, and current device index.
print(torch.cuda.is_available())
print(torch.cuda.device_count())
print(torch.cuda.current_device())

True
1
0


In [None]:
from time import time
# Time one library RTD call on the GPU for a single strong/weak pair.
t_begin = time()
a = rtd.rtd(attention_weights[0], weak_attention_weights[0], pdist_device = 'cuda:0', trials=2, batch=400)
print(a)
elapsed = time() - t_begin
print(elapsed)

34.20737582445145
255.21833086013794


In [None]:
from time import time
# Same timing with the library's default device, one trial over 768 rows.
t_begin = time()
a = rtd.rtd(attention_weights[0], weak_attention_weights[0], trials=1, batch=768)
elapsed = time() - t_begin
print(elapsed)

In [None]:
def my_rtd(cl1, cl2, pdist_device = 'cuda:0', trials=2, batch=384):
    """Average ``rtd.barcodes.rtd1`` over two row-chunks of the inputs.

    The rows are split deterministically into ``[:batch]`` and ``[batch:]``
    and ``rtd1`` is evaluated on each chunk.

    Args:
        cl1, cl2: Row-indexable inputs passed chunk-wise to ``rtd1``.
        pdist_device: Forwarded to ``rtd1``.
        trials: Kept for call compatibility only. It used to be the divisor,
            which returned the *sum* of both chunks for ``trials=1``; the
            mean is now always taken over the chunks actually evaluated.
        batch: Number of rows in the first chunk.

    Returns:
        Mean of the per-chunk ``rtd1`` values.

    Raises:
        ValueError: If no non-empty chunk can be formed.
    """
    candidate_chunks = [
        (cl1[:batch], cl2[:batch]),
        (cl1[batch:], cl2[batch:]),
    ]
    # Drop an empty tail (batch >= number of rows) instead of passing it on.
    chunks = [(a, b) for a, b in candidate_chunks if len(a) > 0 and len(b) > 0]
    if not chunks:
        raise ValueError("my_rtd needs at least one non-empty chunk")
    total = sum(rtd.barcodes.rtd1(a, b, pdist_device=pdist_device) for a, b in chunks)
    return total / len(chunks)

In [None]:
! ls /usr/local/lib/python3.10/dist-packages/rtd/

barcodes.py  cca_core.py  cka.py  __init__.py  pwcca.py  __pycache__  svcca.py


In [None]:
# Display the rtd1 function object to inspect its signature.
rtd.barcodes.rtd1

<function rtd.barcodes.rtd1(cl1, cl2, pdist_device='cuda:0')>

In [None]:
#rtd.rtd(np.ones((2, 2)), np.ones((2, 2)))
from time import time
# Chunked RTD for all 5 x 3 (strong, weak) pairs, timed as a whole.
t_begin = time()
ans = []
for i, j in np.ndindex(5, 3):
    a = my_rtd(attention_weights[i], weak_attention_weights[j], pdist_device='cuda:0', trials=1, batch=384)
    print(i, j, a)
    ans.append((i, j, a))
elapsed = time() - t_begin
print(elapsed)

0 0 67.46638742089272
0 1 67.13777333498001
0 2 65.53225845098495
1 0 67.50167348980904
1 1 67.17355877161026
1 2 65.57106006145477
2 0 67.43449205160141
2 1 67.1058779656887
2 2 65.5012558400631
3 0 67.48799046874046
3 1 67.15888267755508
3 2 65.55936521291733
4 0 67.39762878417969
4 1 67.06901469826698
4 2 65.46118709445
1646.8108608722687


In [None]:
# Turn the (strong, weak, value) tuples into a three-column frame.
ans = np.array([np.array(row) for row in ans])
ans_df = pd.DataFrame(data=ans, columns=['strong', 'weak', 'corr'])
ans_df

Unnamed: 0,strong,weak,corr
0,0.0,0.0,67.466387
1,0.0,1.0,67.137773
2,0.0,2.0,65.532258
3,1.0,0.0,67.501673
4,1.0,1.0,67.173559
5,1.0,2.0,65.57106
6,2.0,0.0,67.434492
7,2.0,1.0,67.105878
8,2.0,2.0,65.501256
9,3.0,0.0,67.48799


In [None]:
# Persist the pairwise values; the DataFrame index is written as the first CSV column.
ans_df.to_csv("./gdrive/MyDrive/ens_predictions/strong_weak_correlations" + ".csv")

In [None]:
# Reload the saved pairs and discard the first column (the stored index).
ws_corr_raw = pd.read_csv("./gdrive/MyDrive/ens_predictions/strong_weak_correlations.csv")
index_column = ws_corr_raw.columns[:1]
ws_corr_df = ws_corr_raw.drop(columns=index_column)
ws_corr_df

Unnamed: 0,strong,weak,corr
0,0.0,0.0,67.466387
1,0.0,1.0,67.137773
2,0.0,2.0,65.532258
3,1.0,0.0,67.501673
4,1.0,1.0,67.173559
5,1.0,2.0,65.57106
6,2.0,0.0,67.434492
7,2.0,1.0,67.105878
8,2.0,2.0,65.501256
9,3.0,0.0,67.48799


In [None]:
from tqdm import tqdm
def get_corr_matrix_rtd(i, j):
    """Build a 2x2 similarity matrix for strong model ``i`` and weak model ``j`` from RTD.

    The pair's value is looked up in the notebook-level ``ws_corr_df``
    (columns ``strong``, ``weak``, ``corr``) and turned into a similarity
    ``1 - value / max``, where ``max`` is taken over *all* pairs in the table.
    The diagonal is 1, which corresponds to a self-distance of 0.

    Raises:
        KeyError: If the table has no row for ``(i, j)``.
    """
    # Build the mask from the same frame that is being indexed; using a
    # different frame breaks as soon as that frame is rebound to other data.
    pair_rows = ws_corr_df[(ws_corr_df.strong == i) & (ws_corr_df.weak == j)]
    if pair_rows.empty:
        raise KeyError("no RTD entry for strong={}, weak={}".format(i, j))
    pair_value = pair_rows["corr"].iloc[0]

    # Normalise by the global maximum. Dividing a 2x2 matrix by its own
    # maximum always maps the single off-diagonal value to 1, so the result
    # would be the identity matrix no matter what was measured.
    similarity = 1 - pair_value / ws_corr_df["corr"].max()

    return np.array([[1.0, similarity], [similarity, 1.0]])

# Test accuracy of each (strong, weak) pair under RTD-derived weights.
ans = []
for i, strong in enumerate(single_predictions):
    for j in tqdm(range(len(weak_single_predictions))):
        weak = weak_single_predictions[j]
        corr_matrix = get_corr_matrix_rtd(i, j)
        weights = calculate_optimal_weights_output(corr_matrix)
        weighted_predictions = weighted_prediction(np.array([strong, weak]), weights)
        pair_accuracy = accuracy_score(np.argmax(weighted_predictions, axis=1), true_labels)
        ans.append(np.array([i, j, round(pair_accuracy, 5)]))

100%|██████████| 3/3 [00:00<00:00, 26.93it/s]


67.46638742089272
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
67.13777333498001
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
65.53225845098495
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.

100%|██████████| 3/3 [00:00<00:00, 48.68it/s]


67.50167348980904
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
67.17355877161026
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
65.57106006145477
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.

  0%|          | 0/3 [00:00<?, ?it/s]

67.43449205160141
     pcost       dcost       gap    pres   dres

100%|██████████| 3/3 [00:00<00:00, 56.71it/s]



 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
67.1058779656887
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
65.5012558400631
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0

100%|██████████| 3/3 [00:00<00:00, 66.67it/s]


67.48799046874046
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
67.15888267755508
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
65.55936521291733
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.

  0%|          | 0/3 [00:00<?, ?it/s]

67.39762878417969
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16


100%|██████████| 3/3 [00:00<00:00, 61.95it/s]

 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
67.06901469826698
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.
65.46118709445
     pcost       dcost       gap    pres   dres
 0:  5.0000e-01 -1.5000e+00  4e+00  7e-01  1e+00
 1:  5.0000e-01 -3.1250e-02  5e-01  1e-16  2e-16
 2:  5.0000e-01  4.9469e-01  5e-03  0e+00  3e-17
 3:  5.0000e-01  4.9995e-01  5e-05  1e-16  1e-18
 4:  5.0000e-01  5.0000e-01  5e-07  0e+00  3e-20
 5:  5.0000e-01  5.0000e-01  5e-09  2e-16  2e-22
Optimal solution found.





In [None]:
import pandas as pd
# Tabulate the per-pair accuracies collected above.
ans = np.array(ans)
ans_df = pd.DataFrame(data=ans, columns=['Strong', 'Weak', 'Accuracy'])
ans_df

Unnamed: 0,Strong,Weak,Accuracy
0,0.0,0.0,0.88504
1,0.0,1.0,0.85576
2,0.0,2.0,0.9336
3,1.0,0.0,0.88536
4,1.0,1.0,0.85624
5,1.0,2.0,0.9352
6,2.0,0.0,0.88424
7,2.0,1.0,0.8552
8,2.0,2.0,0.93344
9,3.0,0.0,0.88544


## Results for all models

In [None]:
# Reload the pairwise values, drop the stored index column, and expose them
# under the name `correl` as a similarity: 1 - value / max over all pairs.
ws_corr_df = (
    pd.read_csv("./gdrive/MyDrive/ens_predictions/strong_weak_correlations.csv")
    .pipe(lambda frame: frame.drop(columns=frame.columns[:1]))
    .rename(columns={'corr': 'correl'})
)
ws_corr_df['correl'] = 1 - ws_corr_df['correl'] / ws_corr_df['correl'].max()
ws_corr_df

Unnamed: 0,strong,weak,correl
0,0.0,0.0,0.000523
1,0.0,1.0,0.005391
2,0.0,2.0,0.029176
3,1.0,0.0,0.0
4,1.0,1.0,0.004861
5,1.0,2.0,0.028601
6,2.0,0.0,0.000995
7,2.0,1.0,0.005863
8,2.0,2.0,0.029635
9,3.0,0.0,0.000203


In [None]:
# Pairwise matrix for the strong models, read from a CSV stored next to
# checkpoint 1. NOTE(review): presumably produced by a separate notebook and
# already normalised (the displayed diagonal is 1) -- confirm.
strong_corr = np.genfromtxt("./gdrive/MyDrive/results_bert_1/correlation_rtd.csv", delimiter=",")
strong_corr

array([[1.00000000e+00, 4.64429027e-02, 2.99832934e-01, 3.77925146e-02,
        3.29906056e-01],
       [4.64429027e-02, 1.00000000e+00, 1.31877227e-01, 6.40578938e-02,
        7.35732133e-04],
       [2.99832934e-01, 1.31877227e-01, 1.00000000e+00, 4.11294023e-02,
        3.27347703e-01],
       [3.77925146e-02, 6.40578938e-02, 4.11294023e-02, 1.00000000e+00,
        0.00000000e+00],
       [3.29906056e-01, 7.35732133e-04, 3.27347703e-01, 0.00000000e+00,
        1.00000000e+00]])

In [None]:
# Library RTD for every ordered pair of weak models (including i == j),
# then persisted to Drive.
weak_corr = np.zeros((3, 3))
for i, j in np.ndindex(3, 3):
    weak_corr[i, j] = rtd.rtd(weak_attention_weights[i], weak_attention_weights[j])
np.savetxt("./gdrive/MyDrive/ens_predictions/weak_rtd_corr.csv", weak_corr, delimiter=",")

In [None]:
# Reload the weak/weak RTD values and convert them to 1 - value / max.
weak_rtd_raw = np.genfromtxt("./gdrive/MyDrive/ens_predictions/weak_rtd_corr.csv", delimiter=",")
weak_corr = 1 - weak_rtd_raw / weak_rtd_raw.max()
weak_corr

array([[1.        , 0.85944772, 0.06616384],
       [0.8670433 , 1.        , 0.        ],
       [0.0730456 , 0.08975833, 1.        ]])

In [None]:
# Assemble the 8x8 matrix: strong/strong block top-left, weak/weak block
# bottom-right, and the strong/weak values mirrored into both off-diagonal blocks.
total_corr_matrix = np.zeros((8, 8))
total_corr_matrix[:5, :5] = strong_corr[:5, :5]
total_corr_matrix[5:, 5:] = weak_corr[:3, :3]
for strong_idx in range(5):
    for weak_idx in range(3):
        pair_mask = np.logical_and(ws_corr_df.strong == strong_idx, ws_corr_df.weak == weak_idx)
        corr_coef = ws_corr_df[pair_mask].values[0][2]
        total_corr_matrix[strong_idx, 5 + weak_idx] = corr_coef
        total_corr_matrix[5 + weak_idx, strong_idx] = corr_coef
total_corr_matrix

array([[1.00000000e+00, 4.64429027e-02, 2.99832934e-01, 3.77925146e-02,
        3.29906056e-01, 5.22743616e-04, 5.39097975e-03, 2.91757958e-02],
       [4.64429027e-02, 1.00000000e+00, 1.31877227e-01, 6.40578938e-02,
        7.35732133e-04, 0.00000000e+00, 4.86083828e-03, 2.86009713e-02],
       [2.99832934e-01, 1.31877227e-01, 1.00000000e+00, 4.11294023e-02,
        3.27347703e-01, 9.95255891e-04, 5.86349202e-03, 2.96350823e-02],
       [3.77925146e-02, 6.40578938e-02, 4.11294023e-02, 1.00000000e+00,
        0.00000000e+00, 2.02706398e-04, 5.07825650e-03, 2.87742241e-02],
       [3.29906056e-01, 7.35732133e-04, 3.27347703e-01, 0.00000000e+00,
        1.00000000e+00, 1.54136483e-03, 6.40960097e-03, 3.02286786e-02],
       [5.22743616e-04, 0.00000000e+00, 9.95255891e-04, 2.02706398e-04,
        1.54136483e-03, 1.00000000e+00, 8.59447718e-01, 6.61638371e-02],
       [5.39097975e-03, 4.86083828e-03, 5.86349202e-03, 5.07825650e-03,
        6.40960097e-03, 8.67043298e-01, 1.00000000e+00, 0.

In [None]:
def calculate_optimal_weights_output(corr_matrix, verbose=True):
    """Solve for ensemble weights minimising ``w^T C w`` on the probability simplex.

    The problem handed to the solver is ``min 1/2 w^T P w`` subject to
    ``sum(w) == 1`` and ``0 <= w <= 1``, with ``P = C + C^T``.

    Args:
        corr_matrix: Square ``(n, n)`` matrix ``C``.
        verbose: Forwarded to ``solve_qp``; defaults to ``True`` as before.

    Returns:
        Whatever ``solve_qp`` returns: the weight vector, or ``None`` when
        the solver does not find a solution.
    """
    corr_matrix = np.asarray(corr_matrix, dtype=float)
    n_models = corr_matrix.shape[0]
    # C + C^T equals 2*C for a symmetric C, and gives the solver the
    # symmetric matrix it expects if C is slightly asymmetric.
    P = corr_matrix + corr_matrix.T
    q = np.zeros(n_models)
    A = np.ones((1, n_models))
    b = np.ones(1)
    lb = np.zeros(n_models)
    ub = np.ones(n_models)
    # Uniform start sized to the problem, so the same function serves a pair
    # of models as well as the full ensemble.
    initvals = np.full(n_models, 1.0 / n_models)
    return solve_qp(P, q, A=A, b=b, lb=lb, ub=ub, initvals=initvals, solver='cvxopt', verbose=verbose)

def weighted_prediction(single_predictions, weights):
    """Combine per-model outputs into one weighted-average output.

    This cell re-defines the function of the same name from earlier in the
    notebook.

    Args:
        single_predictions: Array-like of shape
            ``(n_models, n_samples, n_classes)``.
        weights: Array-like of ``n_models`` weights; they are rescaled to sum
            to 1 before use.

    Returns:
        ``ndarray`` of shape ``(n_samples, n_classes)``.

    Raises:
        ValueError: If ``weights`` is ``None`` (e.g. a failed solver result).
    """
    if weights is None:
        raise ValueError("weights is None; did the QP solver fail to find a solution?")
    weights = np.asarray(weights, dtype=float)
    weights = weights / weights.sum()
    stacked = np.asarray(single_predictions)
    # Contract over the model axis; works for any number of classes instead
    # of only the two columns that were previously indexed explicitly.
    return np.tensordot(weights, stacked, axes=1)

# Weight all eight models at once and report the ensemble's test accuracy.
ans = []
weights = calculate_optimal_weights_output(total_corr_matrix)
all_predictions = np.concatenate([single_predictions, weak_single_predictions], axis=0)
weighted_predictions = weighted_prediction(all_predictions, weights)
ensemble_labels = np.argmax(weighted_predictions, axis=1)
ensemble_accuracy = accuracy_score(ensemble_labels, true_labels)
print(round(ensemble_accuracy, 5))

     pcost       dcost       gap    pres   dres
 0:  2.0270e-01 -7.7973e+00  2e+01  9e-01  1e+00
 1:  1.9540e-01 -6.8771e-01  9e-01  1e-16  3e-16
 2:  1.9402e-01  1.6103e-01  3e-02  1e-16  2e-16
 3:  1.9337e-01  1.9183e-01  2e-03  2e-16  1e-16
 4:  1.9335e-01  1.9332e-01  3e-05  2e-16  9e-17
 5:  1.9335e-01  1.9334e-01  3e-07  2e-16  1e-16
 6:  1.9335e-01  1.9335e-01  3e-09  1e-16  1e-16
Optimal solution found.
0.9356


In [None]:
# Show the solver's weights for the full ensemble (strong models first, then weak).
print(weights)

[0.10735386 0.16095129 0.08712146 0.170416   0.12382564 0.11747113
 0.07367309 0.15918752]
