In [5]:
import sys

import pandas as pd
import numpy as np
# Add your desired directory to PYTHONPATH
sys.path.append("../../lsm/")


from sklearn.metrics import r2_score
# Normalisation table read from the working directory; its first two rows are
# pulled out as NumPy arrays below.
# NOTE(review): presumably row 0 holds per-column minima and row 1 per-column
# maxima (going by the variable names) — confirm against how the CSV is written.
norm_values = pd.read_csv('./norm_values.csv')
mini, maxi = (np.array(norm_values.iloc[row_idx, :]) for row_idx in (0, 1))


In [2]:
def _load_continuous_split(method, split, results_dir='../../results'):
    """Load ground truth and predictions for one split, keeping continuous columns.

    Reads ``{results_dir}/{method}/{split}_data/GT_feats.csv`` and the matching
    ``Y_feats.csv`` (first CSV column used as the index), then keeps only the
    columns whose ground truth has more than 2 distinct values.

    Returns:
        (gt, pred): two DataFrames restricted to the same columns, in the
        same order.
    """
    gt = pd.read_csv(f'{results_dir}/{method}/{split}_data/GT_feats.csv', index_col=0)
    pred = pd.read_csv(f'{results_dir}/{method}/{split}_data/Y_feats.csv', index_col=0)

    # Only report on columns with more than 2 (i.e. at least 3) unique
    # ground-truth values; the selection is driven by the ground truth and
    # applied to the predictions by column name.
    n_unique = gt.nunique()
    keep = n_unique[n_unique > 2].index
    return gt[keep], pred[keep]


def _mean_clipped_r2(gt, pred):
    """Return the mean over columns of the per-column R², each clipped to [0, 1]."""
    scores = [r2_score(gt.iloc[:, i], pred.iloc[:, i]) for i in range(gt.shape[1])]
    # Clip before averaging so that a negative R² counts as 0 instead of
    # dragging the mean down.
    return np.mean(np.clip(scores, 0, 1))


def back_calc_smape(method, results_dir='../../results'):
    """Compute the mean clipped R² of a method on the unknown, known and CASMI splits.

    Despite its name, this function computes R² (via ``r2_score``), not SMAPE.

    For each of the ``unknown``, ``known`` and ``casmi`` splits it loads
    ``GT_feats.csv`` / ``Y_feats.csv`` from
    ``{results_dir}/{method}/{split}_data/``, keeps the columns whose ground
    truth has more than 2 distinct values, scores every remaining column with
    R², clips each score to [0, 1] and averages them. It also prints how many
    columns were kept per split.

    Args:
        method: Name of the sub-directory of ``results_dir`` holding the
            method's outputs.
        results_dir: Root directory of the results tree. Defaults to the
            location used by this notebook.

    Returns:
        Tuple ``(unknown_r2, known_r2, casmi_r2)`` of mean clipped R² values,
        in that order. An entry is NaN if no column of that split passes the
        filter.
    """
    unknown_gt, unknown_pred = _load_continuous_split(method, 'unknown', results_dir)
    known_gt, known_pred = _load_continuous_split(method, 'known', results_dir)
    casmi_gt, casmi_pred = _load_continuous_split(method, 'casmi', results_dir)

    print(f'number of continuous properties unknown: {unknown_gt.shape[1]}, known: {known_gt.shape[1]}, casmi: {casmi_gt.shape[1]}')

    return (
        _mean_clipped_r2(unknown_gt, unknown_pred),
        _mean_clipped_r2(known_gt, known_pred),
        _mean_clipped_r2(casmi_gt, casmi_pred),
    )

In [3]:
# Names of the methods to evaluate; each is passed to back_calc_smape, which
# uses it as a sub-directory name under the results tree.
methods = [
    'ft_lsm_100', 'ms2prop', 'supervised_100', 'cosine_similarity', 
    'finetune_1', 'finetune_5', 'finetune_10', 'finetune_25', 'finetune_50',
    'fixed_embed_1', 'fixed_embed_5', 'fixed_embed_10', 'fixed_embed_25', 'fixed_embed_50', 'fixed_embed_100'
]

# Collect one record per method and build the frame once. Growing a DataFrame
# with pd.concat inside the loop is quadratic, leaves every row with index 0,
# and starts from an empty object-dtype frame that can degrade the dtype of
# the numeric score columns.
records = []
for method in methods:
    # back_calc_smape returns the scores in (unknown, known, casmi) order.
    r2_unknown, r2_known, r2_casmi = back_calc_smape(method)
    records.append({
        'method': method,
        'r2_known': r2_known,
        'r2_unknown': r2_unknown,
        'r2_casmi': r2_casmi,
    })

# Explicit `columns` fixes the column order and keeps the schema even when
# `methods` is empty.
df = pd.DataFrame(records, columns=['method', 'r2_known', 'r2_unknown', 'r2_casmi'])

number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties unknown: 164, known: 174, casmi: 157
number of continuous properties un

In [4]:
# Display the per-method R² summary table (one row per entry in `methods`).
df

Unnamed: 0,method,r2_known,r2_unknown,r2_casmi
0,ft_lsm_100,0.953518,0.467931,0.341619
0,ms2prop,0.891553,0.39442,0.248644
0,supervised_100,0.908884,0.420996,0.332334
0,cosine_similarity,0.934449,0.194151,0.054866
0,finetune_1,0.239988,0.221027,0.088945
0,finetune_5,0.411661,0.330049,0.24633
0,finetune_10,0.581747,0.374717,0.283603
0,finetune_25,0.771446,0.400202,0.290227
0,finetune_50,0.881572,0.443008,0.348527
0,fixed_embed_1,0.179265,0.185644,0.066726
