In [1]:
"""
Evaluate trained models on the official CUB test set
"""
import os
import sys
import torch
import joblib
import argparse
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.metrics import precision_recall_curve
from scipy.special import softmax
sys.path.append('/home/mattyshen/interpretableDistillation')
from interpretDistill import fourierDistill
from interpretDistill import mo_figs
sys.path.append('/home/mattyshen/iCBM')

from CUB.dataset import load_data
from CUB.config import BASE_DIR, N_CLASSES, N_ATTRIBUTES, DEVICE, get_device, set_device
from analysis import AverageMeter, multiclass_metric, accuracy, binary_accuracy

from imodels import FIGSClassifierCV, FIGSRegressorCV, FIGSRegressor


class ARGS:
    def __init__(self, a_dict):
        for k in a_dict.keys():
            exec(f'self.{k} = a_dict["{k}"]')
            
parser_args = ['log_dir', 
               'model_dirs', 
               'model_dirs2', 
               'eval_data', 
               'use_attr', 
               'no_img', 
               'bottleneck', 
               'image_dir', 
               'n_class_attr', 
               'data_dir', 
               'n_attributes', 
               'attribute_group',
               'feature_group_results', 
               'use_relu', 
               'use_sigmoid', 
               'use_gbsm', 
               'expand_gbsm_dim', 
               'gpu']
parser_sigmoid = ['/home/mattyshen/iCBM/CUB/eval/JointSigmoidModels/outputs', 
               ['/home/mattyshen/iCBM/CUB/best_models/Joint0.01SigmoidModel__Seed1/outputs/best_model_1.pth', '/home/mattyshen/iCBM/CUB/best_models/Joint0.01SigmoidModel__Seed2/outputs/best_model_2.pth', '/home/mattyshen/iCBM/CUB/best_models/Joint0.01SigmoidModel__Seed3/outputs/best_model_3.pth'],
               None,
               'test',
               True,
               False,
               False,
               'images',
               2,
               'CUB_processed/class_attr_data_10',
               112,
               None,
               False,
               False,
               True,
               False,
               False,
               2]
parser_gbsm = ['/home/mattyshen/iCBM/CUB/eval/JointGBSMModels/outputs', 
               ['/home/mattyshen/iCBM/CUB/best_models/Joint0.01GBSMModel__Seed1/outputs/best_model_1.pth', '/home/mattyshen/iCBM/CUB/best_models/Joint0.01GBSMModel__Seed2/outputs/best_model_2.pth', '/home/mattyshen/iCBM/CUB/best_models/Joint0.01GBSMModel__Seed3/outputs/best_model_3.pth'],
               None,
               'test',
               True,
               False,
               False,
               'images',
               2,
               'CUB_processed/class_attr_data_10',
               112,
               None,
               False,
               False,
               True,
               True,
               False,
               2]

In [2]:
args_dict = dict(zip(parser_args, parser_sigmoid))
torch.backends.cudnn.benchmark=True
args = ARGS(args_dict)

set_device(args.gpu)

args.three_class = (args.n_class_attr == 3)
args.batch_size = 16

print(args)

<__main__.ARGS object at 0x7fac046d17c0>


In [3]:
args.data_dir

'CUB_processed/class_attr_data_10'

In [18]:
args.model_dirs[0]

'/home/mattyshen/iCBM/CUB/best_models/Joint0.01SigmoidModel__Seed1/outputs/best_model_1.pth'

In [25]:
def get_FT_data(args, data='trainval', override_train = True, batch_size = 32):
    #TODO: loop over all models
    model = torch.load(args.model_dirs[0])
    model = model.to(get_device())
    model.eval()
    # print(model.first_model)
    # print(model.sec_model)
    # print(model.sec_model.expand_dim)
    with torch.no_grad():
        if data == 'test':
            test_dir = os.path.join(BASE_DIR, args.data_dir, 'test.pkl')
            print(test_dir)
            loader = load_data([test_dir], args.use_attr, args.no_img, batch_size, image_dir=args.image_dir,
                               n_class_attr=args.n_class_attr, override_train=override_train)
        else:
            train_dir = os.path.join(BASE_DIR, args.data_dir, 'train.pkl')
            print(train_dir)
            val_dir = os.path.join(BASE_DIR, args.data_dir, 'val.pkl')
            print(args.use_attr, args.no_img, batch_size, args.image_dir,args.n_class_attr)
            loader = load_data([train_dir, val_dir], args.use_attr, args.no_img, batch_size, image_dir=args.image_dir,
                               n_class_attr=args.n_class_attr, override_train=override_train)
        attrs_true = []
        attrs_hat = []
        labels_true = []
        labels_hat = []
        for data_idx, data in enumerate(loader):
            if data_idx % 50 == 0: 
                print(f'loading data index: {data_idx}...')
            inputs, labels, attr_labels = data
            attr_labels = torch.stack(attr_labels).t()

            inputs_var = torch.autograd.Variable(inputs).to(get_device())
            labels_var = torch.autograd.Variable(labels).to(get_device())
            #labels = labels.to(get_device()) if torch.cuda.is_available() else labels

            print(inputs_var.shape)
            outputs = model(inputs_var)
            print(len(outputs))
            class_outputs = outputs[0]
            print(type(class_outputs))
            
            attr_outputs = [torch.nn.Sigmoid()(o) for o in outputs[1:]]
            attr_outputs_sigmoid = attr_outputs

            attrs_hat.append(torch.stack(attr_outputs).squeeze(2).detach().cpu().numpy())
            attrs_true.append(attr_labels.T)
            labels_hat.append(class_outputs.detach().cpu().numpy())
            labels_true.append(labels)


        # X_train = pd.DataFrame(np.concatenate(attrs_hat, axis=0) > p_thresh, columns = [f'c{i}' for i in range(1, 113)]).astype(np.int64)
        X_hat = pd.DataFrame(np.concatenate(attrs_hat, axis=1).T, columns = [f'c{i}' for i in range(1, 113)])
        X = pd.DataFrame(np.concatenate(attrs_true, axis = 1).T, columns = [f'c{i}' for i in range(1, 113)])

        y = pd.Series(np.concatenate([l.numpy().reshape(-1, ) for l in labels_true]))
        y_hat = pd.DataFrame(np.concatenate(labels_hat, axis = 0))

        del attrs_hat
        del labels
        del labels_hat
        del loader
        del data
        del inputs
        # del labels
        # del attr_labels
        del outputs
        del class_outputs
        del attr_outputs
        del attr_outputs_sigmoid
        del inputs_var
        del labels_var
        del model
        torch.cuda.empty_cache()

        return X_hat, X, y_hat, y

In [26]:
args.use_attr, args.no_img, args.image_dir, args.n_class_attr, args.data_dir

(True, False, 'images', 2, 'CUB_processed/class_attr_data_10')

In [27]:
X_train_hat, X_train, y_train_hat, y_train = get_FT_data(args, override_train = True)

X_test_hat, X_test, y_test_hat, y_test = get_FT_data(args, data = 'test')

/home/mattyshen/iCBM/CUB/CUB_processed/class_attr_data_10/train.pkl
True False 32 images 2
loading data index: 0...
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])
113
<class 'torch.Tensor'>
torch.Size([32, 3, 299, 299])


KeyboardInterrupt: 

In [10]:
X_train_hat

Unnamed: 0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,...,c103,c104,c105,c106,c107,c108,c109,c110,c111,c112
0,0.000029,0.000049,0.997168,0.002838,0.001316,0.997925,0.997404,0.070798,0.000639,0.000169,...,0.000181,0.000144,0.003451,0.998567,0.008230,0.000151,0.002673,0.000009,0.002072,0.996889
1,0.000127,0.000002,0.000452,0.998915,0.999547,0.000025,0.000003,0.995172,0.000221,0.000145,...,0.000041,0.003336,0.000543,0.000009,0.001520,0.000108,0.065626,0.000092,0.000608,0.000702
2,0.000298,0.000297,0.006749,0.996593,0.002590,0.002136,0.000303,0.996558,0.995859,0.000817,...,0.000150,0.000055,0.000132,0.000264,0.991023,0.000146,0.000867,0.000848,0.000850,0.998815
3,0.999575,0.000173,0.000532,0.000563,0.000342,0.000943,0.000027,0.004145,0.999393,0.000163,...,0.000018,0.000656,0.000053,0.000009,0.998703,0.000081,0.997077,0.000083,0.000556,0.002653
4,0.000348,0.000191,0.991590,0.010792,0.269908,0.002203,0.000559,0.004344,0.002673,0.001327,...,0.000348,0.000264,0.000388,0.000867,0.894389,0.000974,0.014078,0.000202,0.002349,0.002375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5979,0.000608,0.000457,0.002528,0.009372,0.004716,0.002445,0.000050,0.980387,0.017051,0.002176,...,0.000265,0.000819,0.002930,0.000084,0.981107,0.002328,0.955899,0.000504,0.002906,0.005129
5980,0.000370,0.000215,0.074480,0.996344,0.003308,0.998139,0.000445,0.006045,0.001480,0.000161,...,0.000126,0.001359,0.005574,0.000653,0.004495,0.000172,0.002586,0.000093,0.005962,0.002283
5981,0.000559,0.000039,0.003472,0.001571,0.997409,0.000278,0.000051,0.995713,0.995884,0.999127,...,0.000085,0.997997,0.000627,0.000072,0.002126,0.000085,0.001449,0.999766,0.000739,0.001646
5982,0.000396,0.000026,0.001485,0.999044,0.001498,0.001283,0.000260,0.998458,0.000801,0.000231,...,0.999396,0.001065,0.000278,0.000035,0.002833,0.000043,0.003661,0.000085,0.002528,0.998108


In [8]:
np.mean(y_test_hat.idxmax(axis = 1)== y_test.astype(int))

0.7747670003451846

In [None]:
np.mean(y_train_hat1.idxmax(axis = 1)== y_train1.astype(int))

In [None]:
np.mean(y_test_hat1.idxmax(axis = 1)== y_test1.astype(int))

In [None]:
def find_optimal_threshold(y_true, y_probs):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_probs)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    optimal_idx = np.argmax(f1_scores)
    optimal_threshold = thresholds[optimal_idx]
    return optimal_threshold

optimal_thresholds = []

for class_idx in range(112):
    y_true_class = X_train.iloc[:, class_idx]
    y_probs_class = X_train_hat.iloc[:, class_idx]
    optimal_thresholds.append(find_optimal_threshold(y_true_class, y_probs_class))
    
optimal_thresholds = np.array(optimal_thresholds)

y_train_probs_hat = pd.DataFrame(softmax(y_train_hat, axis = 1))

In [None]:

sys.path.append('/home/mattyshen/interpretableDistillation')
from interpretDistill import fourierDistill
from interpretDistill.mo_figs import FIGSHydraRegressor
sys.path.append('/home/mattyshen/iCBM')

In [None]:
figs_r = FIGSHydraRegressor(max_trees = 3, max_rules = 10, max_depth = 2)

In [None]:
figs_r.fit(X_train_hat, softmax(y_train_hat, axis = 1))

In [None]:
np.mean(np.argmax(figs_r.predict(X_train_hat), axis = 1) == y_train)

In [None]:
np.mean(np.argmax(figs_r.predict(X_test_hat), axis = 1) == y_test)

In [None]:
figs_r_no_sm = mo_figs.FIGSRegressor(max_trees = 5, max_rules = 20, max_depth = 4)

In [None]:
figs_r_no_sm.fit(X_train_hat, y_train_hat)

In [None]:
np.mean(np.argmax(figs_r_no_sm.predict(X_train_hat), axis = 1) == y_train)

In [None]:
np.mean(np.argmax(figs_r_no_sm.predict(X_test_hat), axis = 1) == y_test)

In [None]:
figs_c = mo_figs.FIGSClassifier(max_trees = 30, max_rules = 90, max_depth = 4)

In [None]:
figs_c.fit(X_train_hat, pd.DataFrame(y_train_hat.idxmax(axis = 1)))

In [None]:
y_train.values

In [None]:
np.mean(figs_c.predict(X_train_hat).reshape(-1, ) == y_train.values)

In [None]:
np.mean(figs_c.predict(X_test_hat).reshape(-1, ) == y_test.values)

In [None]:
np.arange(1, 10

In [None]:
np.mean(np.mean(t == (X_hat > optimal_thresholds).astype(int)))

In [None]:
X_train.shape, y_train.shape, y_train_hat.shape

In [None]:
y_hat_probs = pd.DataFrame(softmax(y_train_hat, axis = 1))

In [None]:
figs_models = []
for c in range(200):
    if c%25 == 0:
        print(c)
    figs = FIGSRegressor(max_trees = 3, max_rules = 10)
    figs.fit(X_train, y_hat_probs[c])
    figs_models.append(figs)

In [None]:
figs_models[123]

In [None]:
preds = [f.predict(X_train) for f in figs_models]

In [None]:
np.mean(np.argmax(np.array(preds).T, axis = 1) == y_train)

In [None]:
np.mean(y_train_hat.idxmax(axis = 1) == y_train)

In [None]:
X_test, y_test, y_test_hat = get_FT_data(args, data = 'test')

In [None]:
test_preds = [f.predict(X_test) for f in figs_models]

In [None]:
np.mean(np.argmax(np.array(test_preds).T, axis = 1) == y_test)

In [None]:
np.mean(y_test_hat.idxmax(axis = 1) == y_test)

In [None]:
figs_models2 = []
for i in range(145, 200):
    if i%25 == 0:
        print(f'training class {i}')
    figs_i = FIGSClassifierCV(n_rules_list = [20, 20], n_trees_list = [5, 10])
    figs_i.fit(X_train, np.where(y_train_hat == i, 1, 0))
    figs_models.append(figs_i)
    #np.where(y_train_hat == 46, 1, 0)

In [None]:
np.sum(np.where(y_train_hat == 147, 1, 0))

In [None]:
figs_i = FIGSClassifierCV(n_rules_list = [20, 20], n_trees_list = [5, 10])
figs_i.fit(X_train, np.where(y_train_hat == i, 1, 0))

In [None]:
figs_models = []
for i in range(0, 200):
    if i%25 == 0:
        print(f'training class {i}')
    figs_i = FIGSClassifierCV(n_rules_list = [20, 20], n_trees_list = [5, 10])
    if np.sum(np.where(y_train_hat == i, 1, 0)) > 0:
        figs_i.fit(X_train, np.where(y_train_hat == i, 1, 0))
    figs_models.append(figs_i)
    #np.where(y_train_hat == 46, 1, 0)

In [None]:
len(figs_models)

In [None]:
predictions = []
for i in figs_models:
    if hasattr(i, 'figs'):
        print(i.predict_proba(X_train)[:, 0].shape)
        predictions.append(i.predict_proba(X_train)[:, 0])
    else:
        print(np.zeros((X_train.shape[0], )).shape)
        predictions.append(np.zeros((X_train.shape[0], )))

In [None]:
np.argmax(np.vstack((predictions)).T, axis = 1).shape

In [None]:
np.unique(np.vstack((predictions)))

In [None]:
len(np.argmax(np.vstack((predictions)).T, axis = 1))

In [None]:
np.mean(np.argmax(np.vstack((predictions)).T, axis = 1) == y_train)

In [None]:
np.concatenate(predictions, axis = 1)

In [None]:
figs_models[i].predict_proba(X_train).shape, figs_models[i].predict_proba(X_train)

In [None]:
np.max(y_train_hat)

In [None]:
np.mean(y_train_hat == y_train)

In [None]:
figs = FIGSClassifierCV()

In [None]:
figs.fit(X_train, y_train)

In [None]:
figs_distill = FIGSClassifierCV()
figs_distill.fit(X_train, y_train_hat)

In [None]:
np.mean(figs_distill.predict(X_train))

In [None]:
np.mean(figs.predict(X_train) == y_train), np.mean(figs_distill.predict(X_train) == y_train), np.mean(figs_distill.predict(X_train) == y_train_hat)

In [None]:
ftd = fourierDistill.FTDistillClassifierCV(pre_interaction='l0l2', 
                             pre_max_features=50,
                             post_interaction='l0l2', 
                             post_max_features=20,
                             size_interactions=3,  
                             cv=3)

In [None]:
ftd.fit(X_train, y_train)

In [None]:
np.mean(ftd.predict(X_train) == y_train)

In [None]:
X_test, y_test = get_FT_data(args, data='test')

In [None]:
y_test = pd.Series(np.concatenate([l.numpy().reshape(-1, ) for l in y_test]))

In [None]:
np.min(np.sum(ftd.post_sparsity_model.coef_ != 0, axis = 1))

In [None]:
self.data.extend(pickle.load(open(file_path, 'rb')))

In [None]:
np.mean(ftd.predict(X_train) == y_train), np.mean(ftd.predict(X_test) == y_test)

In [None]:
import pickle
data = pickle.load(open('/home/mattyshen/iCBM/CUB/CUB_processed/class_attr_data_10/train.pkl', 'rb'))

In [None]:
input_file = "input.txt"  # Replace with the path to your original file
output_file = "filtered_output.txt"  # The file to save the filtered lines

# List of numbers to filter
numbers_to_keep =     [1, 4, 6, 7, 10, 14, 15, 20, 21, 23, 25, 29, 30, 35, 36, 38, 40, 44, 45, 50, 51, 53, 54, 56, 57, 59, 63, 64, 69, 70, 72, 75, 80, 84, 90, 91, 
                       93, 99, 101, 106, 110, 111, 116, 117, 119, 125, 126, 131, 132, 134, 145, 149, 151, 152, 153, 157, 158, 163, 164, 168, 172, 178, 179, 181, 
                       183, 187, 188, 193, 194, 196, 198, 202, 203, 208, 209, 211, 212, 213, 218, 220, 221, 225, 235, 236, 238, 239, 240, 242, 243, 244, 249, 253, 
                       254, 259, 260, 262, 268, 274, 277, 283, 289, 292, 293, 294, 298, 299, 304, 305, 308, 309, 310, 311]

# Read and filter the lines
with open('/home/mattyshen/iCBM/CUB/CUB_200_2011/attributes/attributes.txt', "r") as infile, open('/home/mattyshen/iCBM/CUB/CUB_200_2011/attributes/filtered_attributes.txt', "w") as outfile:
    i=1
    for line in infile:
        # Extract the number at the start of the line
        line_number = int(line.split()[0])
        # If the number is in the list, write it to the output file
        if line_number in numbers_to_keep:
            print(i)
            renumbered_line = f"{i} {' '.join(line.split()[1:])}\n"
            outfile.write(renumbered_line)
            i= i+1 

print(f"Filtered lines saved to {output_file}")

In [None]:
with open('/home/mattyshen/iCBM/CUB/CUB_200_2011/attributes/image_attribute_labels.txt', 'r') as f:
    for i, line in enumerate(f):
        if i < 313:
            file_idx, attribute_idx, attribute_label, attribute_certainty = line.strip().split()[:4]
            print(file_idx, attribute_idx, attribute_label, attribute_certainty)
        
        # attribute_label = int(attribute_label)
        # attribute_certainty = int(attribute_certainty)
        # uncertain_label = uncertainty_map[attribute_label][attribute_certainty]
        # attribute_labels_all[int(file_idx)].append(attribute_label)
        # attribute_uncertain_labels_all[int(file_idx)].append(uncertain_label)
        # attribute_certainties_all[int(file_idx)].append(attribute_certainty)

In [None]:
K = [1, 3, 5] #top k class accuracies to compute

def eval(args):
    """
    Run inference using model (and model2 if bottleneck)
    Returns: (for notebook analysis)
    all_class_labels: flattened list of class labels for each image
    topk_class_outputs: array of top k class ids predicted for each image. Shape = size of test set * max(K)
    all_class_outputs: array of all logit outputs for class prediction, shape = N_TEST * N_CLASS
    all_attr_labels: flattened list of labels for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    all_attr_outputs: flatted list of attribute logits (after ReLU/ Sigmoid respectively) predicted for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    all_attr_outputs_sigmoid: flatted list of attribute logits predicted (after Sigmoid) for each attribute for each image (length = N_ATTRIBUTES * N_TEST)
    wrong_idx: image ids where the model got the wrong class prediction (to compare with other models)
    """
    if args.model_dir:
        model = torch.load(args.model_dir)
        
    else:
        model = None

    if not hasattr(model, 'use_relu'):
        if args.use_relu:
            model.use_relu = True
        else:
            model.use_relu = False
    if not hasattr(model, 'use_sigmoid'):
        if args.use_sigmoid:
            model.use_sigmoid = True
        else:
            model.use_sigmoid = False
    if not hasattr(model, 'cy_fc'):
        model.cy_fc = None
    model = model.to(get_device())
    model.eval()

    if args.model_dir2:
        if 'rf' in args.model_dir2:
            model2 = joblib.load(args.model_dir2)
        else:
            model2 = torch.load(args.model_dir2)
        if not hasattr(model2, 'use_relu'):
            if args.use_relu:
                model2.use_relu = True
            else:
                model2.use_relu = False
        if not hasattr(model2, 'use_sigmoid'):
            if args.use_sigmoid:
                model2.use_sigmoid = True
            else:
                model2.use_sigmoid = False
        model2 = model2.to(get_device())
        model2.eval()
    else:
        model2 = None

    if args.use_attr:
        attr_acc_meter = [AverageMeter()]
        if args.feature_group_results:  # compute acc for each feature individually in addition to the overall accuracy
            for _ in range(args.n_attributes):
                attr_acc_meter.append(AverageMeter())
    else:
        attr_acc_meter = None

    class_acc_meter = []
    for j in range(len(K)):
        class_acc_meter.append(AverageMeter())
    if args.eval_data == 'trainval':
        train_dir = data_dir = os.path.join(BASE_DIR, args.data_dir, 'train.pkl')
        val_dir = data_dir = os.path.join(BASE_DIR, args.data_dir, 'val.pkl')
        loader = load_data([train_dir, val_dir], args.use_attr, args.no_img, args.batch_size, image_dir=args.image_dir,
                           n_class_attr=args.n_class_attr)
    else:
        data_dir = os.path.join(BASE_DIR, args.data_dir, args.eval_data + '.pkl')
        loader = load_data([data_dir], args.use_attr, args.no_img, args.batch_size, image_dir=args.image_dir,
                           n_class_attr=args.n_class_attr)
    all_outputs, all_targets = [], []
    all_attr_labels, all_attr_outputs, all_attr_outputs_sigmoid, all_attr_outputs2 = [], [], [], []
    all_class_labels, all_class_outputs, all_class_logits = [], [], []
    topk_class_labels, topk_class_outputs = [], []

    for data_idx, data in enumerate(loader):
        if args.use_attr:
            if args.no_img:  # A -> Y
                inputs, labels = data
                if isinstance(inputs, list):
                    inputs = torch.stack(inputs).t().float()
                inputs = inputs.float()
                # inputs = torch.flatten(inputs, start_dim=1).float()
            else:
                inputs, labels, attr_labels = data
                attr_labels = torch.stack(attr_labels).t()  # N x 312
        else:  # simple finetune
            inputs, labels = data

        inputs_var = torch.autograd.Variable(inputs).to(get_device())
        labels_var = torch.autograd.Variable(labels).to(get_device())
        labels = labels.to(get_device()) if torch.cuda.is_available() else labels

        if args.attribute_group:
            outputs = []
            f = open(args.attribute_group, 'r')
            for line in f:
                attr_model = torch.load(line.strip())
                outputs.extend(attr_model(inputs_var))
        else:
            outputs = model(inputs_var)
        if args.use_attr:
            if args.no_img:  # A -> Y
                class_outputs = outputs
            else:
                if args.bottleneck:
                    if args.use_relu:
                        attr_outputs = [torch.nn.ReLU()(o) for o in outputs]
                        attr_outputs_sigmoid = [torch.nn.Sigmoid()(o) for o in outputs]
                    elif args.use_sigmoid:
                        attr_outputs = [torch.nn.Sigmoid()(o) for o in outputs]
                        attr_outputs_sigmoid = attr_outputs
                    else:
                        attr_outputs = outputs
                        attr_outputs_sigmoid = [torch.nn.Sigmoid()(o) for o in outputs]
                    if model2:
                        stage2_inputs = torch.cat(attr_outputs, dim=1)
                        class_outputs = model2(stage2_inputs)
                    else:  # for debugging bottleneck performance without running stage 2
                        class_outputs = torch.zeros([inputs.size(0), N_CLASSES],
                                                    dtype=torch.float64).to(get_device())  # ignore this
                else:  # cotraining, end2end
                    if args.use_relu:
                        attr_outputs = [torch.nn.ReLU()(o) for o in outputs[1:]]
                        attr_outputs_sigmoid = [torch.nn.Sigmoid()(o) for o in outputs[1:]]
                    elif args.use_sigmoid:
                        attr_outputs = [torch.nn.Sigmoid()(o) for o in outputs[1:]]
                        attr_outputs_sigmoid = attr_outputs
                    else:
                        attr_outputs = outputs[1:]
                        attr_outputs_sigmoid = [torch.nn.Sigmoid()(o) for o in outputs[1:]]

                    class_outputs = outputs[0]

                for i in range(args.n_attributes):
                    acc = binary_accuracy(attr_outputs_sigmoid[i].squeeze(), attr_labels[:, i])
                    acc = acc.data.cpu().numpy()
                    # acc = accuracy(attr_outputs_sigmoid[i], attr_labels[:, i], topk=(1,))
                    attr_acc_meter[0].update(acc, inputs.size(0))
                    if args.feature_group_results:  # keep track of accuracy of individual attributes
                        attr_acc_meter[i + 1].update(acc, inputs.size(0))

                attr_outputs = torch.cat([o.unsqueeze(1) for o in attr_outputs], dim=1)
                attr_outputs_sigmoid = torch.cat([o for o in attr_outputs_sigmoid], dim=1)
                all_attr_outputs.extend(list(attr_outputs.flatten().data.cpu().numpy()))
                all_attr_outputs_sigmoid.extend(list(attr_outputs_sigmoid.flatten().data.cpu().numpy()))
                all_attr_labels.extend(list(attr_labels.flatten().data.cpu().numpy()))
        else:
            class_outputs = outputs[0]

        _, topk_preds = class_outputs.topk(max(K), 1, True, True)
        _, preds = class_outputs.topk(1, 1, True, True)
        all_class_outputs.extend(list(preds.detach().cpu().numpy().flatten()))
        all_class_labels.extend(list(labels.data.cpu().numpy()))
        all_class_logits.extend(class_outputs.detach().cpu().numpy())
        topk_class_outputs.extend(topk_preds.detach().cpu().numpy())
        topk_class_labels.extend(labels.view(-1, 1).expand_as(preds))

        np.set_printoptions(threshold=sys.maxsize)
        class_acc = accuracy(class_outputs, labels, topk=K)  # only class prediction accuracy
        for m in range(len(class_acc_meter)):
            class_acc_meter[m].update(class_acc[m], inputs.size(0))

    all_class_logits = np.vstack(all_class_logits)
    topk_class_outputs = np.vstack([tco if isinstance(tco, np.ndarray) else tco.cpu() for tco in topk_class_outputs])
    topk_class_labels = np.vstack([tcl if isinstance(tcl, np.ndarray) else tcl.cpu() for tcl in topk_class_labels])
    wrong_idx = np.where(np.sum(topk_class_outputs == topk_class_labels, axis=1) == 0)[0]

    for j in range(len(K)):
        print('Average top %d class accuracy: %.5f' % (K[j], class_acc_meter[j].avg))

    if args.use_attr and not args.no_img:  # print some metrics for attribute prediction performance
        print('Average attribute accuracy: %.5f' % attr_acc_meter[0].avg)
        all_attr_outputs_int = np.array(all_attr_outputs_sigmoid) >= 0.5
        if args.feature_group_results:
            n = len(all_attr_labels)
            all_attr_acc, all_attr_f1 = [], []
            for i in range(args.n_attributes):
                acc_meter = attr_acc_meter[1 + i]
                attr_acc = float(acc_meter.avg)
                attr_preds = [all_attr_outputs_int[j] for j in range(n) if j % args.n_attributes == i]
                attr_labels = [all_attr_labels[j] for j in range(n) if j % args.n_attributes == i]
                attr_f1 = f1_score(attr_labels, attr_preds)
                all_attr_acc.append(attr_acc)
                all_attr_f1.append(attr_f1)

            '''
            fig, axs = plt.subplots(1, 2, figsize=(20,10))
            for plt_id, values in enumerate([all_attr_acc, all_attr_f1]):
                axs[plt_id].set_xticks(np.arange(0, 1.1, 0.1))
                if plt_id == 0:
                    axs[plt_id].hist(np.array(values)/100.0, bins=np.arange(0, 1.1, 0.1), rwidth=0.8)
                    axs[plt_id].set_title("Attribute accuracies distribution")
                else:
                    axs[plt_id].hist(values, bins=np.arange(0, 1.1, 0.1), rwidth=0.8)
                    axs[plt_id].set_title("Attribute F1 scores distribution")
            plt.savefig('/'.join(args.model_dir.split('/')[:-1]) + '.png')
            '''
            bins = np.arange(0, 1.01, 0.1)
            acc_bin_ids = np.digitize(np.array(all_attr_acc) / 100.0, bins)
            acc_counts_per_bin = [np.sum(acc_bin_ids == (i + 1)) for i in range(len(bins))]
            f1_bin_ids = np.digitize(np.array(all_attr_f1), bins)
            f1_counts_per_bin = [np.sum(f1_bin_ids == (i + 1)) for i in range(len(bins))]
            print("Accuracy bins:")
            print(acc_counts_per_bin)
            print("F1 bins:")
            print(f1_counts_per_bin)
            np.savetxt(os.path.join(args.log_dir, 'concepts.txt'), f1_counts_per_bin)

        balanced_acc, report = multiclass_metric(all_attr_outputs_int, all_attr_labels)
        f1 = f1_score(all_attr_labels, all_attr_outputs_int)
        print("Total 1's predicted:", sum(np.array(all_attr_outputs_sigmoid) >= 0.5) / len(all_attr_outputs_sigmoid))
        print('Avg attribute balanced acc: %.5f' % (balanced_acc))
        print("Avg attribute F1 score: %.5f" % f1)
        print(report + '\n')
    return class_acc_meter, attr_acc_meter, all_class_labels, topk_class_outputs, all_class_logits, all_attr_labels, all_attr_outputs, all_attr_outputs_sigmoid, wrong_idx, all_attr_outputs2

In [None]:

y_results, c_results = [], []
for i, model_dir in enumerate(args.model_dirs):
    args.model_dir = model_dir
    args.model_dir2 = args.model_dirs2[i] if args.model_dirs2 else None
    result = eval(args)
    class_acc_meter, attr_acc_meter = result[0], result[1]
    y_results.append(1 - class_acc_meter[0].avg[0].item() / 100.)
    if attr_acc_meter is not None:
        c_results.append(1 - attr_acc_meter[0].avg.item() / 100.)
    else:
        c_results.append(-1)
values = (np.mean(y_results), np.std(y_results), np.mean(c_results), np.std(c_results))
output_string = '%.4f %.4f %.4f %.4f' % values
print_string = 'Error of y: %.4f +- %.4f, Error of C: %.4f +- %.4f' % values
print(print_string)
f = open(os.path.join(args.log_dir, 'results.txt'), "a")
f.write(output_string)
f.close()

In [None]:
f = open(os.path.join(args.log_dir, 'results.txt'), "a")
f.write(output_string)
f.close()

In [None]:
output_string

In [None]:
os.path.join(args.log_dir, 'results.txt')

In [None]:
args.data_dir

In [None]:
def sig(x):
    return 1/(1+np.exp(x))

In [None]:
file = open(os.path.join(args.log_dir, 'results.txt'), "r")
content = file.read()
print(content)
file.close()