In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
import torch
import json
import argparse 
import subprocess

In [3]:
from locality.images import *
from locality.util import *
from locality.models import *

In [4]:
is_jupyter = 'ipykernel' in sys.modules
if is_jupyter:
    encoder_model='inceptionv3'
    seed = 43
    dataset_name = 'CUB'
    num_concept_combinations = 200
else:
    parser = argparse.ArgumentParser(description="Synthetic Dataset Experiments")


    parser.add_argument('--encoder_model', type=str, default='inceptionv3', help='Encoder model')
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--dataset_name', type=str, default='CUB', help='Number of concept combinations')
    parser.add_argument('--num_concept_combinations', type=int, default=100, help='Random seed')

    args = parser.parse_args()
    encoder_model = args.encoder_model 
    seed = args.seed 
    dataset_name = args.dataset_name 
    num_concept_combinations = args.num_concept_combinations

parameters = {
    'seed': seed, 
    'encoder_model': encoder_model ,
    'debugging': False,
    'dataset_name': dataset_name,
    'num_concept_combinations': num_concept_combinations,
}
print(parameters)

{'seed': 43, 'encoder_model': 'inceptionv3', 'debugging': False, 'dataset_name': 'CUB', 'num_concept_combinations': 200}


In [5]:
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [6]:
train_loader, val_loader, test_loader, train_pkl, val_pkl, test_pkl = get_data(1,encoder_model=encoder_model,dataset_name=dataset_name)

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
def generate_random_combinations(K):
    all_combos = list(set([str(i['attribute_label']) for i in train_pkl]))
    random.seed(seed)    
    # Generate all possible combinations
    random.shuffle(all_combos)
    all_combos = [eval(i) for i in all_combos]

    return all_combos[:K]

In [9]:
random_combinations = generate_random_combinations(num_concept_combinations)
formatted_combinations = []
for r in random_combinations:
    formatted_combinations.append(str(int("".join([str(i) for i in r]),2)))

In [10]:
if dataset_name == "CUB":
    epochs = 100
    command_to_run = "python train_cbm.py -dataset CUB --encoder_model inceptionv3 --pretrained -epochs {} -num_attributes 112 -num_classes 200 -seed {} --attr_loss_weight 0.01 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction {}".format(epochs,seed," ".join(formatted_combinations))
elif dataset_name == "coco":
    epochs = 25
    command_to_run = "python train_cbm.py -dataset coco --encoder_model inceptionv3 --pretrained -epochs {} -num_attributes 10 -num_classes 2 -seed {} --attr_loss_weight 0.1 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction {}".format(epochs,seed," ".join(formatted_combinations))

In [11]:
subprocess.run("cd locality/cbm_variants/ConceptBottleneck && {}".format(command_to_run),shell=True)

Files to delete are []
Namespace(adversarial_epsilon=0.01, adversarial_weight=0.25, attr_loss_weight=0.01, batch_size=32, bottleneck=False, ckpt='0', concept_restriction=[772643582985149452814108718565441, 520219592880232881077925112315938, 20760269268464950908855439884288, 507476893959608173921852499951666, 20604902816464173336528117057576, 2627054854946746030807546027477028, 20601807956632996543808133300264, 2631227343844988606910318593, 2627049903205478354654734573799456, 816449386968930925476007714525184, 669639563106603117427022825424897, 494466138099959256318865357938946, 1287457643220284571457972865024, 10460603154798514420549683513376, 791658343188053979922447699822657, 20604902816464173336563013666849, 2698830798403343773693333279475752, 669636507092933265713535882905888, 569197595870814443879052031821952, 1318679117450171656929900207818792, 649047320323397925401975331506176, 345121590941182300847249926672897, 344804059321409990098318952368641, 76218982702653787327607445156876

CompletedProcess(args='cd ../../../locality/cbm_variants/ConceptBottleneck && python train_cbm.py -dataset CUB --encoder_model inceptionv3 --pretrained -epochs 100 -num_attributes 112 -num_classes 200 -seed 43 --attr_loss_weight 0.01 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction 772643582985149452814108718565441 520219592880232881077925112315938 20760269268464950908855439884288 507476893959608173921852499951666 20604902816464173336528117057576 2627054854946746030807546027477028 20601807956632996543808133300264 2631227343844988606910318593 2627049903205478354654734573799456 816449386968930925476007714525184 669639563106603117427022825424897 494466138099959256318865357938946 1287457643220284571457972865024 10460603154798514420549683513376 791658343188053979922447699822657 20604902816464173336563013666849 2698830798403343773693333279475752 669636507092933265713535882905888 569197595870814443879052031821952 1318679117450171656929900207818792 649047320323397925401975

In [12]:
def get_most_recent_file(directory):
    files = [os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    if not files:
        return None

    most_recent_file = max(files, key=os.path.getmtime)
    
    return most_recent_file


In [13]:
most_recent_data = get_most_recent_file("../../../models/model_data/")
rand_name = most_recent_data.split("/")[-1].replace(".json","")
results_file = "../../../results/correlation/{}.json".format(rand_name)
delete_same_dict(parameters,"../../../results/correlation")

In [14]:
seed

43

In [15]:
rand_name = get_name_matching_parameters({'seed': seed, 'dataset': dataset_name})#, 'concept_restrictions': str([int(i) for i in formatted_combinations])})
temp = [json.load(open("../../../models/model_data/{}.json".format(i))) for i in rand_name]
rand_name = [rand_name[i] for i in range(len(rand_name)) if 'concept_restriction' in temp[i] and len(temp[i]['concept_restriction']) == num_concept_combinations]
rand_name = rand_name[-1]
results_file = "../../../results/correlation/{}.json".format(rand_name)


IndexError: list index out of range

In [81]:
joint_location = "../../../models/{}/{}/joint/best_model_{}.pth".format(dataset_name,rand_name,seed)
joint_model = torch.load(joint_location,map_location='cpu')

if 'encoder_model' in parameters and 'mlp' in parameters['encoder_model']:
    joint_model.encoder_model = True

r = joint_model.eval()

In [82]:
joint_model = joint_model.to(device)

In [83]:
torch.cuda.empty_cache()

## Compute Accuracy

In [84]:
concept_acc = get_concept_accuracy_by_concept(joint_model,run_joint_model,test_loader)
locality_intervention = 1-torch.mean(concept_acc).detach().numpy()

In [85]:
final_data = {
    'parameters': parameters,  
    'locality_intervention': locality_intervention,
}

In [86]:
final_data

{'parameters': {'seed': 43,
  'encoder_model': 'inceptionv3',
  'debugging': False,
  'dataset_name': 'CUB',
  'num_concept_combinations': 200},
 'locality_intervention': 0.12926393747329712}

In [87]:
json.dump(final_data,open(results_file,"w"))