In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('/usr0/home/naveenr/projects/spurious_concepts/ConceptBottleneck/')
sys.path.append('/usr0/home/naveenr/projects/spurious_concepts')

In [3]:
import torch
from sklearn.metrics import roc_auc_score
from sklearn.neural_network import MLPClassifier
import torch.nn as nn
import torch.optim as optim
import pickle
import matplotlib.pyplot as plt
import torch.nn.functional as F
from PIL import Image
from captum.attr import visualization as viz
from matplotlib.colors import LinearSegmentedColormap
import cv2
from copy import copy 
import itertools
import json
import argparse 
import secrets
import subprocess
import shutil 
from torch.nn.utils import prune
import resource 

In [4]:
from src.images import *
from src.util import *
from src.models import *
from src.plot import *

In [5]:
torch.set_num_threads(1)

In [76]:
is_jupyter = 'ipykernel' in sys.modules
if is_jupyter:
    encoder_model='inceptionv3'
    seed = 43
    dataset_name = 'CUB'
    num_concept_combinations = 200
else:
    parser = argparse.ArgumentParser(description="Synthetic Dataset Experiments")


    parser.add_argument('--encoder_model', type=str, default='inceptionv3', help='Encoder model')
    parser.add_argument('--seed', type=int, default=42, help='Random seed')
    parser.add_argument('--dataset_name', type=str, default='CUB', help='Number of concept combinations')
    parser.add_argument('--num_concept_combinations', type=int, default=100, help='Random seed')

    args = parser.parse_args()
    encoder_model = args.encoder_model 
    seed = args.seed 
    dataset_name = args.dataset_name 
    num_concept_combinations = args.num_concept_combinations

parameters = {
    'seed': seed, 
    'encoder_model': encoder_model ,
    'debugging': False,
    'dataset_name': dataset_name,
    'num_concept_combinations': num_concept_combinations,
}
print(parameters)
torch.cuda.set_per_process_memory_fraction(0.5)
resource.setrlimit(resource.RLIMIT_AS, (20 * 1024 * 1024 * 1024, -1))


{'seed': 43, 'encoder_model': 'inceptionv3', 'debugging': False, 'dataset_name': 'CUB', 'num_concept_combinations': 200}


In [77]:
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

In [78]:
train_loader, val_loader, test_loader, train_pkl, val_pkl, test_pkl = get_data(1,encoder_model=encoder_model,dataset_name=dataset_name)

In [79]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [14]:
def generate_random_combinations(K):
    all_combos = list(set([str(i['attribute_label']) for i in train_pkl]))
    random.seed(seed)    
    # Generate all possible combinations
    random.shuffle(all_combos)
    all_combos = [eval(i) for i in all_combos]

    return all_combos[:K]

In [15]:
random_combinations = generate_random_combinations(num_concept_combinations)
formatted_combinations = []
for r in random_combinations:
    formatted_combinations.append(str(int("".join([str(i) for i in r]),2)))

In [31]:
if dataset_name == "CUB":
    epochs = 100
    command_to_run = "python train_cbm.py -dataset CUB --encoder_model inceptionv3 --pretrained -epochs {} -num_attributes 112 -num_classes 200 -seed {} --attr_loss_weight 0.01 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction {}".format(epochs,seed," ".join(formatted_combinations))
elif dataset_name == "coco":
    epochs = 25
    command_to_run = "python train_cbm.py -dataset coco --encoder_model inceptionv3 --pretrained -epochs {} -num_attributes 10 -num_classes 2 -seed {} --attr_loss_weight 0.1 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction {}".format(epochs,seed," ".join(formatted_combinations))

In [32]:
subprocess.run("cd ../../ConceptBottleneck && {}".format(command_to_run),shell=True)

Files to delete are []
Namespace(adversarial_epsilon=0.01, adversarial_weight=0.25, attr_loss_weight=0.01, batch_size=32, bottleneck=False, ckpt='0', concept_restriction=[489434840942257862547355994169602, 20604902816464749788475737524264, 82417754025159411000739688890512, 20760269268464950908855439884288, 30904563943319113684455092652065, 731454243580909305755854349289601, 10460603154798514420549683513376, 2678707308143309534777359169882152, 407245753770509458926955160703040, 324518554867355428701590198976512, 649037726286876266790630683855368, 20604902816464173336528117057576, 669642011342243446517486512818216, 515082171979654095527797410766882, 489352823749794443838504306753796, 824016296030621995351173006106880, 406936308578464778494410333636609, 171212139289391367582460917385472, 669642010124481654523558334056520, 813951844719847969791594442262530, 669638296303146277227239087294504, 2760953143537429435283906260111362, 345118496100528533257654219325448, 2060490281646417333656301366

CompletedProcess(args='cd ../../ConceptBottleneck && python train_cbm.py -dataset CUB --encoder_model inceptionv3 --pretrained -epochs 10 -num_attributes 112 -num_classes 200 -seed 42 --attr_loss_weight 0.01 --optimizer adam --scheduler_step 100 -lr 0.005 --concept_restriction 489434840942257862547355994169602 20604902816464749788475737524264 82417754025159411000739688890512 20760269268464950908855439884288 30904563943319113684455092652065 731454243580909305755854349289601 10460603154798514420549683513376 2678707308143309534777359169882152 407245753770509458926955160703040 324518554867355428701590198976512 649037726286876266790630683855368 20604902816464173336528117057576 669642011342243446517486512818216 515082171979654095527797410766882 489352823749794443838504306753796 824016296030621995351173006106880 406936308578464778494410333636609 171212139289391367582460917385472 669642010124481654523558334056520 813951844719847969791594442262530 669638296303146277227239087294504 2760953143537

In [15]:
def get_most_recent_file(directory):
    files = [os.path.join(directory, f) for f in os.listdir(directory) if os.path.isfile(os.path.join(directory, f))]
    if not files:
        return None

    most_recent_file = max(files, key=os.path.getmtime)
    
    return most_recent_file


In [16]:
most_recent_data = get_most_recent_file("../../models/model_data/")
rand_name = most_recent_data.split("/")[-1].replace(".json","")
results_file = "../../results/correlation/{}.json".format(rand_name)
delete_same_dict(parameters,"../../results/correlation")

In [25]:
seed

42

In [80]:
rand_name = get_name_matching_parameters({'seed': seed, 'dataset': dataset_name})#, 'concept_restrictions': str([int(i) for i in formatted_combinations])})
temp = [json.load(open("../../models/model_data/{}.json".format(i))) for i in rand_name]
rand_name = [rand_name[i] for i in range(len(rand_name)) if 'concept_restriction' in temp[i] and len(temp[i]['concept_restriction']) == num_concept_combinations]
rand_name = rand_name[-1]
results_file = "../../results/correlation/{}.json".format(rand_name)


In [81]:
joint_location = "../../models/{}/{}/joint/best_model_{}.pth".format(dataset_name,rand_name,seed)
joint_model = torch.load(joint_location,map_location='cpu')

if 'encoder_model' in parameters and 'mlp' in parameters['encoder_model']:
    joint_model.encoder_model = True

r = joint_model.eval()

In [82]:
joint_model = joint_model.to(device)

In [83]:
torch.cuda.empty_cache()

## Compute Accuracy

In [84]:
concept_acc = get_concept_accuracy_by_concept(joint_model,run_joint_model,test_loader)
locality_intervention = 1-torch.mean(concept_acc).detach().numpy()

In [85]:
final_data = {
    'parameters': parameters,  
    'locality_intervention': locality_intervention,
}

In [86]:
final_data

{'parameters': {'seed': 43,
  'encoder_model': 'inceptionv3',
  'debugging': False,
  'dataset_name': 'CUB',
  'num_concept_combinations': 200},
 'locality_intervention': 0.12926393747329712}

In [87]:
json.dump(final_data,open(results_file,"w"))