In [3]:
import sys
sys.path.append('../')

# NOTE: the project star-imports stay first so that the explicit imports below
# keep taking precedence over any same-named symbols they export.
from dataset import*
from synthetic_concept_model import *
from synthetic_coop_model import *
from torch.utils.data import DataLoader
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import os
import pickle
import torch
import torch.nn as nn
import numpy
import random

# Seed Python, NumPy and PyTorch with one shared value so that the random
# splits and weight initialisation below are repeatable after a kernel restart.
SEED = 7
random.seed(SEED)
numpy.random.seed(seed=SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f68cbbe36d0>

In [4]:
# Sanity check: report whether PyTorch can see a CUDA device before training.
# (Uncomment the shell command below for per-GPU details.)
# !nvidia-smi
torch.cuda.is_available()

True

## Data generation

In [5]:
# --- Synthetic data configuration -------------------------------------------
# The dict keys ('12', '1', '2') are presumably the modality intersections
# (shared / unique to modality 1 / unique to modality 2) -- TODO confirm against
# generate_data_concepts.
feature_dim_info = dict()
label_dim_info = dict()
transform_dim = 100000  # input dimensionality; also used as x_dim by the models below

intersections = get_intersections(num_modalities=2)

feature_dim_info['12'] = 10
feature_dim_info['1'] = 6
feature_dim_info['2'] = 6

label_dim_info['12'] = 10
label_dim_info['1'] = 6
label_dim_info['2'] = 6
num_concepts = 1
transforms_2concept = None
transforms_2hd = None
num_data = 10000
noise=0.3
pos_prob=0.5

# --- Generate once, then reuse the cached pickle ----------------------------
# The generation step used to be commented out, so a fresh checkout without the
# pickle failed in the loading cell. Guarding it on the cache file keeps
# re-runs cheap while making Restart & Run All work from scratch.
synth_data_file_name = '../synth_data/'+'synth_data_exp2_'+str(noise)+'_'+str(pos_prob)+'.pkl'
if not os.path.exists(synth_data_file_name):
    total_data, total_labels, total_concepts, total_raw_features = generate_data_concepts(
        num_data, num_concepts,
        feature_dim_info,
        label_dim_info,
        transform_dim=transform_dim,
        noise=noise,
        pos_prob=pos_prob)

    synth_data_dict = {'total_data':total_data, 'total_labels':total_labels, 'total_concepts':total_concepts, 'total_raw_features':total_raw_features}
    os.makedirs(os.path.dirname(synth_data_file_name), exist_ok=True)
    # Context manager so the handle is flushed and closed even if dumping fails.
    with open(synth_data_file_name, 'wb') as synth_data_file:
        pickle.dump(synth_data_dict, synth_data_file)

In [6]:
# Data splitting & loading
# NOTE(security): pickle can execute arbitrary code while loading -- only open
# files produced by this project's own generation step.
synth_data_path = '../synth_data/'+'synth_data_exp2_'+str(noise)+'_'+str(pos_prob)+'.pkl'
with open(synth_data_path, 'rb') as synth_data_file:  # close the handle deterministically
    synth_data_dict = pickle.load(synth_data_file)
total_data = synth_data_dict['total_data']
total_labels = synth_data_dict['total_labels']
total_concepts = synth_data_dict['total_concepts']
total_raw_features = synth_data_dict['total_raw_features']

dataset = MultiConcept(total_data, total_labels, total_concepts, 0)
batch_size = 100

# Size the splits from the dataset that was actually loaded instead of the
# `num_data` global: random_split raises when the lengths do not sum to
# len(dataset), and `num_data` can be stale if cells were run out of order.
num_trainval = int(0.8 * len(dataset))
trainval_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [num_trainval, len(dataset) - num_trainval])
num_train = int(0.8 * len(trainval_dataset))
train_dataset, val_dataset = torch.utils.data.random_split(
    trainval_dataset, [num_train, len(trainval_dataset) - num_train])

train_loader = DataLoader(train_dataset, shuffle=True, drop_last=True,
                          batch_size=batch_size)
val_loader = DataLoader(val_dataset, shuffle=True, batch_size=batch_size, drop_last=True)
# No batch_size is given here, so this loader uses DataLoader's default batch size.
test_loader = DataLoader(test_dataset, shuffle=False, drop_last=False)

In [5]:
# transform_dim = 101024

## Experiment 2

## Scenario 1
Pretrain on a large dataset $D_L := \{x_i, c_i\}$ by minimizing the InfoNCE-CLUB objective over $(Z_{\bar{c}}, c \mid x)$, then fine-tune on a small dataset $D_S := \{x_j, y_j\}$ with supervised learning $x \rightarrow Z_{\bar{c}} \rightarrow y$.

In [None]:
class ConceptCLSUP_Pretrain(nn.Module):
    """Scenario 1 pre-training model: backbone + projection head scored by a CLUB-InfoNCE critic.

    Args:
        x_dim: dimensionality of the raw input ``x``.
        hidden_dim: hidden width passed to the backbone ``mlp``.
        embed_dim: output width of the backbone and of the projection head.
        layers: layer-count argument passed to the backbone ``mlp``.
        activation: activation name passed to both ``mlp`` builders.
        lr: stored on the instance as ``self.lr``; not used inside this class.
        concept_dim: dimensionality of the concept tensor handed to the critic.
            Defaults to 1, the value that used to be hard-coded, and mirrors the
            parameter of ``ConceptCLSUP_Pretrain_Sc2``.
    """

    def __init__(self, x_dim, hidden_dim, embed_dim, layers=2, activation='relu', lr=1e-4, concept_dim=1):
        super().__init__()
        self.critic_hidden_dim = 512
        self.critic_layers = 1
        self.critic_activation = 'relu'
        self.lr = lr

        # encoders
        self.backbone = mlp(x_dim, hidden_dim, embed_dim, layers, activation)
        self.linears_infonce = mlp(embed_dim, embed_dim, embed_dim, 1, activation)

        # critics: the first input is the embedding concatenated with x, hence embed_dim + x_dim
        self.club_critic = CLUBInfoNCECritic(embed_dim + x_dim, concept_dim, self.critic_hidden_dim, self.critic_layers, self.critic_activation)

    def forward(self, x, c):
        """Return the critic output for ``([z, x], c)`` where ``z`` is the projected embedding of ``x``.

        The training loop in this notebook back-propagates this value directly as the loss.
        """
        # compute embedding
        z = self.linears_infonce(self.backbone(x))
        # compute critic scores
        club_infonce_score = self.club_critic(torch.cat([z, x], dim=-1), c)
        return club_infonce_score

    def get_embedding(self, x):
        """Return the backbone output for ``x`` (the projection head is not applied)."""
        return self.backbone(x)

    def get_backbone(self):
        """Return the backbone module itself (not a copy)."""
        return self.backbone
    

def train_concept_informed_Pretrain_model(concept_encoder, model, train_loader,val_loader, num_epochs, device, lr, log_interval,
                          save_interval, save_path):
    """Train ``model`` against concepts produced by a frozen ``concept_encoder``.

    Every ``save_interval`` epochs the mean validation loss is computed; training
    stops at the first check that does not improve on the best value seen so far.
    The weights of the returned model are always written to
    ``<save_path>/concept_informed_model.pth`` (previously this only happened on
    the early-stop path, so a run that used all epochs was never saved).

    Args:
        concept_encoder: called as ``concept_encoder(data)`` and expected to return
            a pair; the first element is passed to ``model`` as the concept. It is
            put in eval mode and evaluated without gradients.
        model: module whose ``model(data, c)`` output is used directly as the loss.
        train_loader, val_loader: iterables yielding ``(data, concept, target)``;
            only ``data`` is used.
        num_epochs: maximum number of epochs.
        device: device the input batches are moved to.
        lr: Adam learning rate.
        log_interval: unused; kept so existing positional calls keep working.
        save_interval: run the validation / early-stopping check every this many epochs.
        save_path: directory for the checkpoint; created if missing.

    Returns:
        The same ``model`` object, with the weights from the last epoch that ran.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    checkpoint_path = os.path.join(save_path, 'concept_informed_model.pth')

    concept_encoder.eval()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val_err = float('inf')
    tepoch = tqdm(range(num_epochs))
    for epoch in tepoch:
        tepoch.set_description(f"Epoch {epoch}")
        model.train()
        loss = None  # stays None when train_loader yields no batches
        for data, _concept, _target in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            # The encoder is frozen (not in the optimizer), so skip building its graph.
            with torch.no_grad():
                c, _z_c = concept_encoder(data)
            loss = model(data, c)
            loss.backward()
            optimizer.step()
        if loss is not None:
            tepoch.set_postfix(loss=loss.item())

        if epoch % save_interval != 0:
            continue

        model.eval()
        val_total, val_batches = 0.0, 0
        with torch.no_grad():
            for data, _concept, _target in val_loader:
                data = data.to(device)
                c, _z_c = concept_encoder(data)
                val_total += model(data, c).item()
                val_batches += 1
        if val_batches == 0:
            # e.g. drop_last=True on a validation set smaller than one batch:
            # nothing to compare against, so skip the early-stopping check.
            continue

        val_err = val_total / val_batches
        if val_err < best_val_err:
            best_val_err = val_err
        else:
            print('Val loss did not improve')
            break

    # Persist whatever is returned, on both the early-stop and the full-run path.
    torch.save(model.state_dict(), checkpoint_path)
    return model


In [9]:
# Build the concept encoder and the Scenario 1 pre-training model, both placed
# on the GPU when one is available and on the CPU otherwise.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
hidden_dim, embed_dim = 512, 50
concept_encoder = ConceptEncoder(
    transform_dim,
    embed_dim,
    hidden_dim,
    critic_hidden_dim=512,
    critic_layers=1,
    layers=1,
).to(device)
model = ConceptCLSUP_Pretrain(transform_dim, hidden_dim, embed_dim).to(device)

In [None]:
# Step 1: train the concept encoder on the large split.
trained_concept_encoder = train_concept_encoder(
    concept_encoder, train_loader, val_loader, transform_dim, device,
    1e-5, 1e-5, 25, 3, '../trained_models',
)
# Step 2: train the concept-informed model against the trained encoder's concepts.
trained_concept_informed_Pretrain_model = train_concept_informed_Pretrain_model(
    trained_concept_encoder,
    model,
    train_loader,
    val_loader,
    num_epochs=100,
    device=device,
    lr=1e-5,
    log_interval=25,
    save_interval=3,
    save_path='../trained_models',
)
        

In [17]:
# New test split: the fine-tuning data is carved out of the held-out test split,
# half for fine-tuning (train/val) and half kept for the final evaluation.
# A dedicated name is used for the size so the global `num_data` (the full
# dataset size used by the first split cell) is not overwritten; clobbering it
# made that cell fail when re-run after this one.
num_test_data = len(test_dataset)
batch_size = 100
num_new_trainval = int(0.5 * num_test_data)
new_trainval_dataset, new_test_dataset = torch.utils.data.random_split(
    test_dataset, [num_new_trainval, num_test_data - num_new_trainval])
num_new_train = int(0.8 * len(new_trainval_dataset))
new_train_dataset, new_val_dataset = torch.utils.data.random_split(
    new_trainval_dataset, [num_new_train, len(new_trainval_dataset) - num_new_train])

new_train_loader = DataLoader(new_train_dataset, shuffle=True, drop_last=True,
                          batch_size=batch_size)
new_val_loader = DataLoader(new_val_dataset, shuffle=True, batch_size=batch_size, drop_last=True)
new_test_loader = DataLoader(new_test_dataset, shuffle=False, drop_last=False)

#Train Final Model

# get_backbone() returns the pretrained module itself, so fine-tuning `new_model`
# also updates the backbone inside trained_concept_informed_Pretrain_model.
backbone = trained_concept_informed_Pretrain_model.get_backbone()
# The head's input width must equal the backbone's output width, so reuse
# `embed_dim` rather than repeating the literal 50.
new_model = nn.Sequential(backbone, mlp(embed_dim, 256, 1, 1, activation= 'relu'))

In [19]:
from baselines import*

# Fine-tune backbone + head on the small labelled split. str(device) is 'cuda'
# whenever a GPU is available, so GPU runs are unchanged while training and
# evaluation now always use the same device.
final_model = mlp_train(new_model, new_train_loader, new_val_loader, 1000, 1e-5, 1e-5, str(device), 100, 100)

# Each sample is indexed as (input, concept, ..., label): [0] is the model input,
# [1] the concept and [-1] the label.
test_inputs = torch.stack([sample[0] for sample in  new_test_dataset])
test_embeds = test_inputs.detach().cpu().numpy()
test_concepts = torch.tensor([sample[1].item() for sample in  new_test_dataset]).unsqueeze(1)
test_labels = numpy.array([sample[-1].item() for sample in  new_test_dataset])

# Inference: eval mode and no autograd graph for the full-test-set forward pass.
final_model.eval()
with torch.no_grad():
    out = final_model(test_inputs.to(device))
# Flatten (N, 1) -> (N,) so the predictions have the same shape as test_labels.
predictions = torch.sigmoid(out).round().cpu().numpy().reshape(-1)

accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Epoch 200:  20%|██        | 200/1000 [01:42<06:51,  1.95it/s, loss=0.000801]


Val loss did not improve
Accuracy: 0.694
Precision: 0.6555023923444976
Recall: 0.8203592814371258
F1-score: 0.728723404255319


## Scenario 2
Pretrain on a large dataset $D_L := \{x_i, c_i\}$ by jointly minimizing InfoNCE-CLUB over $(Z_{\bar{c}}, c \mid x)$ and maximizing InfoNCE over $(c, Z_c \mid x)$.
Then fine-tune on a small dataset $D_S := \{x_j, y_j\}$ with supervised learning $x \rightarrow (Z_{\bar{c}}, Z_c) \rightarrow y$.

In [7]:
class ConceptCLSUP_Pretrain_Sc2(nn.Module):
    """Scenario 2 pre-training model: one backbone, two projection heads, two critics.

    The backbone output is projected by ``linears_club`` and by ``linears_infonce``;
    the first projection is scored by a CLUB-InfoNCE critic and the second by an
    InfoNCE critic, each against the concept ``c`` with ``x`` concatenated to the
    embedding.

    Args:
        x_dim: dimensionality of the raw input ``x``.
        hidden_dim: hidden width passed to the backbone ``mlp``.
        embed_dim: output width of the backbone and of both projection heads.
        layers: layer-count argument passed to the backbone ``mlp``.
        activation: activation name passed to the ``mlp`` builders.
        lr: stored on the instance as ``self.lr``; not used inside this class.
        concept_dim: dimensionality of the concept tensor handed to the critics.
    """

    def __init__(self, x_dim, hidden_dim, embed_dim, layers=2, activation='relu', lr=1e-4, concept_dim=1):
        # Fixed: this used to call super(ConceptCLSUP_Pretrain, self).__init__(),
        # naming a different class, which fails as soon as the model is constructed.
        super().__init__()
        self.critic_hidden_dim = 512
        self.critic_layers = 1
        self.critic_activation = 'relu'
        self.lr = lr

        # encoders
        self.backbone = mlp(x_dim, hidden_dim, embed_dim, layers, activation)
        self.linears_club = mlp(embed_dim, embed_dim, embed_dim, 1, activation)
        self.linears_infonce = mlp(embed_dim, embed_dim, embed_dim, 1, activation)

        # critics: the first input is an embedding concatenated with x, hence embed_dim + x_dim
        self.club_critic = CLUBInfoNCECritic(embed_dim + x_dim, concept_dim, self.critic_hidden_dim, self.critic_layers, self.critic_activation)
        self.nce_critic = InfoNCECritic(embed_dim + x_dim, concept_dim, self.critic_hidden_dim, self.critic_layers, self.critic_activation)

    def forward(self, x, c):
        """Return the sum of the CLUB-InfoNCE and InfoNCE critic outputs for ``(x, c)``.

        NOTE(review): the training loop minimises this sum. That matches the stated
        goal of maximising the InfoNCE term only if ``InfoNCECritic`` already returns
        a negated bound -- confirm against its implementation.
        """
        # Run the shared backbone once and feed both heads from the same features.
        features = self.backbone(x)
        z_comp = self.linears_club(features)
        z_c = self.linears_infonce(features)
        # compute critic scores
        club_infonce_score = self.club_critic(torch.cat([z_comp, x], dim=-1), c)
        infonce_score = self.nce_critic(torch.cat([z_c, x], dim=-1), c)

        return  club_infonce_score + infonce_score

    def get_embedding(self, x):
        """Return the backbone output for ``x`` (neither projection head is applied)."""
        # Open question from the original author: what should the embedding be?
        return self.backbone(x)

    def get_backbone(self):
        """Return the backbone module itself (not a copy)."""
        return self.backbone
    

def train_concept_informed_Pretrain_model(concept_encoder, model, train_loader,val_loader, num_epochs, device, lr, log_interval,
                          save_interval, save_path):
    """Train ``model`` against concepts produced by a frozen ``concept_encoder``.

    Every ``save_interval`` epochs the mean validation loss is computed; training
    stops at the first check that does not improve on the best value seen so far.
    The weights of the returned model are always written to
    ``<save_path>/concept_informed_model.pth`` (previously this only happened on
    the early-stop path, so a run that used all epochs was never saved).

    Args:
        concept_encoder: called as ``concept_encoder(data)`` and expected to return
            a pair; the first element is passed to ``model`` as the concept. It is
            put in eval mode and evaluated without gradients.
        model: module whose ``model(data, c)`` output is used directly as the loss.
        train_loader, val_loader: iterables yielding ``(data, concept, target)``;
            only ``data`` is used.
        num_epochs: maximum number of epochs.
        device: device the input batches are moved to.
        lr: Adam learning rate.
        log_interval: unused; kept so existing positional calls keep working.
        save_interval: run the validation / early-stopping check every this many epochs.
        save_path: directory for the checkpoint; created if missing.

    Returns:
        The same ``model`` object, with the weights from the last epoch that ran.
    """
    if save_path:
        os.makedirs(save_path, exist_ok=True)
    checkpoint_path = os.path.join(save_path, 'concept_informed_model.pth')

    concept_encoder.eval()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    best_val_err = float('inf')
    tepoch = tqdm(range(num_epochs))
    for epoch in tepoch:
        tepoch.set_description(f"Epoch {epoch}")
        model.train()
        loss = None  # stays None when train_loader yields no batches
        for data, _concept, _target in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            # The encoder is frozen (not in the optimizer), so skip building its graph.
            with torch.no_grad():
                c, _z_c = concept_encoder(data)
            loss = model(data, c)
            loss.backward()
            optimizer.step()
        if loss is not None:
            tepoch.set_postfix(loss=loss.item())

        if epoch % save_interval != 0:
            continue

        model.eval()
        val_total, val_batches = 0.0, 0
        with torch.no_grad():
            for data, _concept, _target in val_loader:
                data = data.to(device)
                c, _z_c = concept_encoder(data)
                val_total += model(data, c).item()
                val_batches += 1
        if val_batches == 0:
            # e.g. drop_last=True on a validation set smaller than one batch:
            # nothing to compare against, so skip the early-stopping check.
            continue

        val_err = val_total / val_batches
        if val_err < best_val_err:
            best_val_err = val_err
        else:
            print('Val loss did not improve')
            break

    # Persist whatever is returned, on both the early-stop and the full-run path.
    torch.save(model.state_dict(), checkpoint_path)
    return model


## Baseline (Pre-Training with Concepts $x \rightarrow c_1, x \rightarrow y$)

In [52]:
# Baseline: pre-train a backbone on the large split (x -> c), then fine-tune
# backbone + head on the small labelled split (x -> y). The loaders and datasets
# are the ones built in the data-splitting cells above.
# str(device) is 'cuda' whenever a GPU is available, so GPU runs are unchanged
# while training and evaluation always use the same device.
backbone = mlp(transform_dim, 512, 1, layers=3, activation='relu')
trained_backbone = mlp_train_c(backbone, train_loader, val_loader, 1000, 1e-5, 1e-5, str(device), 100, 100)
FC = mlp(1, 256, 1, 1, activation= 'relu')
# NOTE: `backbone` and `model` reuse names from the Scenario 1 cells; re-run the
# model-instantiation cell before re-running Scenario 1 training after this cell.
model = nn.Sequential(trained_backbone, FC)
trained_model = mlp_train(model, new_train_loader, new_val_loader, 1000, 1e-5, 1e-5, str(device), 100, 100)

# Build inputs and labels here instead of relying on `test_labels` left over
# from the Scenario 1 evaluation cell.
test_inputs = torch.stack([sample[0] for sample in  new_test_dataset])
test_embeds = test_inputs.detach().cpu().numpy()
test_labels = numpy.array([sample[-1].item() for sample in  new_test_dataset])

# Inference: eval mode and no autograd graph for the full-test-set forward pass.
trained_model.eval()
with torch.no_grad():
    out = trained_model(test_inputs.to(device))
# Flatten (N, 1) -> (N,) so the predictions have the same shape as test_labels.
predictions = torch.sigmoid(out).round().cpu().numpy().reshape(-1)

accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1-score:", f1)

Epoch 200:  20%|██        | 200/1000 [32:32<2:10:10,  9.76s/it, loss=0.00038] 


Val loss did not improve


Epoch 200:  20%|██        | 200/1000 [03:54<15:37,  1.17s/it, loss=2.81e-6]

Val loss did not improve





Accuracy: 0.904
Precision: 0.8802919708029197
Recall: 0.9773095623987034
F1-score: 0.9262672811059907
