In [1]:
!pip install grad-cam pytorch-gradcam torchvision==0.4.0 -f https://download.pytorch.org/whl/torch_stable.html "pillow<7"

Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [2]:
from sys import path
path.append("/home/ec2-user/SageMaker/data-science-development/utils")
path.append("/home/ec2-user/SageMaker/data-science-development/config")

import pandas as pd
import numpy as np
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns

import os
import torch
import random
import json
import datetime
import time

from torch import nn
from torch.autograd import Variable
from torch.utils.data import TensorDataset, DataLoader, WeightedRandomSampler
from pytorch_grad_cam import GradCAM, ScoreCAM, GradCAMPlusPlus, AblationCAM, XGradCAM, EigenCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight

from datetime import datetime
from collections import defaultdict, Counter
from tqdm import tqdm 

tqdm.pandas()

ModuleNotFoundError: No module named 'pytorch_grad_cam.utils.model_targets'

In [None]:
skills = pd.read_csv("../Data/skills_one-hot.csv").set_index("candidate_id")
skills.head()

In [None]:
skills = dict(zip(skills.index, skills.values))

In [None]:
certs = pd.read_csv("../Data/candidate_certificates_one-hot.csv").set_index("candidate_id")
certs.head()

In [None]:
certs = dict(zip(certs.index, certs.values))

In [None]:
licenses = pd.read_csv("../Data/licenses_one-hot.csv").set_index("candidate_id")
licenses.head()

In [None]:
licenses = dict(zip(licenses.index, licenses.values))

In [None]:
languages = pd.read_csv("../Data/languages_one-hot.csv").set_index("candidate_id")
languages.head()

In [None]:
languages = dict(zip(languages.index, languages.values))

In [None]:
addresses = pd.read_csv("../Data/addresses_one-hot.csv").set_index("candidate_id")
addresses.head()

In [None]:
addresses = dict(zip(addresses.index, addresses.values))

In [None]:
# Load the per-candidate CV embeddings; the context manager guarantees the
# file handle is closed again (the bare open() call leaked it).
with open("../Data/embeddings.json") as embeddings_file:
    w2v = json.load(embeddings_file)

# JSON object keys are always strings: convert both nesting levels to ints
w2v = {int(k):{int(k2):v2 for k2, v2 in v.items()} for k, v in w2v.items()}

In [None]:
df_pred = pd.read_csv("../Data/df_pred_ext.csv").drop("Unnamed: 0", axis=1)

In [None]:
# Z-score both duration columns: subtract the column mean, divide by the column std
for duration_col in ("time_between", "time_spent"):
    raw_duration = df_pred[duration_col]
    df_pred[duration_col] = (raw_duration - raw_duration.mean()) / raw_duration.std()

In [None]:
df_pred.head()

In [None]:
career_paths = df_pred.groupby("candidate_id")

In [None]:
num_classes = len(df_pred["isco_code4"].unique())
num_features = len(career_paths.mean().columns)
num_classes, num_features

In [None]:
maximum_career_duration = 25

In [None]:
# Convert to 2d-arrays, grabbing the last 25 jobs of each candidate and getting rid of candidate_ids as values
career_paths = career_paths.progress_apply(lambda x: x.values[-(maximum_career_duration + 1):,1:])

In [None]:
# Drop careers that are only 1 job long
career_lens = career_paths.apply(len)
career_paths = career_paths.loc[(career_lens > 1)]

In [None]:
career_paths = career_paths.loc[career_paths.apply(lambda x: x[-1][-1] != x[-2][-1])]

In [None]:
career_paths.head()

In [None]:
idxs = []
x = []
y = []

# Number of input steps kept per candidate (the final row only provides the label)
candidate_lens = defaultdict(int)

for idx, career in zip(career_paths.index, career_paths.values):
    # The last column of the last row is the value to predict
    label = career[-1, -1]

    # Careers without a usable label are dropped entirely
    if not np.isnan(label):
        n_steps = len(career) - 1
        candidate_lens[idx] = n_steps

        idxs.append(idx)
        x.append(career[:-1].reshape(n_steps, num_features))
        y.append(label)

idxs = np.array(idxs)
y = np.array(y)

# Careers differ in length, so the collection is ragged. Build the object array
# explicitly: np.array() on ragged input relies on implicit object-array creation,
# which newer NumPy releases reject with a ValueError.
career_steps = np.empty(len(x), dtype=object)
for position, steps in enumerate(x):
    career_steps[position] = steps
x = career_steps

In [None]:
# Left-pad every career with all-zero rows so that each sequence has the length
# of the longest one; the real steps occupy the trailing rows.
longest_career = max(len(steps) for steps in x)
to_fill = np.zeros([len(x), longest_career, num_features])

for row, steps in enumerate(x):
    n_steps = len(steps)
    if n_steps:
        to_fill[row, -n_steps:] = steps

In [None]:
max_len = len(max(x, key = lambda x: len(x)))
max_len

In [None]:
del df_pred
del x

In [None]:
# Pick the first CUDA device when one is available, otherwise the CPU ...
device = ("cuda:0" if torch.cuda.is_available() else "cpu")
# ... but this second assignment unconditionally overrides the choice above, so
# everything below runs on the CPU.
# NOTE(review): looks like a debugging leftover -- confirm before removing it.
device = "cpu"

In [None]:
len(to_fill), len(y)

In [None]:
to_fill = to_fill[:50000]
y = y[:50000]

In [None]:
# Reproducible random split into training and validation positions
split = 0.8
random.seed(42)

n_samples = len(to_fill)
training = np.array(random.sample(range(n_samples), int(split * n_samples)))
# Every position that was not sampled for training goes to the validation set
test = np.array(list(set(range(n_samples)) - set(training)))

# Candidate ids
train_indices = idxs[training]
val_indices = idxs[test]

# Padded career sequences
X_train = to_fill[training]
X_val = to_fill[test]

# Integer class labels
y_train = y[training].astype(int)
y_val = y[test].astype(int)

In [None]:
# Class weights (+1 keeps classes that are absent from the training labels finite)
counts = np.bincount(y_train) + 1

# TODO: Change so that common classes get punished slightly more
labels_weights = 2. / (0.5 * np.sqrt(counts))
# One sampling weight per training example, looked up through its label
weights = labels_weights[y_train]
sampler = WeightedRandomSampler(weights, len(weights))

# Create dataloaders.
# Candidate ids and labels are stored as int64 tensors directly: routing them
# through torch.Tensor (float32) silently corrupts ids above 2**24, and the ids
# are later used as lookup keys.
train_data = TensorDataset(torch.as_tensor(train_indices, dtype=torch.long),
                           torch.Tensor(X_train),
                           torch.as_tensor(y_train, dtype=torch.long))

trainloader = DataLoader(train_data, batch_size=512, sampler=sampler)

val_data = TensorDataset(torch.as_tensor(val_indices, dtype=torch.long),
                         torch.Tensor(X_val),
                         torch.as_tensor(y_val, dtype=torch.long))

valloader = DataLoader(val_data, batch_size=512, shuffle=True)

In [None]:
# Shape sanity check for the convolution stack on a single training batch.
# N = features (each feature is a 'time series' in the words of the authors), T = time steps

k1 = 25
f_maps = 100
N = 9

# Feature maps per time step: the kernel slides along the time axis, one feature row at a time
conv2d = nn.Conv2d(in_channels=1,
                   out_channels=f_maps,
                   kernel_size=(1, k1),
                   stride=(1, 2),
                   padding=(0, k1 // 2))

# Collapse the f_maps channels into a single map
_1x1 = nn.Conv2d(in_channels=f_maps,
                 out_channels=1,
                 kernel_size=(1, 1),
                 stride=1)

# Feature map across all features. The kernel is two-dimensional and the input
# is 4-D, so this has to be a Conv2d: nn.Conv1d only accepted these arguments by
# accident on old PyTorch releases and current releases reject 4-D input.
# The variable keeps its historical name.
conv1d = nn.Conv2d(in_channels=1,
                   out_channels=1,
                   kernel_size=(N, k1 // 2),
                   padding=(N // 2, k1 // 2 // 2 // 2))

for c, i, j in trainloader:

    # Append a trailing axis and swap it with axis 1, which leaves a singleton
    # channel axis in position 1 as nn.Conv2d expects
    i = i.unsqueeze(-1)
    i = i.transpose(1, 3)
    print(i.shape)
    x = conv2d(i)
    print(x.shape)
    x = _1x1(x)
    print(x.shape)
    x = conv1d(x)

    x = x.flatten(start_dim=1)

    print(x.shape)

    # One batch is enough to read off the shapes
    break

In [None]:
class CNN(nn.Module):
    """Convolutional next-job classifier over padded career sequences.

    ``forward`` takes a rank-3 tensor whose last axis ends with six categorical
    id columns (ISCO level, source, education, company, function id, ISCO code,
    in that order). The ids are embedded, concatenated with the remaining
    columns, passed through three convolutions and one linear layer, and
    returned as log-probabilities over ``num_classes``.

    The static lookups (skills, certificates, licenses, languages, address) and
    the CV-embedding lookup are kept as helpers but are not wired into
    ``forward`` yet. Both the helpers and ``forward`` read the notebook-level
    ``device`` variable.
    """

    def __init__(self, num_classes, input_size, conv_size,
                 skills, certs, licenses, languages,
                 addresses, w2v, candidate_lengths, max_len,
                 skill_embedding_size=50, certs_embedding_size=20,
                 license_embedding_size=3, language_embedding_size=10,
                 address_embedding_size=25, function_embedding_size=50,
                 isco4_embedding_size=25, education_embedding_size=3,
                 isco_level_embedding_size=3, company_embedding_size=50):
        """Build all layers and store the lookup tables.

        Args:
            num_classes: number of output classes; also the vocabulary size of
                the ISCO-code embedding.
            input_size: width of the last axis of the tensor given to
                ``forward`` (including the six trailing id columns).
            conv_size: stored on the instance; not used by any layer.
            skills, certs, licenses, languages, addresses: dicts keyed by
                candidate id, used by ``static_lookup``.
            w2v: ``{candidate_id: {key: embedding}}`` used by ``w2v_lookup``.
            candidate_lengths: mapping from candidate id to career length.
            max_len: number of time steps of the padded input; also the kernel
                width of the first convolution.
            *_embedding_size: output widths of the respective embeddings.
        """
        super(CNN, self).__init__()

        self.num_classes = num_classes
        # NOTE(review): 300 presumably is the CV embedding width -- confirm
        self.input_size = input_size + 300
        self.conv_size = conv_size

        # Static embeddings: skills, certificates, licenses, languages.
        # The input widths are presumably the column counts of the one-hot CSVs -- verify.
        self.skill_embedding = self._random_projection(317, skill_embedding_size)
        self.certs_embedding = self._random_projection(98, certs_embedding_size)
        self.license_embedding = self._random_projection(8, license_embedding_size)
        self.language_embedding = self._random_projection(23, language_embedding_size)

        # Address embedding
        self.address_embedding = nn.Embedding(4757, address_embedding_size)

        # Categorical feature embeddings
        source_embedding_size = 1
        self.function_embedding = nn.Embedding(2992, function_embedding_size)
        self.isco_code_embedding = nn.Embedding(num_classes, isco4_embedding_size)
        self.company_embedding = nn.Embedding(441153, company_embedding_size)
        self.source_embedding = nn.Embedding(2, source_embedding_size)
        self.education_embedding = nn.Embedding(6, education_embedding_size)
        self.isco_level_embedding = nn.Embedding(5, isco_level_embedding_size)

        # Actual model
        k1 = max_len
        f_maps = 20
        N = 84

        # TODO: more than 1 conv2d
        # Kernel slides along the time axis, one feature row at a time
        self.conv2d = nn.Conv2d(in_channels=1,
                                out_channels=f_maps,
                                kernel_size=(1, k1),
                                stride=(1, 2),
                                padding=(0, k1 // 2))

        # Collapse the f_maps channels into a single map
        self._1x1 = nn.Conv2d(in_channels=f_maps,
                              out_channels=1,
                              kernel_size=(1, 1),
                              stride=1)

        # Patterns across features. The kernel is two-dimensional and the input
        # is 4-D, so this must be a Conv2d: nn.Conv1d accepted these arguments
        # only by accident on old PyTorch releases and current releases reject
        # 4-D input. The attribute keeps its historical name so that state-dict
        # keys stay the same.
        self.conv1d = nn.Conv2d(in_channels=1,
                                out_channels=1,
                                kernel_size=(N, k1 // 2),
                                padding=(N // 2, k1 // 2 // 2 // 2))

        # Kept for compatibility; forward() does not apply it
        self.relu = nn.ReLU()

        # Size of the flattened convolution output, derived from the layer
        # geometry instead of being hard-coded (1088 for 9 input columns and
        # max_len == 25).
        embedded_width = (input_size - 6
                          + isco_level_embedding_size + source_embedding_size
                          + education_embedding_size + company_embedding_size
                          + function_embedding_size + isco4_embedding_size)
        steps_after_conv2d = self._conv_out(max_len, k1, padding=k1 // 2, stride=2)
        out_rows = self._conv_out(embedded_width, N, padding=N // 2)
        out_cols = self._conv_out(steps_after_conv2d, k1 // 2,
                                  padding=k1 // 2 // 2 // 2)
        self.fc = nn.Linear(out_rows * out_cols, num_classes)

        self.softmax = nn.LogSoftmax(dim=-1)

        # Skill lookup
        self.skill_keys = set(skills.keys())
        self.skills = np.vectorize(skills.get)

        # Certificate lookup
        self.certs_keys = set(certs.keys())
        self.certs = np.vectorize(certs.get)

        # License lookup
        self.license_keys = set(licenses.keys())
        self.licenses = np.vectorize(licenses.get)

        # Language lookup
        self.langs_keys = set(languages.keys())
        self.langs = np.vectorize(languages.get)

        # Address lookup
        self.address_keys = set(addresses.keys())
        self.adds = np.vectorize(addresses.get)

        # w2v lookup
        self.w2v_keys = set(w2v.keys())
        self.w2v = w2v

        # Career durations
        self.candidate_lengths = candidate_lengths
        self.max_len = max_len

    @staticmethod
    def _random_projection(in_features, out_features):
        """Bias-free linear layer whose weights are redrawn from a standard normal."""
        layer = nn.Linear(in_features, out_features, bias=False)
        layer.weight.data = torch.randn_like(layer.weight)
        return layer

    @staticmethod
    def _conv_out(size, kernel, padding=0, stride=1):
        """Output length of one convolution axis (dilation 1)."""
        return (size + 2 * padding - kernel) // stride + 1

    @staticmethod
    def _lookup_or_zeros(key, known_keys, getter, width):
        """Return ``getter(key)`` as a LongTensor, or ``width`` zeros for unknown keys."""
        if key in known_keys:
            return torch.LongTensor(getter(key)).to(device)
        return torch.LongTensor([0] * width).to(device)

    def static_lookup(self, candidate):
        """Looks up a candidate's static features.

        Args:
            candidate: one-element tensor holding the candidate id.

        Returns:
            Tuple ``(skills, certificates, licenses, languages, address)`` of
            LongTensors; candidates missing from a table get all-zero vectors.
        """
        key = candidate.item()

        skill_list = self._lookup_or_zeros(key, self.skill_keys, self.skills, 317)
        certs_list = self._lookup_or_zeros(key, self.certs_keys, self.certs, 98)
        license_list = self._lookup_or_zeros(key, self.license_keys, self.licenses, 8)
        langs_list = self._lookup_or_zeros(key, self.langs_keys, self.langs, 23)
        address = self._lookup_or_zeros(key, self.address_keys, self.adds, 1)

        return skill_list, certs_list, license_list, langs_list, address

    def w2v_lookup(self, candidate, career_duration):
        """Finds a candidate's CVs and converts them to a tensor of length career_duration.

        Args:
            candidate: one-element tensor holding the candidate id.
            career_duration: number of time steps (rows) to produce.

        Returns:
            LongTensor with one row per time step. Candidates without CVs get
            rows of 300 zeros.
        """
        key = candidate.item()

        # No CV at all: all-zero rows
        if key not in self.w2v_keys:
            empty_cv = torch.LongTensor([0] * 300).to(device)
            return torch.stack([empty_cv] * career_duration)

        cvs = self.w2v[key]

        def repeat_first_cv():
            # Key 0 is the first CV when present; otherwise fall back to the
            # first stored one instead of failing with a KeyError.
            first_cv = cvs[0] if 0 in cvs else next(iter(cvs.values()))
            row = torch.LongTensor(first_cv).to(device)
            return torch.stack([row] * career_duration)

        # If a candidate only has one CV, it covers the whole career
        if len(cvs.keys()) == 1:
            return repeat_first_cv()

        # Otherwise, stack them accordingly.
        # Due to clipping, some careers are longer than max_len
        ks = np.array([k for k in cvs.keys() if k <= self.max_len])

        # Find how many time steps (rows) each CV lasted
        durations = [ks[i + 1] - ks[i]
                     if i < (len(ks) - 1)
                     else career_duration - ks[i]
                     for i in range(len(ks))]

        # NOTE(review): embed_values is not filtered like ks; the pairing below
        # assumes the dropped keys come last -- confirm.
        embed_values = list(cvs.values())

        # When the CV got updated on the last timestep, aka our test value,
        # remove it from the list of durations, as it should be ignored.
        # Every access is guarded: the list can run empty here, which used to
        # raise IndexError.
        if durations and durations[-1] == 0:
            durations.pop()
        if durations and durations[-1] == -1:  # last location > (career duration)?
            durations.pop()
            if durations:
                durations[-1] -= 1
                # In case the last one should be ignored completely
                if durations[-1] == 0:
                    durations.pop()

        # Nothing left to stack: repeat the first CV. The previous code fell
        # through to torch.cat on an empty list and crashed.
        if not durations:
            return repeat_first_cv()

        # One block of identical rows per CV, concatenated along the time axis
        storage = [torch.stack([torch.Tensor(embed_values[i])] * duration, dim=0)
                   for i, duration in enumerate(durations)]
        return torch.cat(storage).type(torch.LongTensor).to(device)

    def forward(self, x):
        """Return log-probabilities over ``num_classes`` for a batch of careers.

        Args:
            x: rank-3 tensor; the six trailing columns of the last axis are
               categorical ids. Assumes the axes are (batch, time step,
               feature) -- TODO confirm.
        """
        # TODO: static features and CV embeddings (static_lookup / w2v_lookup)
        # are not merged into the input yet.

        # isco_functie_niveau, source, education, company, function_id, isco_code4
        isco_level, source, education, company_name, function_id, isco_code = [
            x[:, :, column] for column in range(-6, 0)]

        x = x[:, :, :-6].to(device)

        def as_ids(column):
            # Embedding layers need integer indices on the model's device
            return column.type(torch.LongTensor).to(device)

        embedded = [self.isco_level_embedding(as_ids(isco_level)),
                    self.source_embedding(as_ids(source)),
                    self.education_embedding(as_ids(education)),
                    self.company_embedding(as_ids(company_name)),
                    self.function_embedding(as_ids(function_id)),
                    self.isco_code_embedding(as_ids(isco_code))]

        # Add features
        x = torch.cat([x] + embedded, dim=2)

        # Reshape to allow conv2D: add a trailing axis and swap it with axis 1,
        # which leaves a singleton channel axis in position 1
        x = x.unsqueeze(-1)
        x = x.transpose(1, 3)

        # K time steps, 1 feature at a time
        x = self.conv2d(x)

        # Reduce to 1 feature map
        x = self._1x1(x)

        # Patterns across all features
        x = self.conv1d(x)

        x = x.flatten(start_dim=1)

        out = self.fc(x)

        # log-softmax
        out = self.softmax(out)
        return out

In [None]:
def train_loop(model, trainloader, valloader, optimizer, scheduler, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs and validate after each one.

    After every epoch the model is evaluated with ``test_loop`` on
    ``valloader``, the scheduler is stepped once, and the loss curves collected
    so far are plotted.

    Args:
        model: network called as ``model(career)``.
        trainloader: yields ``(candidate, career, job)`` batches for training.
        valloader: yields the same kind of batches for validation.
        optimizer: optimizer over the model parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        criterion: loss called as ``criterion(outputs, job)``.
        num_epochs: number of passes over ``trainloader``.

    Returns:
        ``defaultdict(list)`` with one entry per epoch under the keys
        ``Epoch``, ``Acc@1``, ``Acc@5``, ``Acc@10``, ``Acc@20``, ``test_loss``
        and ``training_loss``.
    """
    results = defaultdict(list)

    # Series for the live loss plot. The leading (epoch 0, loss 6) point is a
    # placeholder, not a measurement.
    passed = [0]
    training_losses = [6]
    test_losses = [6]

    # Train the model
    for epoch in range(num_epochs):
        start = time.time()
        print("-------------------------------------------------------------------------------")
        print(f"Epoch starting at: {datetime.now().strftime('%H:%M:%S')}")

        # Validation may leave the model in eval mode; switch back every epoch
        model.train()
        training_loss = 0

        for i, (candidate, career, job) in enumerate(trainloader):
            # The candidate ids are not needed by the model, so they stay where they are
            career, job = career.to(device), job.to(device)
            optimizer.zero_grad()

            outputs = model(career)

            # obtain the loss
            loss = criterion(outputs, job)
            loss = loss.mean()
            loss.backward()
            optimizer.step()

            training_loss += loss.item()

            print("Epoch: %d, batch: %d/%d, loss: %1.5f" % (epoch + 1, i + 1, len(trainloader), loss.item()), end="\r")

        # Average over batches
        training_loss /= len(trainloader)

        stats = test_loop(valloader, model, criterion)
        results["Epoch"].append(epoch + 1)
        results["Acc@1"].append(stats[0])
        results["Acc@5"].append(stats[1])
        results["Acc@10"].append(stats[2])
        results["Acc@20"].append(stats[3])
        results["test_loss"].append(stats[4])
        results["training_loss"].append(training_loss)

        scheduler.step()

        # Measure once so that minutes and seconds come from the same instant
        elapsed = time.time() - start
        print(f"Epoch duration: {int(elapsed // 60)}:{int(elapsed % 60):02d}\n")

        passed.append(epoch + 1)
        training_losses.append(training_loss)
        test_losses.append(stats[4])

        plt.plot(passed, training_losses, label="Training Loss")
        plt.plot(passed, test_losses, label="Test Loss")
        plt.xlabel("Epoch")
        plt.ylabel("Average loss")
        plt.legend()
        plt.show()

    return results
        
def test_loop(dataloader, model, criterion):
    """Evaluate ``model`` on ``dataloader`` and print a summary.

    Args:
        dataloader: yields ``(candidate, career, job)`` batches.
        model: network called as ``model(career)``; returns one score per class.
        criterion: loss called as ``criterion(pred, job)``.

    Returns:
        Tuple ``(acc@1, acc@5, acc@10, acc@20, average batch loss)``; the
        accuracies are fractions of ``len(dataloader.dataset)``.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, acc1 = 0, 0
    top_k_hits = {5: 0, 10: 0, 20: 0}

    # Evaluate in eval mode and restore the caller's mode afterwards
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for candidate, career, job in dataloader:
                career, job = career.to(device), job.to(device)
                pred = model(career)

                test_loss += criterion(pred, job).mean().item()
                acc1 += (pred.argmax(1) == job).type(torch.float).sum().item()

                # Class indices ordered from most to least likely, per sample
                sorted_preds = torch.argsort(pred, 1, descending=True)
                answers = job.unsqueeze(1)

                for k in top_k_hits:
                    # Each row is a permutation of the classes, so a row
                    # matches its label at most once: the number of matches is
                    # the number of samples whose label is in the top k.
                    matches = sorted_preds[:, :k] == answers
                    top_k_hits[k] += matches.type(torch.float).sum().item()
    finally:
        model.train(was_training)

    test_loss /= num_batches
    acc1 /= size
    acc5 = top_k_hits[5] / size
    acc10 = top_k_hits[10] / size
    acc20 = top_k_hits[20] / size
    print(f"\nTest Error:")
    print(f"Acc@1: {(100*acc1):>0.2f}%, Acc@5: {100*acc5:>0.2f}%, " +\
          f"Acc@10: {100*acc10:>0.2f}%, Acc@20: {100*acc20:>0.2f}% Avg loss: {test_loss:>8f}")

    return acc1, acc5, acc10, acc20, test_loss

In [None]:
torch.cuda.empty_cache()

In [None]:
sns.set()

In [None]:
num_epochs = 3
current = 0

criterion = torch.nn.CrossEntropyLoss()

# One result dict per trained configuration
full_results = []

learning_rates = [1e-1, 1e-2, 1e-3, 1e-4][2:]
num_layers_values = [1, 5, 10]
conv_sizes = [24, 48, 64, 128][1:]

# Only learning rate x convolution size is looped over; num_layers_values is
# never iterated, so it must not inflate the progress denominator.
total_configurations = len(learning_rates) * len(conv_sizes)

try:
    for learning_rate in learning_rates:
        for conv_size in conv_sizes:

            cnn = CNN(num_classes=num_classes,
                      input_size=num_features,
                      conv_size=conv_size,
                      skills=skills,
                      certs=certs,
                      licenses=licenses,
                      languages=languages,
                      addresses=addresses,
                      w2v=w2v,
                      address_embedding_size=25,
                      candidate_lengths=candidate_lens,
                      max_len=max_len)

            cnn = cnn.to(device)

            optimizer = torch.optim.Adam(cnn.parameters(), lr=learning_rate)
            # Divide the learning rate by 10 every 5 epochs
            scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)

            print(f"Current iteration {current}/{total_configurations}")
            print(f"- Initial learning rate: {learning_rate}\n- Model: \n\n", cnn, "\n")

            # Store results of current configuration
            outcome = train_loop(cnn, trainloader, valloader, optimizer, scheduler, criterion, num_epochs)
            outcome["lr"] = [learning_rate] * num_epochs
            outcome["Convolution size"] = [conv_size] * num_epochs

            full_results.append(outcome)

            current += 1

            # Only the first convolution size is trained for now
            break

        # We ignore LR for now
        break
except KeyboardInterrupt:
    # Manual stop: keep whatever finished so far, but say so instead of passing silently
    print(f"\nInterrupted after {current} completed configuration(s)")

In [None]:
# Concatenate the per-configuration result columns into one long table
merge_results = defaultdict(list)

for run in full_results:
    for column, values in run.items():
        merge_results[column] += values

total = pd.DataFrame(merge_results)
total = total.set_index(["lr", "Convolution size", "Epoch"])

In [None]:
total

In [None]:
# Inspect one validation batch: accuracy, label distributions and the
# "predict the previous job" baseline.
with torch.no_grad():
    for candidate, career, job in valloader:
        candidate = candidate.to(device)
        career = career.to(device)
        job = job.to(device)

        pred = cnn(career)
        predicted = pred.argmax(1)

        print("Batch accuracy:", (predicted == job).type(torch.float).mean().item())

        # Class frequencies of the true labels ...
        truth_counts = pd.Series(Counter(job.tolist()))
        truth_counts.sort_index().plot(kind="area", label="Ground truth")

        # ... and of the predicted labels, drawn on the same axes
        predicted_counts = pd.Series(Counter(predicted.tolist()))
        predicted_counts.sort_index().plot(kind="area", label="predicted")
        plt.xlabel("isco_code4")
        plt.ylabel("number of occurences")
        plt.legend()

        # Check how often the model predicted the previous job + compare to baseline performance
        last_known = career_paths.loc[candidate.cpu()].apply(lambda path: path[-2][-1])
        previous_job = torch.LongTensor(last_known.values).to(device)
        print("Previous-job baseline accuracy:", (job == previous_job).cpu().numpy().mean())
        print("Fraction of previous job predictions:", (predicted == previous_job).cpu().numpy().mean())

        plt.show()
        # A single batch is enough for this check
        break

In [None]:
# Layer whose activations and gradients are turned into the class-activation map.
# The grad-cam API that accepts `targets=[ClassifierOutputTarget(...)]` takes a
# list of layers through the `target_layers` keyword.
target_layers = [cnn._1x1]

# Construct the CAM object once, and then re-use it on many images.
# use_cuda follows the notebook-level `device` instead of being forced on, so
# the CAM never moves the model to a different device than its inputs.
# NOTE(review): newer grad-cam releases reportedly dropped `use_cuda` -- confirm
# against the installed version.
cam = GradCAM(model=cnn, target_layers=target_layers,
              use_cuda=str(device).startswith("cuda"))

In [None]:
cmap = sns.diverging_palette(240, 10, n=9, as_cmap=True)

In [None]:
# One batch-averaged activation map per validation batch
overall = []

# Class whose activation maps are inspected
target_class = 353

for c, i, j in valloader:
    input_tensor = i.to(device)
    # grad-cam pairs targets with samples one-to-one; with a single target only
    # the first sample of the batch would contribute to the gradients.
    targets = [ClassifierOutputTarget(target_class)] * len(input_tensor)
    a = cam(input_tensor=input_tensor, targets=targets)
    overall.append(a.mean(axis=0).T)

In [None]:
sns.heatmap(np.array(overall).sum(axis=0)[:,::-1], cmap=cmap)