In [1]:
%load_ext autoreload
%matplotlib ipympl
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sys
import clipboard
import numpy as np
import os
from sklearn.metrics import roc_auc_score
from IPython.display import clear_output
from PIL import Image
import networkx as nx
import json
import pandas as pd
import random
import time
import neptune
from collections import defaultdict
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from torchvision import models, transforms
from torchvision.models import VGG16_Weights
sys.path.append('/Users/orenm/BlenderShaderProject/project_files/')

In [2]:
from Logic.utils import lc
from Logic.data_loaders import create_dataloaders, evaluate_model_by_attribute
from Logic.NN_makers import make_siamese_vgg

In [3]:
import cProfile
import IPython.display as dp
from pstats import Stats

# pip install gprof2dot

def profile(exec_code):
    cProfile.run(exec_code, filename='/tmp/cprof.pstats')
    !gprof2dot -f pstats /tmp/cprof.pstats | dot -Tpng -o /tmp/output.png
    return dp.Image(filename='/tmp/output.png')

In [4]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

In [5]:
NEPTUNE_KEY = "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJjYTQ2MmQ1YS1mNTc0LTRkMDgtYWU1My02MTQ0MWIyNDdlNzUifQ=="

In [6]:
path = '/Users/orenm/BlenderShaderProject/data/'
images_path = os.path.join(path, 'images/')
models_path = os.path.join(path, 'models/')
db_path = os.path.join(path, 'DB/')

In [7]:
file_path = os.path.join(path, 'texture_cls_pairs.json')
with open(file_path, "rb") as json_file:
    data = json.load(json_file)

In [8]:
data.keys()

dict_keys(['similar_pairs', 'different_pairs_random', 'different_pairs_cluster', 'cat_numeric_pairs', 'important_params_pairs'])

In [9]:
def add_labels(pairs, *labels):
    return [(x[0], x[1], *labels) for x in pairs]

dataset = []
for pair_type in ['different_pairs_random', 'different_pairs_cluster', 'cat_numeric_pairs', 'important_params_pairs']:
    dataset.extend(add_labels(data[pair_type], 0, pair_type))

dataset.extend(add_labels(data['similar_pairs'], 1, 'similar_pairs'))

In [10]:
len(dataset)

158304

In [11]:
sampled_dataset = random.sample(dataset, k=50000)

In [12]:
pd.DataFrame(sampled_dataset)[3].value_counts(normalize=True)

3
similar_pairs              0.38758
different_pairs_cluster    0.18984
different_pairs_random     0.18798
important_params_pairs     0.13106
cat_numeric_pairs          0.10354
Name: proportion, dtype: float64

In [13]:
possible_layers_to_stop_at = [(8, 128),(10, 128),(15, 256),(17, 256), (22, 512),(24, 512)]
layers_to_take = (10, 128)
batch_size = 64
learning_rate = 0.0001
num_epochs=4

log_interval = 100

model = make_siamese_vgg(layers_to_take)
print("model params:", count_parameters(model))

model params: 324928


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = model.to(device)

cuda


In [15]:
train_loader, test_loader = create_dataloaders(sampled_dataset, images_path, test_size=0.2, batch_size=batch_size, num_workers=4)

In [None]:
run = neptune.init_run(
    project="oren.matar/BlenderShaders",
    api_token=NEPTUNE_KEY,
)
run["parameters"] = {
    "batch_size": batch_size,
    'model_type': 'VGG16',
    'layers_taken': layers_to_take[0],
    'criterion': 'CosineEmbeddingLoss',
    "learning_rate": learning_rate,
    "optimizer": "Adam",
}
t = time.time()

criterion = torch.nn.CosineEmbeddingLoss(margin=0.3)

# Optimizer and Scheduler
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

# Training Loop
for epoch in range(num_epochs):
    model.train()  # Set to training mode
    running_loss = 0.0
    
    for batch_idx, (img1, img2, labels, attributes) in enumerate(train_loader, start=1):
        print(f'{batch_idx}/{len(train_loader)}')
        img1, img2, labels = img1.to(device), img2.to(device), labels.to(device)
        embedding1 = model(img1)
        embedding2 = model(img2)
        
        targets = 2 * labels - 1  # Convert 0/1 labels to -1/1
        loss = criterion(embedding1, embedding2, targets)
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # gradient clipping
        optimizer.step()
        
        running_loss += loss.item()

        if batch_idx % log_interval == 0:
            
            eval_auc = evaluate_model_by_attribute(model, test_loader)
            run["train/loss"].append(running_loss)
            run['test/auc'] = eval_auc

            running_loss = 0.0

    torch.save({
    'epoch': epoch,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
    'loss': running_loss
    }, f"{models_path}+checkpoint_epoch_{epoch + 1}.pt")
    
    scheduler.step()

run.stop()



[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/oren.matar/BlenderShaders/e/BLEN-11
1/625
2/625
3/625
4/625
5/625
6/625
7/625
8/625
9/625
10/625
11/625
12/625
13/625
14/625
15/625
16/625
17/625
18/625
19/625
20/625
21/625
22/625
23/625
24/625
25/625
26/625
27/625
28/625
29/625
30/625
31/625
32/625
33/625
34/625
35/625
36/625
37/625
38/625
39/625
40/625
41/625
42/625
43/625
44/625
45/625
46/625
47/625
48/625
49/625
50/625
51/625
52/625
53/625
54/625
55/625
56/625
57/625
58/625
59/625
60/625
61/625
62/625
63/625
64/625
65/625
66/625
67/625
68/625
69/625
70/625
71/625
72/625
73/625
74/625
75/625
76/625
77/625
78/625
79/625
80/625
81/625
82/625
83/625
84/625
85/625
86/625
87/625
88/625
89/625
90/625
91/625
92/625
93/625
94/625
95/625
96/625
97/625
98/625
99/625
100/625
Starting eval using cuda
