In [1]:
%load_ext autoreload
%matplotlib ipympl
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sys
import clipboard
import numpy as np
import os
from sklearn.metrics import roc_auc_score
from IPython.display import clear_output
from PIL import Image
import networkx as nx
import json
import pandas as pd
import random
import time
import neptune
from collections import defaultdict
import os
import time
import subprocess
import torch
import gc
sys.path.append('/Users/orenm/BlenderShaderProject/project_files/')

In [2]:
%autoreload
from Logic.utils import lc
from Logic.data_loaders import create_dataloaders, evaluate_model_by_attribute
from Logic.NN_makers import make_siamese_vgg, make_siamese_dists

In [3]:
import cProfile
import IPython.display as dp
from pstats import Stats

# pip install gprof2dot

def profile(exec_code):
    """
    Profile a code string with cProfile and render the call graph as a PNG.

    Args:
        exec_code: Python source string handed to ``cProfile.run``.

    Returns:
        An ``IPython.display.Image`` pointing at the rendered call-graph PNG.

    Raises:
        subprocess.CalledProcessError: if ``gprof2dot`` or ``dot`` exits with a
            non-zero status.
        FileNotFoundError: if either executable is not on PATH.

    Requires the ``gprof2dot`` package and Graphviz's ``dot`` to be installed.
    """
    # Local imports keep this cell self-contained.
    import subprocess
    import tempfile
    from pathlib import Path

    # Use the platform temp directory instead of a hard-coded '/tmp', which
    # does not exist by default on Windows.
    work_dir = Path(tempfile.gettempdir())
    stats_file = work_dir / 'cprof.pstats'
    image_file = work_dir / 'output.png'

    cProfile.run(exec_code, filename=str(stats_file))

    # Equivalent of: gprof2dot -f pstats <stats> | dot -Tpng -o <png>
    # Run without a shell, and fail loudly instead of silently showing a
    # stale or missing image.
    dot_source = subprocess.run(
        ['gprof2dot', '-f', 'pstats', str(stats_file)],
        check=True,
        capture_output=True,
    ).stdout
    subprocess.run(
        ['dot', '-Tpng', '-o', str(image_file)],
        input=dot_source,
        check=True,
    )
    return dp.Image(filename=str(image_file))

In [4]:
def count_parameters(model):
    """
    Count the elements of every trainable parameter of ``model``.

    Only parameters whose ``requires_grad`` flag is truthy contribute; each
    one adds its ``numel()`` to the total.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [5]:
# SECURITY: a Neptune API token was previously hard-coded on this line.
# Credentials must not live in the notebook; read the token from the
# NEPTUNE_API_TOKEN environment variable instead. The old token should be
# revoked, since any copy of this notebook that contained it exposes it.
# NEPTUNE_KEY is None when the variable is not set.
NEPTUNE_KEY = os.environ.get("NEPTUNE_API_TOKEN")

In [6]:
# Root data directory, and the three sub-directories derived from it.
path = '/Users/orenm/BlenderShaderProject/data/'
images_path, models_path, db_path = (
    os.path.join(path, subdir) for subdir in ('images/', 'models/', 'DB/')
)

In [7]:
# Read the pair definitions (a JSON document) from the data directory.
file_path = os.path.join(path, 'texture_cls_pairs.json')
with open(file_path, "rb") as json_file:
    raw_bytes = json_file.read()
    data = json.loads(raw_bytes)

In [8]:
# Show the top-level keys of the loaded JSON document.
data.keys()

dict_keys(['similar_pairs', 'different_pairs_random', 'different_pairs_cluster', 'cat_numeric_pairs', 'important_params_pairs'])

In [9]:
def add_labels(pairs, *labels):
    """
    Turn each pair into a tuple of its first two items followed by ``labels``.

    Args:
        pairs: iterable of indexable items; only positions 0 and 1 are kept.
        *labels: values appended, in order, to every resulting tuple.

    Returns:
        A list of tuples ``(item[0], item[1], *labels)``.
    """
    labelled = []
    for item in pairs:
        first, second = item[0], item[1]
        labelled.append((first, second) + labels)
    return labelled

# Assemble one flat list of (item_a, item_b, label, pair_type) tuples:
# the four "different" pair types get label 0, 'similar_pairs' gets label 1.
dataset = []
negative_pair_types = (
    'different_pairs_random',
    'different_pairs_cluster',
    'cat_numeric_pairs',
    'important_params_pairs',
)
for pair_type in negative_pair_types:
    dataset += add_labels(data[pair_type], 0, pair_type)

dataset += add_labels(data['similar_pairs'], 1, 'similar_pairs')

In [10]:
# Number of entries currently held in dataset.
len(dataset)

158304

In [11]:
# Draw a reproducible random subset of the dataset.
# A dedicated, seeded Random instance makes Restart & Run All produce the same
# sample without touching the global random state. The sample size is capped
# at len(dataset) because random.sample raises ValueError when k exceeds it.
SAMPLE_SIZE = 50000
SAMPLE_SEED = 42
sampled_dataset = random.Random(SAMPLE_SEED).sample(
    dataset, k=min(SAMPLE_SIZE, len(dataset))
)

In [12]:
# Relative frequency of the values in column 3 of the sampled tuples.
pair_type_column = pd.DataFrame(sampled_dataset)[3]
pair_type_column.value_counts(normalize=True)

3
similar_pairs              0.38360
different_pairs_random     0.18996
different_pairs_cluster    0.18934
important_params_pairs     0.13488
cat_numeric_pairs          0.10222
Name: proportion, dtype: float64

In [13]:
def run_training_script(hyperparams, script_path):
    """
    Run the training module in a child Python process with the given hyperparameters.

    Args:
        hyperparams: mapping of hyperparameter name -> value. Only the names
            listed in ``cli_options`` below are forwarded, each as
            ``--<name> <str(value)>``. Names that are missing, or whose value
            is None, are not forwarded at all (presumably leaving the script to
            use its own default - confirm against the script's argument parser).
        script_path: dotted module name handed to ``python -m``
            (for example 'Logic.training_script').

    Returns:
        True if the child process exited with status 0, False if it exited with
        a non-zero status (the error is printed, not raised).
    """
    # Forwarded options, in the order they are placed on the command line.
    cli_options = (
        "sample_perc",
        "layers_to_take",
        "learning_rate",
        "model_name",
        "final_emb",
        "pool_type",
        "loss_type",
        "scheduler_name",
        "resize",
        "test_perc",
        "batch_size",
        "num_epochs",
    )

    # Base command: run the module with the same interpreter as this kernel.
    cmd = [sys.executable, '-m', script_path]
    for option in cli_options:
        value = hyperparams.get(option)
        if value is None:
            continue
        # subprocess only accepts string arguments, so convert every value
        # (the original converted some options and not others).
        cmd.extend([f"--{option}", str(value)])

    try:
        # Execute the script via subprocess; check=True raises on a non-zero exit.
        subprocess.run(cmd, check=True)
        return True

    except subprocess.CalledProcessError as e:
        # Best effort: report the failure and let the caller continue.
        print(f"Error during training: {e}")
        return False

    finally:
        # Training ran in a separate process, so this only tidies up the
        # current process. Collect garbage first so that memory held by
        # unreachable objects can be released by the CUDA cache flush.
        gc.collect()
        torch.cuda.empty_cache()


In [14]:
# Candidate hyperparameter values.
# NOTE(review): none of these lists is referenced by the cells visible in this
# notebook export; presumably they record the search space the hand-written
# hyperparameter sets are picked from - confirm before removing.
# The (a, b) tuples presumably correspond to 'layers_to_take' strings of the
# form '<a>_<b>' (e.g. (7, 256) -> '7_256') - TODO confirm.
resnet_layer_sizes = [(5, 64), (6, 128), (7,256)]
vgg_layer_sizes = [(8, 128),(10, 128),(15, 256),(17, 256), (22, 512),(24, 512)]
learning_rates = [1e-3, 1e-4]
final_emb = [64, 128]
loss_type = ['cos', 'cont']
pool_type = ['avg', 'max']
model_name = ['resnet', 'vgg', 'dists']
resize = [True, False]

In [15]:
# One hyperparameter set per (layers, learning rate) combination and scheduler:
# each combination is listed first with the 'cosine' scheduler, then with 'step'.
hyperparameters_list = [
    {
        'layers_to_take': layers,
        'learning_rate': lr,
        'scheduler_name': scheduler,
    }
    for layers, lr in (('7_256', 1e-4), ('6_128', 1e-4), ('6_128', 5e-5))
    for scheduler in ('cosine', 'step')
]

In [16]:
# Settings shared by every run; merged in place into each hyperparameter set.
add_to_all = dict(
    sample_perc=1,
    num_epochs=4,
    pool_type='max',
    loss_type='cos',
    resize=True,
    model_name='resnet',
    final_emb=128,
)
for hp in hyperparameters_list:
    hp.update(**add_to_all)

In [17]:
# Tabular view of hyperparameters_list, one row per hyperparameter set.
pd.DataFrame(hyperparameters_list)

Unnamed: 0,layers_to_take,learning_rate,scheduler_name,sample_perc,num_epochs,pool_type,loss_type,resize,model_name,final_emb
0,7_256,0.0001,cosine,1,4,max,cos,True,resnet,128
1,7_256,0.0001,step,1,4,max,cos,True,resnet,128
2,6_128,0.0001,cosine,1,4,max,cos,True,resnet,128
3,6_128,0.0001,step,1,4,max,cos,True,resnet,128
4,6_128,5e-05,cosine,1,4,max,cos,True,resnet,128
5,6_128,5e-05,step,1,4,max,cos,True,resnet,128


In [18]:
# Add the root project directory to sys.path
project_path = "/Users/orenm/BlenderShaderProject/project_files"  # Adjust this to your actual project root
# Guard against re-running this cell: append the entry only once.
if project_path not in sys.path:
    sys.path.append(project_path)

# Change the working directory to the project folder (optional, but helps avoid path issues)
os.chdir(project_path)

print("Current working directory:", os.getcwd())  # Check if it's correct

Current working directory: C:\Users\orenm\BlenderShaderProject\project_files


In [19]:
# powercfg is a Windows tool; skip the power-plan tweaks on other platforms.
is_windows = os.name == "nt"

# Disable the AC standby timeout so the machine does not sleep mid-run.
if is_windows:
    os.system("powercfg -change -standby-timeout-ac 0")

try:
    # Loop through different sets of hyperparameters
    for i, hyperparams in enumerate(hyperparameters_list, start=1):
        t = time.time()
        print(f"Starting training for set {i}/{len(hyperparameters_list)}, with hyperparameters: {hyperparams}")
        outcome = run_training_script(hyperparams, script_path='Logic.training_script')
        # Only an explicit False return is reported as a failure; any other
        # return value (including None) keeps the original "finished" message.
        status = "failed" if outcome is False else "finished"
        print(f"Training {status}, Time: {round(time.time() - t, 2)}")
        print("=" * 50)  # Separating output for clarity
        time.sleep(5) # computer rest and avoid resource limit
finally:
    # Always restore the standby timeout, even if the loop is interrupted or
    # raises. Assumes the previous setting was 20 minutes - TODO confirm.
    if is_windows:
        os.system("powercfg -change -standby-timeout-ac 20")

Starting training for set 1/6, with hyperparameters: {'layers_to_take': '7_256', 'learning_rate': 0.0001, 'scheduler_name': 'cosine', 'sample_perc': 1, 'num_epochs': 4, 'pool_type': 'max', 'loss_type': 'cos', 'resize': True, 'model_name': 'resnet', 'final_emb': 128}
Training finished, Time: 2300.91
Starting training for set 2/6, with hyperparameters: {'layers_to_take': '7_256', 'learning_rate': 0.0001, 'scheduler_name': 'step', 'sample_perc': 1, 'num_epochs': 4, 'pool_type': 'max', 'loss_type': 'cos', 'resize': True, 'model_name': 'resnet', 'final_emb': 128}
Training finished, Time: 2291.21
Starting training for set 3/6, with hyperparameters: {'layers_to_take': '6_128', 'learning_rate': 0.0001, 'scheduler_name': 'cosine', 'sample_perc': 1, 'num_epochs': 4, 'pool_type': 'max', 'loss_type': 'cos', 'resize': True, 'model_name': 'resnet', 'final_emb': 128}
Training finished, Time: 1902.84
Starting training for set 4/6, with hyperparameters: {'layers_to_take': '6_128', 'learning_rate': 0.00

0