In [1]:
# Class dependencies
from pprint import pprint
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import torch
from models.folds import Fold, SoftFold
from models.model_bank import DynamicOrigami, Softmax
from models.training import train, NoamScheduler, load_data, plot_model
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_digits
from tqdm import tqdm

In [2]:
# import small digits dataset
digits = load_digits()
X = digits.data
y = digits.target

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2)

# set up the model
# model = OrigamiNetwork(layers=1, epochs=300, optimizer='sgd',learning_rate=0.001, sigmoid=True, crease=5)
# model.fit(X_train, y_train, X_val, y_val)

# # get the predictions
# y_hat = model.predict(X_test)
# print(accuracy_score(y_test, y_hat))

In [None]:
import itertools
import random
architectures = {}
n = 1
layer_type = ['SoftFold', 'Fold']
hs = [False, True]
depths = [1, 4, 7, 10]


lrs = [1e-2, 1e-3, 1e-4]
leak = 0.1
repeat = 3
rates = [0.1]
alernate_rates = [1, 0.5]
combinations = list(itertools.product(depths, layer_type, rates, lrs))
progress = tqdm(total=672, desc="Linearly Increasing")
precision = 6
architectures["Control"] = {"string": "", 
                            "structure": [],
                            "learning_rate": 1e-3,
                            "repeat": 3}

for i, comb in enumerate(combinations):
    depth, layer, rate, lr = comb
    h = hs[random.randint(0, 1)]
    
    details = f"_{layer}_depth_{depth}_rate_{rate}_stretch_{int(h)}_lr{lr}"
    name_list = [name + details for name in ["Increasing", "Plateau", "Flat"]]
    
    width_list = []
    width_list.append([round(n*(1+rate)**i, precision) for i in range(1, depth+1)])
    width_list.append([round(n*(1+rate), precision)]*depth)
    width_list.append([round(n, precision)]*depth)
    
    linear_layers_list = []
    linear_layers_list.append([layer, "Linear"] * (depth//2 + 1))
    # linear_layers_list.append([layer, layer, "Linear", "Linear"] * (depth//4 + 1))
    # linear_layers_list.append(["Linear", "Linear", layer, layer] * (depth//4 + 1))
    # linear_layers_list.append(["Linear", "Linear", "Linear", layer, layer, layer] * (depth//6 + 1))
    linear_layers_list.append([layer, layer, layer, "Linear", "Linear", "Linear"] * (depth//6 + 1))
    
    # only folds
    for name, widths in zip(name_list, width_list):
        if name == name_list[0] and depth > 6 and rate > 0.1:
            continue
        structure = [{'type': layer, 'params': {'width':width, 'has_stretch': h}} for width in widths]
        if structure not in [arch["structure"] for arch in architectures.values()]:
            architectures[name] = {"string": [layer]*depth, 
                                   "structure": structure,
                                   "learning_rate": lr,
                                   "repeat": repeat}
            if layer == "Fold":
                architectures[name].update({"leak": leak})
    
    # linear networks
    for alternate in alernate_rates:
        for name, widths in zip(name_list, width_list):
            if name == name_list[0] and depth > 6 and rate > 0.1:
                continue
            for i, layer in enumerate(linear_layers_list):
                layer = layer[:depth]
                params = []
                prev_layer, prev_width = None, 1
                for format, width in zip(layer, widths):
                    if format == "Linear":
                        win = prev_width if prev_layer != "Linear" else width
                        params.append({'in_features': win, 'out_features': round(alternate * width, precision)})
                    else:
                        params.append({'width':width, 'has_stretch': h})
                    prev_layer, prev_width = format, width
                layer_params = [{'type': form, 'params': param} for form, param in zip(layer, params)]
                if layer_params not in [arch["structure"] for arch in architectures.values()]:
                    architectures["Linear_"+name+f"_alternate_{alternate}_{i}"] = {"string": layer, 
                                                                                   "structure": layer_params,
                                                                                   "learning_rate": lr,
                                                                                   "repeat": repeat}
                    if layer == "Fold":
                        architectures[name].update({"leak": leak})
                progress.update(1)
progress.close()

Linearly Increasing:  43%|████▎     | 288/672 [00:00<00:00, 24017.01it/s]


In [6]:
len(architectures)

174

In [9]:
# save architectures to json
import json
with open("BenchmarkTests/architectures.json", "w") as file:
    json.dump(architectures, file)

In [16]:
list(architectures.keys())[1]

'Increasing_SoftFold_depth_1_rate_0.1_stretch_0_lr0.01'

In [19]:
query = {"Linear":False,
         "growth": "Increasing",
         "fold": "SoftFold",
         "depth": 1,
         "rate": 0.1,
         "stretch": 0, # not used
         "alternate": 1, # not used
         "pattern": 0,
         "lr": 1e-2}
key1 = "Linear_" if query["Linear"] else ""
key1 += f"{query['growth']}_{query['fold']}_depth_{query['depth']}_rate_{query['rate']}"
key2 = key1
key1 += f"_stretch_0_lr{query['lr']}"
key2 += f"_stretch_1_lr{query['lr']}"
# key1 += f"" if query["Linear"] else ""
# key2 += f"_lr{query['lr']}" if query["Linear"] else ""

print(len(architectures))
try:
    key = key1 if key1 in architectures else key2
    print("Querying:", key, "\n")
    pprint(architectures[key])
except KeyError:
    print(f"Key not found: {key}")
    print("Available query values:")
    print("\tLinear: [True, False]")
    print("\tgrowth: ['Increasing', 'Plateau', 'Flat']")
    print(f"\tfold: {layer_type}")
    print(f"\tdepth: {depths}")
    print(f"\trate: {rates}")
    print(f"\tstretch: {[int(h) for h in hs]}")
    print(f"\talternate:", alernate_rates)
    print(f"\tpattern: [0, 1, 2, 3, 4]")
    print(f"\tlr: {lrs}")
    
# for key in architectures.keys():
#     print(key)

174
Querying: Increasing_SoftFold_depth_1_rate_0.1_stretch_0_lr0.01 

{'learning_rate': 0.01,
 'repeat': 3,
 'string': ['SoftFold'],
 'structure': [{'params': {'has_stretch': False, 'width': 1.1},
                'type': 'SoftFold'}]}


In [155]:
filter_list = ["Linear", "SoftFold", "Flat", "rate_0.2"]
count = 0
for key in architectures.keys():
    if all([filt in key for filt in filter_list]):
        print(key)
        # pprint(architectures[key])
        # print("\n")
        count += 1
plural = "s" if count != 1 else ""
print("Found", count, f"architecture{plural}")

Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_1_0
Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_1_1
Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_1_2
Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_0.5_0
Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_0.5_1
Linear_Flat_SoftFold_depth_8_rate_0.2_stretch_1_alternate_0.5_2
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_1_0
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_1_1
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_1_2
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_0.5_0
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_0.5_1
Linear_Flat_SoftFold_depth_14_rate_0.2_stretch_0_alternate_0.5_2
Found 12 architectures


In [3]:
architecture = [{'type': 'SoftFold', 'params': {'width':64, 'has_stretch': False}},
                {'type': 'SoftFold', 'params': {'width':70, 'has_stretch': False}},
                {'type': 'SoftFold', 'params': {'width':80, 'has_stretch': False}},
                {'type': 'SoftFold', 'params': {'width':90, 'has_stretch': False}},
                {'type': 'SoftFold', 'params': {'width':100, 'has_stretch': False}}]

architecture2 = [{'type': 'SoftFold', 'params': {'width':100, 'has_stretch': False}}]

In [4]:
# Data generation
train_loader = load_data(X_train, y_train, batch_size=32, shuffle=True)
val_loader = load_data(X_val, y_val, batch_size=32, shuffle=True)
test_loader = load_data(X_test, y_test, batch_size=32, shuffle=True)

In [None]:
model = DynamicOrigami(architecture,10)
optimizer = torch.optim.Adam(model.parameters(), lr=.0001)
train_losses, val_losses, train_accuracies, val_accuracies = train(model, optimizer, train_loader, val_loader, epochs=300)
plot_model(train_losses, val_losses, train_accuracies, val_accuracies)
print("final val_score:", np.mean(val_accuracies[-10]))