In [1]:
from ray import train, tune
from layer import Layer
from perceptron import Perceptron
from network import Network
import typing as tp
import numpy as np
import seaborn as sns
import os
import pandas as pd
import itertools

In [2]:
def min_max_scale(a: np.ndarray) -> np.ndarray:
    min_val: np.ndarray = np.min(a, axis=0)
    max_val: np.ndarray = np.max(a, axis=0)
    return (a - min_val) / (max_val - min_val)

x_cols: tp.List[str] = ['LAC', 'SOW']
y_cols: tp.List[str] = ['TACA']

train_df: pd.DataFrame = pd.read_csv(os.path.join('data', 'train.csv'))
test_df: pd.DataFrame = pd.read_csv(os.path.join('data', 'test.csv'))

train_X: np.ndarray = min_max_scale(train_df[x_cols].values)
train_y: np.ndarray = train_df[y_cols].values

test_X: np.ndarray = min_max_scale(test_df[x_cols].values)
test_y: np.ndarray = test_df[y_cols].values

train_X.shape, train_y.shape

((10, 2), (10, 1))

In [3]:
def get_average_error(net: Network, x: np.ndarray, y: np.ndarray) -> float:
    tot_error: float = 0
    for i in range(x.shape[0]):
        net.feedforward(x[i])
        tot_error += net.get_total_error(y[i])
    return tot_error

def f1_score(pred: np.ndarray, true_y: np.ndarray) -> float:
    """
    Use F1 Score since it is better for unbalanced datasets, which our dataset is.
    :param pred:
    :param true_y:
    :return:
    """
    if isinstance(true_y, list):
        true_y: np.ndarray = np.array(true_y)
    if isinstance(pred, list):
        pred: np.ndarray = np.array(pred)
    tol: float = 1e-3

    def _is_equal(one: np.ndarray, two: tp.Union[np.ndarray, float]) -> np.ndarray:
        return np.abs(one - two) < tol

    true_positives: int = int(np.sum(np.logical_and(_is_equal(pred, 1), _is_equal(true_y, 1))))
    false_positives: int = int(np.sum(np.logical_and(_is_equal(pred, 1), _is_equal(true_y, 0))))
    false_negatives: int = int(np.sum(np.logical_and(_is_equal(pred, 0), _is_equal(true_y, 1))))
    denom: float = 2.0 * true_positives + false_positives + false_negatives
    if abs(denom - 0) < 1e-3:
        return 0
    return 2.0 * true_positives / denom

In [4]:
def objective(config) -> tp.Dict[str, float]:
    np.random.seed(config['seed'])
    num_nodes: tp.List[int] = [n for n in config['num_nodes'] if n > 0]
    if len(num_nodes) < 2:
        # Don't have the right number of layers.
        return {'error': 1e9, 'f1_score': -1e9}
    network: Network = Network().add_layer(
        Layer(num_in_features=2,
              num_out_features=num_nodes[0],
              use_biases=config['use_biases'],
              is_hidden_layer=True))
    for num_in_nodes, num_out_nodes in itertools.pairwise(num_nodes):
        network.add_layer(Layer(num_in_features=num_in_nodes,
                                num_out_features=num_out_nodes,
                                use_biases=config['use_biases'],
                                is_hidden_layer=True))
    network.add_layer(Layer(num_in_features=num_nodes[-1],
                            num_out_features=1,
                            use_biases=config['use_biases'],
                            is_hidden_layer=False))
    for epoch in range(config['epochs']):
        for i in range(train_X.shape[0]):
            network.feedforward(train_X[i, :])
            network.backprop(config['lr'], train_y[i, :])
    predictions: tp.List[int] = []
    for i in range(train_X.shape[0]):
        predictions.append(network.feedforward(train_X[i, :]))
    curr_error: float = get_average_error(network, train_X, train_y)
    f1: float = f1_score(pred=np.array(predictions), true_y=train_y)
    return {'f1_score': f1, 'error': curr_error}

In [5]:
objective({'seed': 13, 'num_nodes': [2, 4, 0, 0, 8], 'use_biases': True, 'lr': 1e-3, 'epochs': 10})

{'f1_score': 0, 'error': 2.2989093353939873}

In [None]:
class FakeList:
    # I just want my list to be hashable. 
    def __init__(self, l: tp.List[int]):
        self.l: tp.List[int] = l
    
    def __hash__(self) -> int:
        return sum(hash((i + 1) * v) for i, v in enumerate(self.l)) % 3010349
    
    def __getattr__(self, whatever):
        return getattr(self.l, whatever)
    
    def __str__(self) -> str:
        return str(self.l)
    
    def __repr__(self) -> str:
        return repr(self.l)
    
    def to_list(self) -> tp.List[int]:
        return self.l

In [None]:
v = FakeList([1, 2, 3])
v.append(23)
v, hash(v)

In [None]:
s = {1, 2, 3}
len(s.intersection([2, 3, 4]))

In [None]:
def get_all_possible_layers(max_num_layers: int, max_nodes_log2: int, max_tries: int = 10, block_size: int = 64, max_num_iter: int = 10_000) -> tp.List[tp.List[int]]:
    listing: tp.Set[FakeList] = set()
    np.random.seed(13)
    
    def _helper() -> tp.List[FakeList]:
        block: np.ndarray = (2 ** np.random.randint(-1, 1 + max_nodes_log2, size=(block_size, max_num_layers), dtype=int).astype(float)).astype(int)
        block = block[np.sum(block, axis=1) > 2]
        return list({FakeList(l.tolist()) for l in block})
    
    num_tries: int = 0
    num_iter: int = 0
    
    while num_tries < max_tries and num_iter <= max_num_iter:
        configs: tp.List[FakeList] = _helper()
        while len(listing.intersection(configs)) >= block_size // 2:
            num_tries += 1
        num_tries = 0
        listing.update(configs)
        num_iter += 1
    return [l.to_list() for l in list(listing)]

In [None]:
layers_config_path: str = os.path.join('data', 'layer_config.txt.gz')
if not os.path.exists(layers_config_path):
    layer_config: tp.List[tp.List[int]] = get_all_possible_layers(5, 4, max_tries = 2, max_num_iter = 5_000)
    np.savetxt(layers_config_path, np.array(layer_config).astype(int))
else:
    layer_config: tp.List[tp.List[int]] = [a.tolist() for a in np.loadtxt(layers_config_path).astype(int)]
layer_config[:5], len(layer_config)

In [None]:
len(f'{1 * 1024 * 1024 * 1024}'), len('1073741824')

In [None]:
import ray 

memory_to_use_GB: int = 8

ray.init(num_gpus=0, object_store_memory=1024*1024*1024*memory_to_use_GB)

In [None]:
# https://docs.ray.io/en/latest/tune/index.html

max_num_layers: int = 5
max_num_nodes: int = int(np.log2(2 ** 4))

search_space = {
    'epochs': tune.grid_search(list(range(30, 61, 10))),
    'lr': 1e-1,
    'seed': tune.grid_search(list(range(25))),
    'use_biases': False,
    'num_nodes': tune.grid_search(layer_config[:5_000])
}

tuner = tune.Tuner(tune.with_resources(
        objective, resources={}
    ), objective, param_space=search_space, tune_config=tune.TuneConfig(scheduler=tune.schedulers.ASHAScheduler(metric='f1_score', mode='max'), num_samples=1))

results = tuner.fit()
results.get_best_result(metric='f1_score', mode='max')

In [None]:
import pickle

In [None]:
with open(os.path.join('data', 'best_results.pkl'), 'wb') as f:
    pickle.dump(results.get_best_result(metric='f1_score', mode='max'), f)

In [None]:
results

In [8]:
s = {1, 2, 3}
len(s.intersection([2, 3, 4]))

2

In [9]:
def get_all_possible_layers(max_num_layers: int, max_nodes_log2: int, max_tries: int = 10, block_size: int = 64, max_num_iter: int = 10_000) -> tp.List[tp.List[int]]:
    listing: tp.Set[FakeList] = set()
    np.random.seed(13)
    
    def _helper() -> tp.List[FakeList]:
        block: np.ndarray = (2 ** np.random.randint(-1, 1 + max_nodes_log2, size=(block_size, max_num_layers), dtype=int).astype(float)).astype(int)
        block = block[np.sum(block, axis=1) > 2]
        return list({FakeList(l.tolist()) for l in block})
    
    num_tries: int = 0
    num_iter: int = 0
    
    while num_tries < max_tries and num_iter <= max_num_iter:
        configs: tp.List[FakeList] = _helper()
        while len(listing.intersection(configs)) >= block_size // 2:
            num_tries += 1
        num_tries = 0
        listing.update(configs)
        num_iter += 1
    return [l.to_list() for l in list(listing)]

In [10]:
layers_config_path: str = os.path.join('data', 'layer_config.txt.gz')
if not os.path.exists(layers_config_path):
    layer_config: tp.List[tp.List[int]] = get_all_possible_layers(5, 4, max_tries = 2, max_num_iter = 5_000)
    np.savetxt(layers_config_path, np.array(layer_config).astype(int))
else:
    layer_config: tp.List[tp.List[int]] = [a.tolist() for a in np.loadtxt(layers_config_path).astype(int)]
layer_config[:5], len(layer_config)

([[1, 0, 1, 0, 1],
  [0, 0, 2, 1, 0],
  [0, 1, 0, 0, 4],
  [2, 0, 1, 1, 1],
  [2, 0, 2, 0, 2]],
 319212)

In [11]:
len(f'{1 * 1024 * 1024 * 1024}'), len('1073741824')

(10, 10)

In [12]:
import ray 

memory_to_use_GB: int = 8

ray.init(num_gpus=0, object_store_memory=1024*1024*1024*memory_to_use_GB)

2023-11-19 09:41:13,993	INFO worker.py:1673 -- Started a local Ray instance.


0,1
Python version:,3.10.13
Ray version:,2.8.0


In [13]:
# https://docs.ray.io/en/latest/tune/index.html

max_num_layers: int = 5
max_num_nodes: int = int(np.log2(2 ** 4))

search_space = {
    'epochs': tune.grid_search(list(range(30, 61, 10))),
    'lr': 1e-1,
    'seed': tune.grid_search(list(range(25))),
    'use_biases': False,
    'num_nodes': tune.grid_search(layer_config[:5_000])
}

tuner = tune.Tuner(tune.with_resources(
        objective, resources={}
    ), objective, param_space=search_space, tune_config=tune.TuneConfig(scheduler=tune.schedulers.ASHAScheduler(metric='f1_score', mode='max'), num_samples=1))

results = tuner.fit()
results.get_best_result(metric='f1_score', mode='max')

0,1
Current time:,2023-11-19 18:31:27
Running for:,08:50:11.69
Memory:,14.1/15.3 GiB

Trial name,status,loc,epochs,num_nodes,seed,iter,total time (s),f1_score,error
objective_b0e04_126651,RUNNING,10.0.0.227:1398524,60,"[16, 2, 0, 8, 16]",6,,,,
objective_b0e04_126665,RUNNING,10.0.0.227:1398520,40,"[16, 16, 8, 1, 1]",6,,,,
objective_b0e04_126668,RUNNING,10.0.0.227:1398518,30,"[2, 16, 0, 8, 16]",6,,,,
objective_b0e04_126669,RUNNING,10.0.0.227:1398515,40,"[2, 16, 0, 8, 16]",6,1.0,2.1296,0.666667,2.49985
objective_b0e04_126671,RUNNING,10.0.0.227:1398516,60,"[2, 16, 0, 8, 16]",6,,,,
objective_b0e04_126674,RUNNING,10.0.0.227:1398511,50,"[16, 2, 4, 4, 16]",6,,,,
objective_b0e04_126675,RUNNING,10.0.0.227:1398510,60,"[16, 2, 4, 4, 16]",6,,,,
objective_b0e04_126679,PENDING,10.0.0.227:1398509,60,"[16, 0, 8, 16, 2]",6,,,,
objective_b0e04_126682,PENDING,,50,"[1, 8, 2, 16, 0]",6,,,,
objective_b0e04_126683,PENDING,10.0.0.227:1398513,60,"[1, 8, 2, 16, 0]",6,,,,


[33m(raylet)[0m [2023-11-19 18:31:14,675 E 1394256 1394256] (raylet) node_manager.cc:3035: 1 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: a34752f1128186e837a1340cc005166d519dc2897421733d1feccfa7, IP: 10.0.0.227) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 10.0.0.227`
[33m(raylet)[0m 
[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


RuntimeError: Caught unexpected exception: Task was killed due to the node running low on memory.
Memory on the node (IP: 10.0.0.227, ID: a34752f1128186e837a1340cc005166d519dc2897421733d1feccfa7) where the task (actor ID: 18e51ac6cdc5734041b3131701000000, name=ImplicitFunc.__init__, pid=1398524, memory used=0.10GB) was running was 14.59GB / 15.33GB (0.951345), which exceeds the memory usage threshold of 0.95. Ray killed this worker (ID: 7d2227d9c3b58c0fea52597113c5c01ceecc2de9787ea7711d3b299e) because it was the most recently scheduled task; to see more information about memory usage on this node, use `ray logs raylet.out -ip 10.0.0.227`. To see the logs of the worker, use `ray logs worker-7d2227d9c3b58c0fea52597113c5c01ceecc2de9787ea7711d3b299e*out -ip 10.0.0.227. Top 10 memory users:
PID	MEM(GB)	COMMAND
1394168	7.00	/home/duke_trystan/Documents/FastStorage/conda/envs/JHU-Intro-NN/bin/python -m ipykernel_launcher -f...
4250	0.48	io.elementary.appcenter -s
1394202	0.37	/home/duke_trystan/Documents/FastStorage/conda/envs/JHU-Intro-NN/lib/python3.10/site-packages/ray/co...
5170	0.32	/usr/lib/firefox/firefox -contentproc -childID 14 -isForBrowser -prefsLen 34092 -prefMapSize 246449 ...
643533	0.32	/usr/lib/firefox/firefox -contentproc -childID 483 -isForBrowser -prefsLen 34551 -prefMapSize 246449...
3912	0.28	/usr/lib/firefox/firefox -contentproc -childID 2 -isForBrowser -prefsLen 45473 -prefMapSize 246449 -...
3738	0.26	/usr/lib/firefox/firefox
1394256	0.18	/home/duke_trystan/Documents/FastStorage/conda/envs/JHU-Intro-NN/lib/python3.10/site-packages/ray/co...
3219	0.18	./jetbrains-toolbox --minimize
7187	0.13	/home/duke_trystan/Documents/FastStorage/conda/envs/JHU-Intro-NN/bin/python -m ipykernel_launcher -f...
Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. Set max_restarts and max_task_retries to enable retry when the task crashes due to OOM. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.

In [None]:
import pickle

In [None]:
with open(os.path.join('data', 'best_results.pkl'), 'wb') as f:
    pickle.dump(results.get_best_result(metric='f1_score', mode='max'), f)

In [14]:
results

NameError: name 'results' is not defined