In [1]:
import sys, os, pickle
sys.path.append('/home/om2382/vanilla-rtrl/')
from cluster import *
from continual_learning import *
from core import *
from dynamics import *
from functions import *
from gen_data import *
from learning_algorithms import *
from optimizers import *
from plotting import *
from torch_tools import *
from wrappers import *
import torch
from torch.optim.lr_scheduler import ExponentialLR

In [2]:
### --- SET UP ALL CONFIGS --- ###
from itertools import product
n_seeds = 50
macro_configs = config_generator()
micro_configs = tuple(product(macro_configs, list(range(n_seeds))))
prototype = False

### --- SELECT PARTICULAR CONFIG --- ###
try:
    i_job = int(os.environ['SLURM_ARRAY_TASK_ID']) - 1
except KeyError:
    i_job = 0
    prototype = True
params, i_seed = micro_configs[i_job]
i_config = i_job//n_seeds

new_random_seed_per_condition = True
if new_random_seed_per_condition:
    np.random.seed(i_job)
else: #Match random seeds across conditions
    np.random.seed(i_seed)

In [None]:
root_name = 'BPTT_CDM_b200_50_examples_ctx0'
file_names = []

np.random.seed(i_seed)
T_trial = 100
task = Context_Dependent_Decision_Task(T_trial=T_trial, input_var=0.1, report_cue=True)
N_train = 10000000
N_test = 20000
checkpoint_interval = 5
data = task.gen_data(N_train, N_test, add_dummy_end_point=False)
batched_data = add_batch_dimension_to_data(data, T_trial)

### --- initialize RNN paramters and RNN object --- ###
n_in = task.n_in
n_hidden = 32
n_out = task.n_out

W_in  = np.random.normal(0, np.sqrt(1/(n_in)), (n_hidden, n_in))
W_rec = np.random.normal(0, np.sqrt(1/n_hidden), (n_hidden, n_hidden))
W_out = np.random.normal(0, np.sqrt(1/(n_hidden)), (n_out, n_hidden))
b_rec = np.zeros(n_hidden)
b_out = np.zeros(n_out)

alpha = 0.1

rnn = Torch_RNN(W_in, W_rec, W_out, b_rec, b_out,
                activation='tanh',
                alpha=alpha)

optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)
scheduler = None
checkpoints = train_torch_RNN(rnn, optimizer, batched_data,
                              batch_size=200, n_epochs=2,
                              L2_reg=0.0001, verbose=True,
                              checkpoint_interval=checkpoint_interval,
                              scheduler=scheduler)

sim = Empty_Simulation()
sim.checkpoints = checkpoints
indices = sorted([k for k in checkpoints.keys() if type(k) != str])
sim.checkpoint_interval = indices[1] - indices[0]
sim.total_time_steps = indices[-1] + sim.checkpoint_interval
sim.rnn = checkpoints['final']['rnn']

#fix context to context 0
task.fixed_context = 0

file_name = root_name + '_seed={}'.format(i_seed)
for key in params.keys():
    file_name += '_{}={}'.format(key, str(params[key]).replace('.', ','))

file_names.append(file_name)

print('finish simulation: {}'.format(i_seed))
for key in params.keys():
    print('{}:, {}, seed = {}'.format(key, params[key], i_seed))
if np.isnan(rnn.W_rec.detach().numpy()[0, 0]):
    print('MISTAKES WERE MADE')
    pass


### --- SAVE RUN --- ###
with open(os.path.join('/home/om2382/learning-dynamics/notebooks/Figure_CDM/saved_runs/', file_name), 'wb') as f:
    saved_run = {'sim': sim, 'task': task}
    pickle.dump(saved_run, f)

In [7]:
###Truncate file above
file_name = 'generate_saved_runs_BPTT_CDM'
job_name = 'generate_BPTT_CDM_b200_runs'
project_dir = '/home/om2382/learning-dynamics/'
main_script_path = os.path.join(project_dir, 'cluster_main_scripts', job_name + '.py')
get_ipython().run_cell_magic('javascript', '', 'IPython.notebook.save_notebook()')
get_ipython().system('jupyter nbconvert --to script --no-prompt {}.ipynb'.format(file_name))
get_ipython().system('awk "/###Truncate/ {{exit}} {{print}}" {}.py'.format(file_name))
get_ipython().system('sed -i "/###Truncate/Q" {}.py'.format(file_name))
get_ipython().system('mv {}.py {}'.format(file_name, main_script_path))

<IPython.core.display.Javascript object>

[NbConvertApp] Converting notebook generate_saved_runs_BPTT_CDM.ipynb to script
[NbConvertApp] Writing 11582 bytes to generate_saved_runs_BPTT_CDM.py
awk: cmd. line:1: /###Truncate/ <IPython.core.autocall.ZMQExitAutocall object at 0x2b21e1cf9970> <built-in function print>
awk: cmd. line:1:                       ^ syntax error
awk: cmd. line:1: /###Truncate/ <IPython.core.autocall.ZMQExitAutocall object at 0x2b21e1cf9970> <built-in function print>
awk: cmd. line:1:                                                                                ^ syntax error


In [8]:
###Submit job to cluster
n_jobs = len(micro_configs)
write_job_file(job_name, py_file_name='{}.py'.format(job_name))
job_script_path = os.path.join(project_dir, 'job_scripts', job_name + '.s')
job_id_1 = submit_job(job_script_path, n_jobs)

rm: cannot remove ‘/home/om2382/learning-dynamics/results/misc/generate_BPTT_CDM_b200_runs/result_*’: No such file or directory
sending incremental file list

sent 3,494 bytes  received 34 bytes  2,352.00 bytes/sec
total size is 441,230  speedup is 125.07


In [13]:
###Get job status
get_ipython().system('squeue -u om2382')

             JOBID PARTITION     NAME     USER ST       TIME  NODES NODELIST(REASON)
           1122084    lkumar  jupyter   om2382  R      25:10      1 ax16


In [None]:
!ls -t ../job_scripts/

In [None]:
project_dir = '/home/om2382/learning-dynamics/'
job_name = 'generate_RFLO_CDM_runs'
job_script_path = os.path.join(project_dir, 'job_scripts', job_name + '.s')
configs_array, results_array, key_order, sim_dict = unpack_processed_data(job_script_path, username='om2382')

In [None]:
### --- Plot examples with one task --- ###
task_dict = {}
for key in sim_dict.keys():
    #T_context = int(key.split('_')[-1])
    task_dict[key] = Context_Dependent_Decision_Task(T_trial=100, input_var=0.1, report_cue=True, tonic_report=True)
#data = task.gen_data(1000, 1000) #generate data from task
fig = plot_1d_or_2d_array_of_config_examples(configs_array, results_array,
                                             key_order, sim_dict, data=data,
                                             task_dict=task_dict, N_task_data=3000,
                                             xlim=[0, 3000], trace_spacing=3,
                                             output_scale=20)

In [None]:
fig = plot_2d_array_of_config_results(configs_array, results_array, key_order)#, tick_rounding=None)

In [None]:
fig = plot_3d_or_4d_array_of_config_results(configs_array, results_array, key_order,
                                            tick_rounding=5, vmin=0, vmax=0.005)

In [None]:
sim_dict.keys()

In [None]:
### --- COLORS --- ###

def hex_to_rgb(value):
    value = value.lstrip('#')
    lv = len(value)
    return list(int(value[i:i + lv // 3], 16) for i in range(0, lv, lv // 3))

def rgb_to_hex(rgb):
    return '#%02x%02x%02x' % rgb

def linearly_interpolate_colors(col1, col2, N):

    if '#' in col1:
        c1 = hex_to_rgb(col1)
    if '#' in col2:
        c2 = hex_to_rgb(col2)

    cols = np.linspace(c1, c2, N).astype(int)
    return [rgb_to_hex(tuple(c)) for c in cols]

In [None]:
plt.plot(data['probe']['X'][:,0])
plt.plot(data['probe']['X'][:,1])
plt.plot(data['probe']['X'][:,2])
plt.plot(data['probe']['X'][:,3])
plt.plot(data['probe']['X'][:,4])

In [None]:
### --- PLOT FINAL TOPOLOGIES IN SPACE --- ###
colors = ['#0000FF', '#D62728']
col1 = '#FF0000'
col2 = '#0000FF'
col3 = '#00FF00'
col4 = '#FFFF00'
probe_colors = linearly_interpolate_colors(col1, col2, 10) + linearly_interpolate_colors(col3, col4, 10)
data = task.gen_data(0, 1000)
#probe_colors = ['C{}'.format(i) for i in range(10)] + ['k']
for i_key, key in enumerate(sim_dict):

    #if key.split('_')[-1] != '0':
    #    continue
    if key != '0.001_0.03_0':
        continue
    #if '500000' not in key:
    #    continue
    sim = sim_dict[key]
    cp = sim.analyzed_checkpoint_1
    ssa = State_Space_Analysis(cp, data, n_PCs=2)
    ssa = plot_checkpoint_results(cp, data, ssa,
                                  plot_fixed_points=True,
                                  plot_cluster_means=True,
                                  plot_graph_structure=False,
                                  plot_test_points=False,
                                  n_test_samples=None,
                                  T_per_sample=100,
                                  test_alpha=1,
                                  plot_probe=True,
                                  n_probes=20,
                                  probe_colors=probe_colors,
                                  eig_norm_color=True,
                                  graph_key='adjmat_input_1')
    W_in_ = cp['rnn'].W_in[:,0]
    W_out_ = cp['rnn'].W_out[0]
    w_in = ssa.transform(W_in_)
    w_in = w_in / norm(w_in)
    w_out = ssa.transform(W_out_)
    w_out = w_out / norm(w_out)
    #plt.title(key + ', in-out-alignment = {}'.format(normalized_dot_product(W_in_, W_out_)))
    plt.title(key)
    if True:
        #ssa.ax.plot([-w_in[0], w_in[0]],
        #            [-w_in[1], w_in[1]], color='k', linewidth=2)
        ssa.ax.plot([-w_out[0], w_out[0]],
                    [-w_out[1], w_out[1]], color='g', linewidth=2)
        window_size = 1.2
        #ssa.ax.set_ylim([-window_size, window_size])
        #ssa.ax.set_xlim([-window_size, window_size])
    #ssa.fig.savefig('figs/DB_input_{}.pdf'.format(i_key))

In [None]:
np.savetxt('FPs.csv', sim_dict['3_0.1_0'].analyzed_checkpoint['fixed_points'])

In [None]:
data = sim_dict['3_0.1_0'].analyzed_checkpoint['fixed_points']

In [None]:
import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

# Initialize lists to store the results
num_clusters = []
num_noise_points = []

# Iterate over the seeds
for seed in range(10):
    # Run t-SNE with the current seed
    tsne = TSNE(n_components=2, random_state=seed)
    tsne_data = tsne.fit_transform(data)
    
    # Run DBSCAN with eps=4.6 and min_samples=5
    dbscan = DBSCAN(eps=4.6, min_samples=5)
    db_labels = dbscan.fit_predict(tsne_data)
    
    # Calculate the number of clusters and the number of noise points
    clusters = len(set(db_labels)) - (1 if -1 in db_labels else 0)
    noise_points = list(db_labels).count(-1)
    
    # Store the results
    num_clusters.append(clusters)
    num_noise_points.append(noise_points)

# Display the results
print(num_clusters)
print(num_noise_points)


In [None]:
sim_dict.keys()

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

# Initialize lists to store the results
num_clusters = []
num_noise_points = []

# Create a figure with 7 subplots
for method in ['PCA1', 'PCA2', 'TSNE']:
    fig, axes = plt.subplots(2, 5, figsize=(20, 10))
    keys = [k for k in list(sim_dict.keys()) if k.split('_')[0]=='128']
    for i in range(10):
        print(i)

        data = sim_dict[keys[i]].analyzed_checkpoint['fixed_points']
        # Run t-SNE with the current seed
        if method == 'TSNE':
            tsne = TSNE(n_components=2, random_state=42, perplexity=30.0)
            tsne_data = tsne.fit_transform(data)
        #random projection instead
        #if i< 9:
        #    np.random.seed(seed)
        #    U = np.linalg.qr(np.random.normal(0, 1, (data.shape[1], data.shape[1])))[0][:,:2]
        #elif i==9:
        elif method == 'PCA1':
            U = np.linalg.svd(data)[-1][:,:2]
            tsne_data = data.dot(U)
        elif method == 'PCA2':
            U = np.linalg.svd(data)[-1][:2]
            tsne_data = data.dot(U.T)

        # Run DBSCAN with eps=4.6 and min_samples=5
        dbscan = DBSCAN(eps=5, min_samples=5)
        db_labels = dbscan.fit_predict(tsne_data)

        # Calculate the number of clusters and the number of noise points
        clusters = len(set(db_labels)) - (1 if -1 in db_labels else 0)
        noise_points = list(db_labels).count(-1)

        # Store the results
        num_clusters.append(clusters)
        num_noise_points.append(noise_points)

        # Plot the t-SNE visualization for this seed
        ax = axes[i // 5, i % 5]
        scatter = ax.scatter(tsne_data[:, 0], tsne_data[:, 1], c=db_labels, cmap='Spectral', s=5)
        ax.set_title(f'Seed {0}, Clusters {clusters}, Noise {noise_points}, {keys[i]}')
        legend1 = ax.legend(*scatter.legend_elements(), title="Clusters")
        ax.add_artist(legend1)

    # Show the plots
    plt.tight_layout()
    #plt.suptitle(f'perp = {perplexity}')
    plt.show()


In [None]:
from pcurve.principle_curve import PrincipleCurve