# Config & Imports

In [1]:
# Central configuration for the notebook.
#   'data'        - parameters of the synthetic regression data sets
#   'lambda'      - split / fit settings for the lambda-nets
#   'inets'       - split / fit settings for the i-net trained in this notebook
#   'computation' - hardware and seeding options
config = {
    'data': {
        'n_datasets': 10_000, # the number of datasets
        
        'n_samples': 5_000, # the number of samples per dataset
        
        'n_features': 10, # the number of features
        
        'n_informative': 10,
        # The number of informative features, i.e., the number of features used to build the linear model used to generate the output.
        
        'n_targets': 1,
        # The number of regression targets, i.e., the dimension of the y output vector associated with a sample. By default, the output is a scalar.
    
        'bias': 0.0,
        # The bias term in the underlying linear model
        
        'effective_rank': 5,
        # if not None:
            # The approximate number of singular vectors required to explain most of the input data by linear combinations. Using this kind of singular spectrum in the input allows the generator to reproduce the correlations often observed in practice.
        # if None:
            # The input set is well conditioned, centered and gaussian with unit variance.
        
        'tail_strength': 0.5,
        # The relative importance of the fat noisy tail of the singular values profile if effective_rank is not None. When a float, it should be between 0 and 1.
        
        'noise': 5,
        # The standard deviation of the gaussian noise applied to the output.
        
        'shuffle': True,
        # Shuffle the samples and the features.
        
        'random_state': None,
        # Determines random number generation for dataset creation. Pass an int for reproducible output across multiple function calls.
    },    
    'lambda': {
        'data_prep': {
            'train_test_val_split': { # refer to sklearn doc
                'test_size': 0.2,
                'val_size': 0.1,
                'random_state': None,
                'shuffle': True,
                'stratify': None
            }
        },
        'model_compile': {
            
        },
        'model_fit': { # refer to keras API
            'batch_size': 32,
            'epochs': 150,
            # Keras accepts 'auto', 0, 1 or 2 here; the integer 0 means silent.
            # (The string '0' is truthy and is not treated as silent.)
            'verbose': 0,
            'callbacks': None,
            'shuffle': True,
            'class_weight': None,
            'sample_weight': None,
            'initial_epoch': 0,
            'steps_per_epoch': None,
            'validation_steps': None,
            'validation_batch_size': None,
            'validation_freq': 1
        }
    },
    'inets': {
        'data_prep': {
            'train_test_val_split': { # refer to sklearn doc
                'test_size': 0.2,
                'val_size': 0.1,
                'random_state': None,
                'shuffle': True,
                'stratify': None
            }
        },
        'model_compile': {
            
        },
        'model_fit': { # refer to keras API
            'batch_size': 32,
            'epochs': 1000,
            'verbose': 'auto',
            'callbacks': None,
            'shuffle': True,
            'class_weight': None,
            'sample_weight': None,
            'initial_epoch': 0,
            'steps_per_epoch': None,
            'validation_steps': None,
            'validation_batch_size': None,
            'validation_freq': 1
        }
    },
    'computation':{
        'n_jobs': 100,
        'use_gpu': False,
        'gpu_numbers': '4',
        # NOTE(review): RANDOM_SEED is not read anywhere in the visible cells - confirm where it is applied.
        'RANDOM_SEED': 1,   
    }
}

In [2]:
import numpy as np
from pathlib import Path
from sklearn.model_selection import train_test_split

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import BatchNormalization

import utilities

2022-05-18 10:20:47.328469: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-05-18 10:20:47.328506: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [3]:
# Configure GPU / XLA related environment variables from config['computation'].
# When use_gpu is False every variable is set to the empty string.
# NOTE(review): tensorflow is already imported in an earlier cell - confirm that
# these variables are still honoured when they are set after the import.
os.environ['CUDA_VISIBLE_DEVICES'] = config['computation']['gpu_numbers'] if config['computation']['use_gpu'] else ''
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true' if config['computation']['use_gpu'] else ''

os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/local/cuda-11.4' if config['computation']['use_gpu'] else ''
# TF_XLA_FLAGS holds whitespace-separated flags; a comma-joined token is not a valid flag.
os.environ['TF_XLA_FLAGS'] = '--tf_xla_auto_jit=2 --tf_xla_enable_xla_devices' if config['computation']['use_gpu'] else ''

In [4]:
# Report how many physical devices of each type TensorFlow can see.
for label, device_type in (("Num GPUs Available: ", 'GPU'),
                           ("Num XLA-GPUs Available: ", 'XLA_GPU')):
    print(label, len(tf.config.experimental.list_physical_devices(device_type)))

Num GPUs Available:  0
Num XLA-GPUs Available:  0


2022-05-18 10:20:50.251155: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-05-18 10:20:50.251204: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-05-18 10:20:50.251239: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (dws-02): /proc/driver/nvidia/version does not exist


# Functions

In [5]:
#def load_model(datasetIndex):
 #   # with open(utilities.lambda_path(config, datasetIndex), 'r') as f:
  #  #     model = keras.models.load_model(f)
   # model = keras.models.load_model(utilities.lambda_path(config, datasetIndex))
    #return model

In [6]:
# def get_weightdata(datasetIndex):
  #   weights_list = load_model(datasetIndex).get_weights()
    # return np.concatenate([x.flatten() for x in weights_list])

In [7]:
def load_lambda():
    """Load the array stored in 'lambda_weights_list.npy'.

    The file is looked up inside the directory returned by
    utilities.lambda_path_LR(config).

    Returns:
        The array read by np.load (pickled object arrays are allowed).
    """
    weights_file = utilities.lambda_path_LR(config) + '/lambda_weights_list.npy'

    # NOTE(review): allow_pickle=True can execute arbitrary code when the file
    # is untrusted - only load files produced by this project.
    with open(weights_file, "rb") as handle:
        lambda_weights = np.load(handle, allow_pickle=True)
    return lambda_weights

In [8]:
#def get_models():
#    # return np.array([get_weights(i) for i in range(config['data']['n_datasets'])])
#    # return np.array([get_weightdata(i) for i in range(10)])
#    with open(utilities.lambda_path_LR(config, 0) + '.npy', 'rb') as f:
#        weights_list = np.load(f, allow_pickle=True)
#    for i in range(1, config['data']['n_datasets']):
#        with open(utilities.lambda_path(config, i) + '.npy', 'rb') as f:
#            weights = np.load(f, allow_pickle=True)
#        weights_list = np.vstack([weights_list, weights])
#    return weights_list

In [9]:
def get_coef():
    """Load the array stored in 'coef_list_targetForInet.npy'.

    The file is looked up inside the directory returned by
    utilities.data_path_LR(config).

    Returns:
        The array read by np.load.
    """
    coef_file = utilities.data_path_LR(config) + '/coef_list_targetForInet.npy'
    coefficients = np.load(coef_file)
    return coefficients

In [10]:
def train_test_val_split(X, y):
    """Split X and y into aligned train, test and validation subsets.

    All options are read from
    config['inets']['data_prep']['train_test_val_split']. The data is first
    split into a training part and a hold-out part whose relative size is
    test_size + val_size; the hold-out part is then split again into the
    test and validation subsets.

    Args:
        X: Samples to split.
        y: Targets, aligned with X.

    Returns:
        Tuple (X_train, X_test, X_val, y_train, y_test, y_val).
    """
    split_cfg = config['inets']['data_prep']['train_test_val_split']
    holdout_size = split_cfg['test_size'] + split_cfg['val_size']

    # First split: training data vs. everything that is held out (test + val).
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X,
        y,
        test_size=holdout_size,
        random_state=split_cfg['random_state'],
        shuffle=split_cfg['shuffle'],
        stratify=split_cfg['stratify'])

    # Second split: divide the hold-out part into test and validation. The
    # validation share is expressed relative to the hold-out size.
    # NOTE(review): a non-None 'stratify' from config is passed unchanged to
    # this split as well - confirm it is compatible with the hold-out subset
    # before enabling stratification.
    X_test, X_val, y_test, y_val = train_test_split(
        X_holdout,
        y_holdout,
        test_size=split_cfg['val_size'] / holdout_size,
        random_state=split_cfg['random_state'],
        shuffle=split_cfg['shuffle'],
        stratify=split_cfg['stratify'])

    return X_train, X_test, X_val, y_train, y_test, y_val
    

In [11]:
def save_model(model):
    """Store all weights of `model` as one flat vector in 'inet_weights.npy'.

    The target directory is the one returned by utilities.inet_path_LR(config);
    it is created (including parents) when it does not exist yet.

    Args:
        model: Object exposing get_weights(), which returns a list of arrays.
    """
    target_dir = utilities.inet_path_LR(config)
    Path(target_dir).mkdir(parents=True, exist_ok=True)

    # Flatten every weight array and join them, in get_weights() order.
    flat_parts = [layer_weights.flatten() for layer_weights in model.get_weights()]
    flat_weights = np.concatenate(flat_parts)

    with open(target_dir + '/inet_weights.npy', "wb") as out_file:
        np.save(out_file, flat_weights, allow_pickle=True)

# Model Training

In [12]:
# Load the stored lambda-net weights; after splitting they are the i-net input (x in model.fit).
X = load_lambda()

In [13]:
X.shape

(10000, 7261)

In [14]:
# Load the coefficient array; after splitting it is the i-net training target (y in model.fit).
y = get_coef()

In [15]:
y.shape

(10000, 10)

In [None]:
# Data Prep: split the inputs X and the targets y into train / test / validation parts.
X_train, X_test, X_val, y_train, y_test, y_val = train_test_val_split(X, y)

# Model Def: batch-normalised input followed by four ReLU layers and one output
# unit per target column.
# NOTE(review): the sigmoid output limits predictions to (0, 1) - confirm that
# the targets in y are scaled to that range.
model = Sequential()
model.add(BatchNormalization(input_dim=X.shape[1]))
for hidden_units in (6000, 5000, 4000, 1000):
    model.add(Dense(hidden_units, activation='relu'))
model.add(Dense(y.shape[1], activation='sigmoid'))

model.compile(optimizer='adam',
              loss='mae',
              metrics=['mae', keras.metrics.RootMeanSquaredError()])
model.summary()

# Model fit: every option comes from config['inets']['model_fit']; the
# validation split is used for monitoring during training.
fit_options = config['inets']['model_fit']
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=fit_options['batch_size'],
    epochs=fit_options['epochs'],
    verbose=fit_options['verbose'],
    callbacks=fit_options['callbacks'],
    validation_data=(X_val, y_val),
    shuffle=fit_options['shuffle'],
    class_weight=fit_options['class_weight'],
    sample_weight=fit_options['sample_weight'],
    initial_epoch=fit_options['initial_epoch'],
    steps_per_epoch=fit_options['steps_per_epoch'],
    validation_steps=fit_options['validation_steps'],
    validation_batch_size=fit_options['validation_batch_size'],
    validation_freq=fit_options['validation_freq'],
)
print(history.history['val_loss'])


2022-05-18 10:20:52.761959: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization (BatchN  (None, 7261)             29044     
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 6000)              43572000  
                                                                 
 dense_1 (Dense)             (None, 5000)              30005000  
                                                                 
 dense_2 (Dense)             (None, 4000)              20004000  
                                                                 
 dense_3 (Dense)             (None, 1000)              4001000   
                                                                 
 dense_4 (Dense)             (None, 10)                10010     
                                                        

In [None]:
# Write the trained model's flattened weights to disk (see save_model).
save_model(model)