In [151]:
import mdptoolbox as mdpt, numpy as np
import mdptoolbox.example
import MDP

Suppose, given a transition function and discount rate, we generate a random reward function over all transitions. We then sparsify the reward function by setting some proportion (e.g. 10%) of the transition values to 0. We then generate the optimal policy for said reward function (using, for instance, policy iteration). We now attempt to build a model that can predict the sparsity used to generate the optimal policy given the transition function, discount rate, and policy itself, but *not* the reward function, as otherwise the problem would be trivial.

In [182]:
### Generate a bunch of MDPs with different parameters, sparsity

NUM_MDPs = 1000
NUM_STATES = 10
NUM_ACTIONS = 4

def get_transition_matrix(num_states, num_actions, generator = np.random.dirichlet):
    """
    Returns a transition matrix for a given number of states and actions
    
    Returns:
        P: (num_actions, num_states, num_states) array, where P[a, s, s'] is the probability of 
        transitioning from state s to state s' given action a
    """
    P = np.zeros((num_actions, num_states, num_states)) # (A, S, S) shape
    for a in range(num_actions):
        for s in range(num_states):
            P[a, s, :] = generator(np.ones(num_states))
    return P

def get_reward_matrix(num_states, num_actions, sparsity = 0.0, generator = np.random.normal,
                      only_pos_rewards = False):
    """
    Returns a reward matrix for a given number of states and actions
    [Fix 2/27/24: sparsity should be deterministic, while sparse rewards should be in random order]
    """
    num_sparse_rewards = int(sparsity * num_actions * num_states ** 2)
    rewards = np.array([(0 if i < num_sparse_rewards else (abs(generator()) if only_pos_rewards else generator())) 
                        for i in range(num_actions * num_states ** 2)])
    np.random.shuffle(rewards)
    return rewards.reshape((num_actions, num_states, num_states))

def get_reward_matrix_variance(num_states, num_actions, variance_level = 0.0, sparse_var = 10.0, dense_var = 1.0, 
                               generator = np.random.normal, only_pos_rewards = False):
    """
    Returns a reward matrix for a given number of states and actions
    Coherent/sparse rewards are generated with a higher variance
    """
    num_var_rewards = int(variance_level * num_actions * num_states ** 2)
    rewards = np.array([((generator(0, sparse_var) if not only_pos_rewards else abs(generator(0, sparse_var))) if i < num_var_rewards else 
                        (generator(0, sparse_var) if not only_pos_rewards else abs(generator(0, sparse_var))))
                        for i in range(num_actions * num_states ** 2)])
    np.random.shuffle(rewards)
    return rewards.reshape((num_actions, num_states, num_states))

DISCOUNT = 0.9
EPSILON = 0.01 # roughly indicates the "skill level" of the agent
MAX_ITER = 1000

The sparsity levels generated by generate_tests are divided using arange from 0 to 1 and then scrambled randomly, meaning that in effect each sparsity level in the training and test sets is sampled uniformly from [0, 1].

In [179]:
def generate_tests(num_mdps = NUM_MDPs, sparsity_levels = None, mdp_generator = mdpt.mdp.PolicyIteration, 
                   P_generator = None, var_or_sparsity = "sparsity", only_pos_rewards = False):
    """
    Generate a bunch of MDPs with different sparsity levels, and return the sparsity levels and the MDPs

    Args:
        sparsity_levels: a list of sparsity levels to generate MDPs with
    Returns:
        sparsity_levels: the sparsity levels used to generate the MDPs, in the same order as the MDPs
        MDPS: an array of MDPs
    """
    sparsity_levels = sparsity_levels if sparsity_levels is not None else np.arange(num_mdps) / num_mdps
    sparsity_copy = sparsity_levels.copy() # defensive copy
    np.random.shuffle(sparsity_copy)
    reward_matrix = get_reward_matrix_variance if var_or_sparsity == "variance" else get_reward_matrix
    MDPS = np.array([mdp_generator(
        get_transition_matrix(NUM_STATES, NUM_ACTIONS) if P_generator is None else P_generator(NUM_STATES, NUM_ACTIONS), 
        reward_matrix(NUM_STATES, NUM_ACTIONS, sparsity_copy[i], only_pos_rewards = only_pos_rewards), 
        DISCOUNT, max_iter = MAX_ITER) 
        for i in range(num_mdps)
    ])
    return sparsity_copy, MDPS

sparsity_levels, MDPS = generate_tests()
for mdp in MDPS:
    mdp.run()
    # print(mdp.policy) # debug
# print(MDPS[0].policy) # debug

In [180]:
### Idea 1: neural network
# Thanks again ChatGPT for outlining the code structure

def fixed_P_generator(num_states, num_actions):
    """
    Returns a fixed transition matrix for a given number of states and actions
    (Ideally something we hope will give interesting results, like having some states be absorbing)
    """
    P = np.zeros((num_actions, num_states, num_states)) # (A, S, S') shape
    for a in range(num_actions):
        for s in range(num_states):
            P[a, s, (s + 1) % num_states] = 1
    return P

def sparse_P_generator(num_states, num_actions):
    """
    Returns a sparse transition matrix for a given number of states and actions
    
    Returns:
        P: (num_actions, num_states, num_states) array, where P[a, s, s'] is the probability of 
        transitioning from state s to state s' given action a
    """
    P = np.zeros((num_actions, num_states, num_states)) # (A, S, S') shape
    for a in range(num_actions):
        for s in range(num_states):
            P[a, s, np.random.randint(num_states)] = 1
    return P

sparsity, MDPs = generate_tests(10000, P_generator = sparse_P_generator)
# print(np.array(MDPs[0].P).shape)
training_data = [(np.array(mdp.P), mdp.discount, mdp.policy, sparsity[i]) for i, mdp in enumerate(MDPs)]

from sklearn.preprocessing import OneHotEncoder
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

import warnings
warnings.filterwarnings("ignore")

# Step 1: Feature extraction function
def extract_features(transition_function, discount_rate, optimal_policy):
    """
    Extract features from the MDP's transition function, discount rate, and optimal policy
    """
    # opt_policy = optimal_policy.reshape(-1, 1)  # Reshape for sklearn which expects 2D input

    # # Initialize the OneHotEncoder
    # encoder = OneHotEncoder(sparse=False)  # Use sparse=False to get a dense array

    # # Fit and transform
    # opt_policy_one_hot = encoder.fit_transform(opt_policy)

    features = np.concatenate((transition_function.flatten(), [discount_rate], optimal_policy.flatten()))
    # print(features.shape)
    # length A*S*S + 1 + A*S

    # Placeholder features
    # features = np.random.rand(411)

    # Policy-only features
    # features = optimal_policy
    return features

# Step 2: Data preparation (assuming you have your data in an appropriate format)
# This is a placeholder function - you would replace it with actual data loading and processing
def prepare_data(training_data):
    features = []
    labels = []
    for transition_function, discount_rate, optimal_policy, sparsity_level in training_data:
        features.append(extract_features(transition_function, discount_rate, optimal_policy))
        labels.append(sparsity_level)
    return np.array(features), np.array(labels)

# Step 3: Model selection

def build_model(input_dim):
    model = Sequential([
        Dense(64, activation='relu', input_shape=(input_dim,)),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(1, activation='linear')  # Linear activation for regression output
    ])

    # Num parameters: 411*64 + 64 + 64*64 + 64 + 64*64 + 64 + 64*1 + 1 = ~26500
    # Num data points: 100000
    
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='mean_squared_error',  # Suitable for regression
                  metrics=['mae'])  # Mean Absolute Error as an additional metric
    # ``loss" refers to training data, ``val_loss" refers to validation data
    return model

features, labels = prepare_data(training_data)
# Example: features shape is (num_samples, num_features), adjust 'input_dim' accordingly
input_dim = features.shape[1]  # Assuming 'features' is already defined and preprocessed

model = build_model(input_dim)

# Training the model
model.fit(features, labels, epochs=100, validation_split=0.2, verbose = 1, 
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])

# Don't forget to preprocess your new data before making predictions
# predicted_sparsity = model.predict(new_features)

# Step 5: Prediction function
def predict_sparsity(transition_function, discount_rate, optimal_policy):
    features = extract_features(transition_function, discount_rate, optimal_policy).reshape(1, -1)
    predicted_sparsity = model(features) # more efficient than .predict() for single samples
    return predicted_sparsity

# Testing model
test_sparsity, test_MDPs = generate_tests(10000, P_generator=sparse_P_generator)
test_data = [(np.array(mdp.P), mdp.discount, mdp.policy) for mdp in (test_MDPs)]
NUM_TESTS = 1000
mse = np.zeros(min(NUM_TESTS, len(test_data)))

for i in range(min(NUM_TESTS, len(test_data))):
    transition_function, discount_rate, optimal_policy = test_data[i]
    prediction = predict_sparsity(transition_function, discount_rate, optimal_policy)[0][0]
    mse[i] = (prediction - test_sparsity[i])**2
    # print(f"Predicted sparsity level for MDP {i+1}: {prediction}, actual sparsity level: {test_sparsity[i]}, Squared error: {mse[i]}")

print(f"Mean squared error: {np.mean(mse)}, sample size: {min(NUM_TESTS, len(test_data))}")
print("Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Mean squared error: 0.04151206620225779, sample size: 1000
Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...


With ten actions:
- As a control, when the input layer (with same dimension as transition_function + discount rate + optimal policy) is randomized, MSE = ~0.115
- I should also note that I'm choosing hyperparameters here in a rather unprincipled way by guess-timating their effects on the model
- The loss seems to settle around 0.033 after ~20% into each epoch when given 10^5 training points 
    - Maybe there's some sort of irreducible randomness going on when you randomize the reward function and don't pass it into the models?
- When reward is defined over (A, S, S') (i.e. all transitions) instead of state-action pairs, loss rises to ~0.076, i.e. basically random

With 100 actions:
- ~~Model does slightly better (now ~0.028); maybe patterns in MDP/sparsity become more apparent with more states?~~ There was an error in how I calculated the MSE here, so now I'm not sure
- In terms of computation, generating the MDPs takes a lot longer than training the model
    - Increasing epsilon doesn't improve MDP generation/solving time (it actually makes it worse for some reason); I assume then that most of the calculation is in generating the MDPs themselves

With deterministic MDPs: 
- I initially had an error that made the validation and test loss very different; it turns out that my test set was from denser MDPs, which actually tells us that the those respective models are fundamentally different
- Even when the reward function is defined over transitions, the models with deterministic MDPs approach ~0.033 (as we would expect).

In [167]:
### Idea 2: Multiple linear regression 

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error

def extract_features(transition_function, discount_rate, optimal_policy):
    """
    Extract features from the MDP's transition function, discount rate, and optimal policy
    """
    # features = np.concatenate((transition_function.flatten(), [discount_rate], optimal_policy.flatten()))
    features = optimal_policy
    return features

sparsity, MDPs = generate_tests(10000, P_generator = sparse_P_generator)
# print(np.array(MDPs[0].P).shape)
training_data = [(np.array(mdp.P), mdp.discount, mdp.policy, sparsity[i]) for i, mdp in enumerate(MDPs)]
# print(sparsity)
features, labels = prepare_data(training_data)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create a model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean squared error: {mse}")
print("Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...")
print(f"Mean absolute error: {mae}")

model.coef_

Mean squared error: 0.03490242308421741
Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...
Mean absolute error: 0.14758464850651995


array([-0.04306978, -0.04014652, -0.03965629, -0.04077469, -0.04082115,
       -0.03976853, -0.03893425, -0.03873499, -0.04249063, -0.04097574])

- Interesting! When I increased the number of states from 10 to 100 and *decreased* the number of training data points from 10^5 to 10^4, test loss *decreased* from ~0.033 to ~0.014 and stayed that way with training data = 10^3
- When I increased the number of actions from 4 to 40, both training methods got higher MSE (~0.07) with 1000 data samples

In [156]:
### Idea 3: hand-crafted features

NUM_ACTIONS = 5
NUM_TRAINING_SAMPLES = 10000
from sklearn.preprocessing import OneHotEncoder, normalize
def extract_features(transition_function, discount_rate, optimal_policy):
    """
    Extract features from the MDP's transition function, discount rate, and optimal policy
    Test features that I think might be relevant
    - Sparsity of the transition function
    - Number and length of loops
    - Distance to loops/absorbing states
    - Number of absorbing states
    - Number of states that are never visited
    - Number of states with lots of outward transitions
    """
    transition_sparsity = np.mean(transition_function == 0)
    num_loops = 0
    loop_lengths = []
    for s in range(transition_function.shape[1]):
        exists_loop = [transition_function[a, s, s] > 0.5 for a in range(transition_function.shape[0])]
        a = np.argmax(exists_loop)
        if exists_loop[a]:
            # checking if there exists an action that leads to the same state with probability > 0.5
            num_loops += 1
            loop_length = 1
            next_state = np.argmax(transition_function[a, s, :])
            while next_state != s:
                loop_length += 1
                next_state = np.argmax(transition_function[a, next_state, :])
            loop_lengths.append(loop_length)
    avg_loop_length = np.mean(loop_lengths) if len(loop_lengths) > 0 else 0

    # Policy features
    encoder = OneHotEncoder(sparse = False, drop = 'first')
    # Drop first to avoid multicollinearity, large coefficients
    encoder.fit(np.arange(NUM_ACTIONS).reshape(-1, 1))
    # print(encoder.categories_)
    # print(optimal_policy)
    optimal_policy = encoder.transform(optimal_policy.reshape(-1, 1)).reshape(-1)
    # in features[0:4] we have the one-hot encoding of the first action (only one of them is 1)
    features = optimal_policy
    # features = 

    # features = normalize(np.array([transition_sparsity, num_loops, avg_loop_length]).reshape(-1, 1), axis=0).reshape(-1)
    # features = np.append(np.array([transition_sparsity, num_loops, avg_loop_length]), optimal_policy)
    # features = np.concatenate((transition_function.flatten(), [discount_rate]))
    # print(features)
    return features

### Neural network
# Data generation
sparsity, MDPs = generate_tests(NUM_TRAINING_SAMPLES, P_generator = fixed_P_generator)
# print(np.array(MDPs[0].P).shape)
training_data = [(np.array(mdp.P), mdp.discount, mdp.policy, sparsity[i]) for i, mdp in enumerate(MDPs)]

features, labels = prepare_data(training_data)
# Example: features shape is (num_samples, num_features), adjust 'input_dim' accordingly
input_dim = features.shape[1]  # Assuming 'features' is already defined and preprocessed

model = build_model(input_dim)

# Training the model
model.fit(features, labels, epochs=100, validation_split=0.2, verbose = 1, 
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])
# print([(labels[i], model.predict(features[i].reshape(1, -1))) for i in range(10)])


### Multiple linear regression
features, labels = prepare_data(training_data)
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Create a model
model_lin = LinearRegression()
model_lin.fit(X_train, y_train)

# Make predictions
y_pred = model_lin.predict(X_test)

# Evaluate the model
print([(y_test[i], y_pred[i], sum(X_test[i] * model_lin.coef_) + model_lin.intercept_) for i in range(10)])
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Linear regression:")
print(f"Mean squared error: {mse}")
print("Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...")
print(f"Mean absolute error: {mae}")

# show the coefficients
model_lin.coef_, model_lin.intercept_

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
[(0.7074, 0.6317900612971554, 0.6317900612971554), (0.0398, 0.29808888874576056, 0.29808888874576056), (0.8858, 0.5393718413223024, 0.5393718413223025), (0.7344, 0.6206758799947386, 0.6206758799947386), (0.0505, 0.35954237502618147, 0.3595423750261816), (0.1539, 0.632874573268676, 0.632874573268676), (0.8491, 0.7700441493010938, 0.7700441493010938), (0.6203, 0.36663316561725523, 0.36663316561725523), (0.2993, 0.25877315940029944, 0.2587731594002993), (0.9012, 0.739812277512853, 0.739812277512853)]
Linear regression:
Mean squared error: 0.03204490490503865
Expected squared error: when x, y ~ U[0, 1], E[(x-y)^2] = 1/12 = 0.0833...
Mean absolute error: 0.14189683452151353


(array([-0.06933592, -0.09891014, -0.09879985, -0.09483585, -0.06172479,
        -0.08538418, -0.09635115, -0.09529149, -0.06706835, -0.0913653 ,
        -0.09194442, -0.09064689, -0.06276526, -0.08421077, -0.08929158,
        -0.09045069, -0.07282011, -0.09955749, -0.09411468, -0.09492564,
        -0.07352159, -0.07047249, -0.0947746 , -0.08861097, -0.07493727,
        -0.09890601, -0.1027882 , -0.09765226, -0.0712478 , -0.09460538,
        -0.09854439, -0.08946201, -0.07123492, -0.08570229, -0.08671676,
        -0.08818883, -0.07532073, -0.08574614, -0.10410712, -0.09781005]),
 1.0133928824772653)

In [157]:
# Check that all the fixed MDPs are equal
assert all([np.array_equal(mdp[0], training_data[0][0]) for mdp in training_data])

- Tossing in only features of the MDP (loop length, etc.) doesn't seem to help (~near-random MSE), but including the policy immediately jumps to 0.033 again
- On linear regression when one-hot encoding is applied to just the optimal policy, the coefficients are the same for every chunk of four elements, and they're all very large (magnitude ~1E10-5E12) for some reason
- Training on just the transition function + discount rate gives basically random results

In [158]:
""" Saving coefficient outputs
First run:
array([-1.30675583e+11, -1.30675583e+11, -1.30675583e+11, -1.30675583e+11,
        2.25297886e+11,  2.25297886e+11,  2.25297886e+11,  2.25297886e+11,
       -1.25434170e+10, -1.25434170e+10, -1.25434170e+10, -1.25434170e+10,
        7.59830556e+11,  7.59830556e+11,  7.59830556e+11,  7.59830556e+11,
        2.88124182e+11,  2.88124182e+11,  2.88124182e+11,  2.88124182e+11,
       -2.20636912e+12, -2.20636912e+12, -2.20636912e+12, -2.20636912e+12,
        1.17074647e+12,  1.17074647e+12,  1.17074647e+12,  1.17074647e+12,
       -1.08094503e+12, -1.08094503e+12, -1.08094503e+12, -1.08094503e+12,
        4.57643503e+11,  4.57643503e+11,  4.57643503e+11,  4.57643503e+11,
       -1.72875563e+12, -1.72875563e+12, -1.72875563e+12, -1.72875563e+12])
Second run:
array([ 6.42943536e+10,  6.42943536e+10,  6.42943536e+10,  6.42943536e+10,
        4.38543769e+10,  4.38543769e+10,  4.38543769e+10,  4.38543769e+10,
        1.25801626e+11,  1.25801626e+11,  1.25801626e+11,  1.25801626e+11,
        2.11727524e+11,  2.11727524e+11,  2.11727524e+11,  2.11727524e+11,
        2.01145141e+09,  2.01145141e+09,  2.01145141e+09,  2.01145141e+09,
       -7.43402751e+11, -7.43402751e+11, -7.43402751e+11, -7.43402751e+11,
        8.19022461e+11,  8.19022461e+11,  8.19022461e+11,  8.19022461e+11,
       -2.38391058e+12, -2.38391058e+12, -2.38391058e+12, -2.38391058e+12,
        5.12756087e+11,  5.12756087e+11,  5.12756087e+11,  5.12756087e+11,
       -1.69323492e+11, -1.69323492e+11, -1.69323492e+11, -1.69323492e+11])
After fixing multicollinearity:
(array([-0.06926151, -0.08675559, -0.10041903, -0.07555895, -0.09954824,
        -0.10887027, -0.06850544, -0.08640547, -0.09076514, -0.05552694,
        -0.08782644, -0.09442183, -0.08313579, -0.10442827, -0.1021993 ,
        -0.06599209, -0.08373147, -0.09186291, -0.07417732, -0.10491376,
        -0.10016291, -0.07514404, -0.08543159, -0.09656789, -0.06529272,
        -0.08765322, -0.10928858, -0.08787039, -0.10028769, -0.09871483]),
 intercept = 0.9679213745503301)

 TODO: implement automated hyperparameter tuning
"""

' Saving coefficient outputs\nFirst run:\narray([-1.30675583e+11, -1.30675583e+11, -1.30675583e+11, -1.30675583e+11,\n        2.25297886e+11,  2.25297886e+11,  2.25297886e+11,  2.25297886e+11,\n       -1.25434170e+10, -1.25434170e+10, -1.25434170e+10, -1.25434170e+10,\n        7.59830556e+11,  7.59830556e+11,  7.59830556e+11,  7.59830556e+11,\n        2.88124182e+11,  2.88124182e+11,  2.88124182e+11,  2.88124182e+11,\n       -2.20636912e+12, -2.20636912e+12, -2.20636912e+12, -2.20636912e+12,\n        1.17074647e+12,  1.17074647e+12,  1.17074647e+12,  1.17074647e+12,\n       -1.08094503e+12, -1.08094503e+12, -1.08094503e+12, -1.08094503e+12,\n        4.57643503e+11,  4.57643503e+11,  4.57643503e+11,  4.57643503e+11,\n       -1.72875563e+12, -1.72875563e+12, -1.72875563e+12, -1.72875563e+12])\nSecond run:\narray([ 6.42943536e+10,  6.42943536e+10,  6.42943536e+10,  6.42943536e+10,\n        4.38543769e+10,  4.38543769e+10,  4.38543769e+10,  4.38543769e+10,\n        1.25801626e+11,  1.25801

In [176]:
### Idea 3.5: Hyperparameter tuning

from tensorflow.keras.optimizers import Adam

def build_model(n_layers=1, n_units=64, dropout_rate=0.5, learning_rate=0.001, input_dim = NUM_STATES * (NUM_ACTIONS - 1)):
    model = Sequential()
    model.add(Dense(n_units, activation='relu', input_shape=(input_dim,)))  # Assuming input_shape is defined
    for _ in range(n_layers - 1):
        model.add(Dense(n_units, activation='relu'))
        model.add(Dropout(dropout_rate))
    model.add(Dense(1, activation='sigmoid'))  # Assuming binary classification or regression
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='mean_squared_error',  # or 'mean_squared_error' for regression
                  metrics=['accuracy'])  # or other metrics for regression
    return model

from kerastuner import HyperModel, RandomSearch

class MyHyperModel(HyperModel):
    def build(self, hp):
        return build_model(
            n_layers=hp.Int('n_layers', 1, 5),  # Number of layers
            n_units=hp.Int('n_units', 32, 256),  # Number of units per layer
            dropout_rate=hp.Float('dropout_rate', 0.1, 0.5),  # Dropout rate
            learning_rate=hp.Float('learning_rate', 1e-4, 1e-2)  # Learning rate
        )

hypermodel = MyHyperModel()

tuner = RandomSearch(
    hypermodel,
    objective='val_loss',  # or 'val_accuracy' for classification
    max_trials=20,  # Number of trials to run
    executions_per_trial=2,  # Number of models to build and fit for each trial
    directory='my_dir',  # Directory to save logs and models
    project_name='sparsity_prediction'
)

tuner.search(X_train, y_train, epochs=20, validation_split=0.2)

Reloading Tuner from my_dir\sparsity_prediction\tuner0.json


In [175]:
### Testing variance definition of coherence, only positive rewards

### Neural network
# Data generation
sparsity1, MDPs1 = generate_tests(NUM_TRAINING_SAMPLES, P_generator = sparse_P_generator, 
                                  var_or_sparsity = "variance")
sparsity2, MDPs2 = generate_tests(NUM_TRAINING_SAMPLES, P_generator = sparse_P_generator,
                                  only_pos_rewards = True)

features1, labels1 = prepare_data([(np.array(mdp.P), mdp.discount, mdp.policy, sparsity1[i]) for i, mdp in enumerate(MDPs1)])
features2, labels2 = prepare_data([(np.array(mdp.P), mdp.discount, mdp.policy, sparsity2[i]) for i, mdp in enumerate(MDPs2)])

X_train1, X_test1, y_train1, y_test1 = train_test_split(features1, labels1, test_size=0.2, random_state=42)
X_train2, X_test2, y_train2, y_test2 = train_test_split(features2, labels2, test_size=0.2, random_state=42)
model_var = LinearRegression()
model_pos = LinearRegression()
model_var.fit(X_train1, y_train1)
model_pos.fit(X_train2, y_train2)

# Make predictions
y_pred1 = model_var.predict(X_test1)
y_pred2 = model_pos.predict(X_test2)

# Evaluate the model
mse1 = mean_squared_error(y_test1, y_pred1)
mae1 = mean_absolute_error(y_test1, y_pred1)
print(f"Mean squared error: {mse1}, mean absolute error: {mae1}")
mse2 = mean_squared_error(y_test2, y_pred2)
mae2 = mean_absolute_error(y_test2, y_pred2)
print(f"Mean squared error: {mse2}, mean absolute error: {mae2}")


Mean squared error: 0.08041135458219313, mean absolute error: 0.24394241106752718
Mean squared error: 0.04654428173014684, mean absolute error: 0.17532788010060416


- Naively forcing all rewards to be positive via the abs() function decreases classifier accuracy
- Naively using the variance definition of coherence destroys classifier accuracy (although I'm probably not doing it right), regardless of whether abs() is used or not
    - Not sure why this is happening

In [181]:
### Neural network
# Data generation
sparsity, MDPs = generate_tests(NUM_TRAINING_SAMPLES, P_generator = fixed_P_generator,
                                var_or_sparsity="variance")
# print(np.array(MDPs[0].P).shape)
training_data = [(np.array(mdp.P), mdp.discount, mdp.policy, sparsity[i]) for i, mdp in enumerate(MDPs)]

features, labels = prepare_data(training_data)
# Example: features shape is (num_samples, num_features), adjust 'input_dim' accordingly
input_dim = features.shape[1]  # Assuming 'features' is already defined and preprocessed

model = build_model(input_dim)

# Training the model
model.fit(features, labels, epochs=100, validation_split=0.2, verbose = 1, 
          callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])
model.loss

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100


<keras.callbacks.History at 0x2ca3be9fe20>

In [194]:
### Search over hyperparameters of training data
import itertools

params = list(itertools.product(["variance", "sparsity"], [False, True], [fixed_P_generator, sparse_P_generator, None], [1000, 10000]))
training_param_results = {}

for param in params:
    var_or_sparsity, only_pos_rewards, P_generator, num_training_samples = param
    P_generator_str = P_generator.__name__ if P_generator is not None else "Dense"
    # Train neural network
    sparsity, MDPs = generate_tests(num_training_samples, P_generator = P_generator, 
                                    var_or_sparsity = var_or_sparsity, only_pos_rewards = only_pos_rewards)
    features, labels = prepare_data([(np.array(mdp.P), mdp.discount, mdp.policy, sparsity[i]) for i, mdp in enumerate(MDPs)])
    input_dim = features.shape[1]
    model = build_model(input_dim)
    model.fit(features, labels, epochs=100, validation_split=0.2, verbose = 0, 
              callbacks=[tf.keras.callbacks.EarlyStopping(patience=3)])
    
    # Test data set for NN, training data set for linear regression
    sparsity2, MDPs2 = generate_tests(num_training_samples, P_generator = P_generator,
                                    var_or_sparsity = var_or_sparsity, only_pos_rewards = only_pos_rewards)
    training_data = [(np.array(mdp.P), mdp.discount, mdp.policy, sparsity2[i]) for i, mdp in enumerate(MDPs2)]
    features, labels = prepare_data(training_data)
    nn_loss = model.evaluate(features, labels, verbose=0)[0]
    print(f"NN loss for coherence = {var_or_sparsity}, only_pos_rewards = {only_pos_rewards}, P_generator = {P_generator_str}, num_training_samples = {num_training_samples}: {nn_loss}")

    # Train, test linear regression
    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
    model_lin = LinearRegression()
    model_lin.fit(X_train, y_train)
    y_pred = model_lin.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    print(f"Regression model loss for variance = {var_or_sparsity}, only_pos_rewards = {only_pos_rewards}, P_generator = {P_generator_str}, num_training_samples = {num_training_samples}: {mse}, {mae}")
    
    training_param_results[param] = (nn_loss, mse)


NN loss for coherence = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 1000: 0.09371180087327957
Regression model loss for variance = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 1000: 0.09146794253082274, 0.2669223046875
NN loss for coherence = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 10000: 0.08359464257955551
Regression model loss for variance = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 10000: 0.0833858502318457, 0.25080793749999997
NN loss for coherence = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 100000: 0.08333457261323929
Regression model loss for variance = variance, only_pos_rewards = False, P_generator = fixed_P_generator, num_training_samples = 100000: 0.08305501349487752, 0.24915917619921873
NN loss for coherence = variance, only_pos_re

In [195]:
training_param_results

{('variance',
  False,
  <function __main__.fixed_P_generator(num_states, num_actions)>,
  1000): (0.09371180087327957, 0.09146794253082274),
 ('variance',
  False,
  <function __main__.fixed_P_generator(num_states, num_actions)>,
  10000): (0.08359464257955551, 0.0833858502318457),
 ('variance',
  False,
  <function __main__.fixed_P_generator(num_states, num_actions)>,
  100000): (0.08333457261323929, 0.08305501349487752),
 ('variance',
  False,
  <function __main__.sparse_P_generator(num_states, num_actions)>,
  1000): (0.09832251071929932, 0.17186699407470704),
 ('variance',
  False,
  <function __main__.sparse_P_generator(num_states, num_actions)>,
  10000): (0.09385699778795242, 0.08944895859643068),
 ('variance',
  False,
  <function __main__.sparse_P_generator(num_states, num_actions)>,
  100000): (0.08356982469558716, 0.08347383734873394),
 ('variance', False, None, 1000): (0.0955672413110733, 0.13809882730712889),
 ('variance', False, None, 10000): (0.08512832224369049, 0.0895