In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding, Reshape, Concatenate, Multiply
from keras.utils import to_categorical
from keras.regularizers import l2
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.callbacks import EarlyStopping, TensorBoard
from datetime import datetime
import pickle

def r_squared(y_true, y_pred):
    """Coefficient of determination (R^2) computed with TensorFlow ops.

    Args:
        y_true: Tensor of target values.
        y_pred: Tensor of predicted values, subtractable from ``y_true``.

    Returns:
        Scalar tensor ``1 - SS_res / (SS_tot + epsilon)``, where both sums
        run over every element of the tensors passed in.
    """
    residuals = y_true - y_pred
    deviations = y_true - tf.reduce_mean(y_true)

    unexplained = tf.reduce_sum(tf.square(residuals))
    total = tf.reduce_sum(tf.square(deviations))

    # Keras' fuzz factor keeps the ratio finite when the targets are constant.
    fuzz = tf.keras.backend.epsilon()
    return 1 - unexplained / (total + fuzz)

def build_expert(input_shape, num_neurons, dropout_rate, name):
    """Create one expert sub-model: LSTM -> Dropout -> single linear unit.

    Args:
        input_shape: Shape passed to the expert's ``Input`` layer (without the
            batch dimension).
        num_neurons: Number of units in the LSTM layer.
        dropout_rate: Rate passed to the Dropout layer that follows the LSTM.
        name: Suffix used to name the model and each of its layers.

    Returns:
        A Keras ``Model`` named ``expert_<name>`` with a single output unit.
    """
    expert_input = Input(shape=input_shape, name=f"input_{name}")

    # Only the last LSTM state is kept (return_sequences=False).
    recurrent = LSTM(num_neurons, return_sequences=False, name=f"lstm_{name}")
    regulariser = tf.keras.layers.Dropout(dropout_rate, name=f"dropout_{name}")
    head = Dense(1, activation='linear', name=f"dense_{name}")

    prediction = head(regulariser(recurrent(expert_input)))

    return Model(inputs=expert_input, outputs=prediction, name=f"expert_{name}")

def build_moe_model(num_offices, num_species, num_neurons_per_expert, input_shape, dropout_rate=0.1):
    """Build and compile a mixture-of-experts regressor gated by office and species.

    One LSTM expert is created per (office, species) pair. The gate for each
    sample is the element-wise product of an office embedding and a species
    embedding, both of width ``num_offices * num_species``; the gate values are
    learned and are not normalised. The model output is the gate-weighted sum
    of the expert predictions, i.e. one value per sample.

    Args:
        num_offices: Number of distinct office ids (embedding vocabulary size).
        num_species: Number of distinct species ids (embedding vocabulary size).
        num_neurons_per_expert: LSTM units in every expert.
        input_shape: Shape of the main input (without the batch dimension).
        dropout_rate: Dropout rate used inside every expert.

    Returns:
        A compiled Keras ``Model`` taking ``[main_input, office_input,
        species_input]`` and producing a tensor of shape ``(batch, 1)``.

    Raises:
        ValueError: If ``num_offices * num_species`` is less than 1.
    """
    num_experts = num_offices * num_species
    if num_experts < 1:
        raise ValueError(
            "num_offices and num_species must both be >= 1 "
            f"(got num_offices={num_offices}, num_species={num_species})"
        )

    # Main input for the model
    main_input = Input(shape=input_shape, name="main_input")

    # Inputs for office and species, which will determine expert selection
    office_input = Input(shape=(1,), dtype='int32', name="office_input")
    species_input = Input(shape=(1,), dtype='int32', name="species_input")

    # Embeddings for office and species. `input_length` is omitted: the
    # sequence length is already fixed by the (1,) inputs, and the argument is
    # deprecated in Keras 3.
    office_embedding = Embedding(num_offices, num_experts, name="office_embedding")(office_input)
    species_embedding = Embedding(num_species, num_experts, name="species_embedding")(species_input)

    # Flatten embeddings from (batch, 1, num_experts) to (batch, num_experts)
    office_flat = Reshape((num_experts,))(office_embedding)
    species_flat = Reshape((num_experts,))(species_embedding)

    # Element-wise multiplication to combine embeddings, acting as gating mechanism
    combined_gates = Multiply(name="multiply_gates")([office_flat, species_flat])

    # Build experts
    experts = [build_expert(input_shape, num_neurons_per_expert, dropout_rate, f"office_{o}_species_{s}")
               for o in range(num_offices) for s in range(num_species)]

    # Expert outputs, each of shape (batch, 1)
    expert_outputs = [expert(main_input) for expert in experts]

    # Concatenate all expert outputs into (batch, num_experts). Concatenate
    # rejects a single-element list, so a lone expert is used directly.
    if len(expert_outputs) > 1:
        concatenated_outputs = Concatenate(name="concatenate_experts")(expert_outputs)
    else:
        concatenated_outputs = expert_outputs[0]

    # Weighted sum of expert outputs based on combined gates. A per-sample dot
    # product reduces (batch, num_experts) to (batch, 1); a plain Multiply
    # would leave one output per expert and make the loss broadcast the target
    # against every expert instead of against a single prediction.
    final_output = tf.keras.layers.Dot(axes=1, name="weighted_sum")([concatenated_outputs, combined_gates])

    # Final model
    model = Model(inputs=[main_input, office_input, species_input], outputs=final_output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse',r_squared,tf.keras.metrics.RootMeanSquaredError()])

    return model

def prepare_data_and_train_model(filename, num_neurons_per_expert, dropout_rate=0.1):
    """Load the CSV, preprocess it, then train and evaluate the MoE model.

    The CSV must contain the columns ``NOMBRE OFICINA``, ``NOMBRE PRINCIPAL``,
    ``SST`` and ``PESO DESEMBARCADO_KILOGRAMOS``. Office and species names are
    label-encoded over the whole file (the embedding sizes depend on the full
    vocabulary). ``SST`` (feature) and the landed weight (target) are min-max
    scaled with scalers fitted on the training split only, so that the held-out
    20 % does not influence the scaling.

    Args:
        filename: Path to the CSV file.
        num_neurons_per_expert: LSTM units in every expert.
        dropout_rate: Dropout rate used inside every expert.

    Returns:
        Tuple ``(model, office_encoder, species_encoder, scaler_sst,
        scaler_weight)`` with the trained model and the fitted preprocessors.

    Raises:
        KeyError: If any required column is missing from the CSV.
    """
    df = pd.read_csv(filename)

    required_columns = ['NOMBRE OFICINA', 'NOMBRE PRINCIPAL', 'SST', 'PESO DESEMBARCADO_KILOGRAMOS']
    missing_columns = [column for column in required_columns if column not in df.columns]
    if missing_columns:
        raise KeyError(f"{filename} is missing required columns: {missing_columns}")

    office_encoder = LabelEncoder()
    species_encoder = LabelEncoder()
    offices = office_encoder.fit_transform(df['NOMBRE OFICINA'])
    species = species_encoder.fit_transform(df['NOMBRE PRINCIPAL'])

    # Split the raw columns first; the scalers are fitted afterwards on the
    # training rows only. Same test_size/random_state as before, so the row
    # partition is unchanged.
    (sst_train, sst_test,
     weight_train, weight_test,
     offices_train, offices_test,
     species_train, species_test) = train_test_split(
        df[['SST']], df[['PESO DESEMBARCADO_KILOGRAMOS']], offices, species,
        test_size=0.2, random_state=42)

    scaler_sst = MinMaxScaler()
    scaler_weight = MinMaxScaler()

    # Reshape to (samples, timesteps, features) for the LSTM input.
    X_train = scaler_sst.fit_transform(sst_train).reshape(-1, 1, 1)
    X_test = scaler_sst.transform(sst_test).reshape(-1, 1, 1)

    # Targets stay one-dimensional, as in the unscaled column.
    y_train = scaler_weight.fit_transform(weight_train).ravel()
    y_test = scaler_weight.transform(weight_test).ravel()

    num_offices = len(office_encoder.classes_)
    num_species = len(species_encoder.classes_)

    # Create model
    model = build_moe_model(num_offices, num_species, num_neurons_per_expert, (1, 1), dropout_rate)

    log_dir = "logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1, write_graph=True)

    early_stop = EarlyStopping(monitor='val_loss', patience=15, mode='min', restore_best_weights=True)
    model.fit([X_train, offices_train, species_train], y_train, epochs=50, batch_size=64, validation_split=0.2,
              callbacks=[early_stop, tensorboard_callback], verbose=1)

    # evaluate() yields the loss plus every compiled metric; ask for a dict so
    # each value is reported under its own name instead of a bare list.
    test_metrics = model.evaluate([X_test, offices_test, species_test], y_test, verbose=0, return_dict=True)
    print(f"Test MSE: {test_metrics['loss']}")
    for metric_name, metric_value in test_metrics.items():
        # 'loss' is printed above and 'mse' is the same quantity as the loss.
        if metric_name not in ('loss', 'mse'):
            print(f'Test {metric_name}: {metric_value}')

    return model, office_encoder, species_encoder, scaler_sst, scaler_weight


In [5]:
# CSV file read by prepare_data_and_train_model
filename = 'aggregated_data4.csv'

# Expert hyperparameters: LSTM units per expert and the dropout rate applied
# after each expert's LSTM
num_neurons_per_expert = 10
dropout_rate = 0.1

# Prepare the data, build the model and train it; keep the fitted encoders and
# scalers alongside the trained model
(model,
 office_encoder,
 species_encoder,
 scaler_sst,
 scaler_weight) = prepare_data_and_train_model(
    filename=filename,
    num_neurons_per_expert=num_neurons_per_expert,
    dropout_rate=dropout_rate,
)

NameError: name 'ir' is not defined