In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Concatenate, Lambda, Dropout, Embedding, Multiply
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.callbacks import EarlyStopping, TensorBoard
import datetime
import pickle

In [14]:
class MoEWeightingLayer(tf.keras.layers.Layer):
    """Trainable linear projection that yields one score per expert.

    The layer owns a single weight matrix of shape ``(input_dim, num_experts)``
    and returns ``inputs @ w``. No bias and no activation are applied.

    Args:
        num_experts: Number of output columns (one per expert).
        **kwargs: Forwarded unchanged to ``tf.keras.layers.Layer``.
    """

    def __init__(self, num_experts, **kwargs):
        super().__init__(**kwargs)
        self.num_experts = num_experts

    def build(self, input_shape):
        """Create the projection matrix once the input width is known."""
        # The last axis is the one tf.matmul contracts over; for the rank-2
        # inputs used in this notebook it is the same as input_shape[1].
        self.w = self.add_weight(
            name='weights',
            shape=(input_shape[-1], self.num_experts),
            initializer='uniform',
            trainable=True,
        )

    def call(self, inputs):
        """Return ``inputs @ w``."""
        return tf.matmul(inputs, self.w)

    def get_config(self):
        """Return the constructor arguments so Keras can re-create the layer.

        Without this, a saved model containing the layer cannot be rebuilt
        from its config because ``num_experts`` would be missing.
        """
        config = super().get_config()
        config.update({'num_experts': self.num_experts})
        return config

def build_moe_model(num_offices, num_species, input_shape, lstm_units=50, dropout_rate=0.1):
    """Build and compile the office-gated mixture-of-experts regressor.

    One stacked-LSTM expert is created per office. The office id selects a
    learned row of gating logits (via ``MoEWeightingLayer`` applied to the
    one-hot office id); the softmax of those logits weights the expert
    outputs, and the weighted sum is multiplied by a learned scalar
    per-species factor.

    Args:
        num_offices: Number of distinct office ids; also the number of experts.
        num_species: Number of distinct species ids (embedding vocabulary size).
        input_shape: ``(timesteps, features)`` of the main sequence input.
        lstm_units: Hidden size of each LSTM layer in every expert.
        dropout_rate: Dropout rate applied after each LSTM layer.

    Returns:
        A compiled Keras ``Model`` taking ``[main_input, office_input,
        species_input]`` and producing one value per sample, shape ``(batch, 1)``.

    Raises:
        ValueError: If ``num_offices`` is smaller than 1.
    """
    if num_offices < 1:
        raise ValueError("num_offices must be at least 1")

    main_input = Input(shape=(input_shape[0], input_shape[1]))
    office_input = Input(shape=(1,))
    species_input = Input(shape=(1,))

    # Expert models for each office
    expert_outputs = []
    for _ in range(num_offices):
        x = LSTM(lstm_units, return_sequences=True)(main_input)
        x = Dropout(dropout_rate)(x)
        x = LSTM(lstm_units)(x)
        x = Dropout(dropout_rate)(x)
        expert_outputs.append(Dense(1)(x))

    # A single expert needs no concatenation; its output is already (batch, 1).
    if len(expert_outputs) == 1:
        concatenated = expert_outputs[0]
    else:
        concatenated = Concatenate()(expert_outputs)

    # Gate the experts with the office id. Previously office_input was never
    # connected to the graph, so the office could not influence the prediction.
    office_one_hot = Lambda(
        lambda ids: tf.one_hot(tf.reshape(tf.cast(ids, tf.int32), [-1]), depth=num_offices)
    )(office_input)
    gate_logits = MoEWeightingLayer(num_offices)(office_one_hot)
    gate = Lambda(lambda logits: tf.nn.softmax(logits, axis=-1))(gate_logits)
    gated_experts = Multiply()([concatenated, gate])

    # keepdims=True keeps the result at (batch, 1) so it lines up with the
    # species factor below without relying on implicit rank promotion.
    office_output = Lambda(lambda t: tf.reduce_sum(t, axis=1, keepdims=True))(gated_experts)

    # Species adjustment layer: one learned scalar per species id.
    species_embedding = Embedding(num_species, 1)(species_input)
    species_embedding = Lambda(lambda x: tf.squeeze(x, axis=-1))(species_embedding)

    # Multiply the office output by the species adjustment factor
    adjusted_output = Multiply()([office_output, species_embedding])

    model = Model(inputs=[main_input, office_input, species_input], outputs=adjusted_output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mse'])
    return model

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, TensorBoard
import datetime

def prepare_data_and_train_model(filename, epochs=5, batch_size=32, patience=10):
    """Load the landings CSV, fit the preprocessing objects and train the model.

    The CSV must contain the columns ``NOMBRE_OFICINA``, ``NOMBRE_PRINCIPAL``,
    ``SST`` and ``PESO DESEMBARCADO_KILOGRAMOS``.

    Args:
        filename: Path of the CSV file to read.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size passed to ``model.fit``.
        patience: Early-stopping patience in epochs. Early stopping can only
            trigger when ``patience`` is smaller than ``epochs``; with the
            defaults (10 vs 5) training always runs all epochs.

    Returns:
        Tuple ``(model, office_encoder, species_encoder, scaler_sst,
        scaler_weight)``. Both scalers are fitted on the training split only.
    """
    df = pd.read_csv(filename)

    # The encoders are fitted on every row so that each label has an id.
    office_encoder = LabelEncoder()
    species_encoder = LabelEncoder()
    offices = office_encoder.fit_transform(df['NOMBRE_OFICINA'])
    species = species_encoder.fit_transform(df['NOMBRE_PRINCIPAL'])

    # Split the raw values first; the scalers are fitted afterwards on the
    # training part only so test-set min/max do not leak into preprocessing.
    (sst_train, sst_test, weight_train, weight_test,
     offices_train, offices_test, species_train, species_test) = train_test_split(
        df[['SST']], df[['PESO DESEMBARCADO_KILOGRAMOS']], offices, species,
        test_size=0.2, random_state=42)

    # Scaler for SST
    scaler_sst = MinMaxScaler()
    sst_train_scaled = scaler_sst.fit_transform(sst_train)
    sst_test_scaled = scaler_sst.transform(sst_test)

    # Scaler for PESO DESEMBARCADO_KILOGRAMOS
    scaler_weight = MinMaxScaler()
    y_train = scaler_weight.fit_transform(weight_train).ravel()
    y_test = scaler_weight.transform(weight_test).ravel()

    # Give the sequence input its explicit (samples, timesteps, 1) layout
    # instead of relying on Keras to add the missing axis.
    input_shape = (sst_train_scaled.shape[1], 1)
    X_train = sst_train_scaled.reshape(-1, input_shape[0], input_shape[1])
    X_test = sst_test_scaled.reshape(-1, input_shape[0], input_shape[1])

    num_offices = len(office_encoder.classes_)
    num_species = len(species_encoder.classes_)

    model = build_moe_model(num_offices, num_species, input_shape)

    # Setting up TensorBoard
    log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

    early_stop = EarlyStopping(monitor='val_loss', patience=patience, mode='min', restore_best_weights=True)
    model.fit([X_train, offices_train, species_train], y_train, epochs=epochs, batch_size=batch_size,
              validation_split=0.2, callbacks=[early_stop, tensorboard_callback], verbose=1)

    # evaluate() returns [loss, metric, ...] when metrics are compiled in and
    # a bare scalar otherwise; the loss (MSE) is the first entry either way.
    evaluation = model.evaluate([X_test, offices_test, species_test], y_test, verbose=0)
    mse = float(np.atleast_1d(evaluation)[0])
    print(f'Test MSE: {mse}')

    # Return the model, encoders, and both scalers for further use
    return model, office_encoder, species_encoder, scaler_sst, scaler_weight



In [16]:
# Train on the aggregated file and keep every fitted object that is returned
model, office_encoder, species_encoder, scaler_sst, scaler_weight = prepare_data_and_train_model('aggregated_data4.csv')

# Persist the encoders and scalers, one pickle file per object
artifacts_by_path = {
    'office_encoder.pkl': office_encoder,
    'species_encoder.pkl': species_encoder,
    'sst_scaler.pkl': scaler_sst,
    'weight_scaler.pkl': scaler_weight,
}
for pickle_path, artifact in artifacts_by_path.items():
    with open(pickle_path, 'wb') as handle:
        pickle.dump(artifact, handle)

KeyboardInterrupt: 

In [None]:
import pickle

# Function to prepare new data using the loaded encoders and scaler
def prepare_new_data(new_data, office_encoder, species_encoder, scaler):
    """Turn raw observations into the three-input list passed to the model.

    Args:
        new_data: DataFrame with columns ``SST``, ``NOMBRE_OFICINA`` and
            ``NOMBRE_PRINCIPAL``. It is not modified.
        office_encoder: Object whose ``transform`` maps office names to ids.
        species_encoder: Object whose ``transform`` maps species names to ids.
        scaler: Object whose ``transform`` scales the ``SST`` column.

    Returns:
        ``[X, offices, species]`` where ``X`` has shape
        ``(n_samples, n_sst_columns, 1)`` and the other two are 1-D id arrays.
    """
    offices = np.asarray(office_encoder.transform(new_data['NOMBRE_OFICINA']))
    species = np.asarray(species_encoder.transform(new_data['NOMBRE_PRINCIPAL']))

    # Pass a 2-D frame to the scaler and keep its 2-D result: the previous
    # code selected a 1-D Series, so X.shape[1] raised IndexError.
    sst_scaled = np.asarray(scaler.transform(new_data[['SST']]))

    # Same layout the training code uses: (samples, n_columns, 1).
    input_shape = (sst_scaled.shape[1], 1)
    X = sst_scaled.reshape(-1, input_shape[0], input_shape[1])
    return [X, offices, species]

# Load the encoders and the SST scaler from disk.
# NOTE(review): pickle.load executes arbitrary code from the file; only load
# pickles that this notebook wrote itself.
with open('office_encoder.pkl', 'rb') as file:
    office_encoder = pickle.load(file)
with open('species_encoder.pkl', 'rb') as file:
    species_encoder = pickle.load(file)
with open('sst_scaler.pkl', 'rb') as file:
    sst_scaler = pickle.load(file)

# Example usage: one observation to score. It is defined here so the cell does
# not depend on a variable that is only created in a later cell. The labels are
# taken from the fitted encoders because LabelEncoder.transform raises
# ValueError for labels it did not see during fit; replace them with the
# office / species of interest.
new_data = pd.DataFrame({
    'SST': [25.5],  # Example sea surface temperature
    'NOMBRE_OFICINA': [office_encoder.classes_[0]],  # Example office name
    'NOMBRE_PRINCIPAL': [species_encoder.classes_[0]]  # Example species name
})

# Prepare the new data using the loaded encoders and scaler
prepared_data = prepare_new_data(new_data, office_encoder, species_encoder, sst_scaler)


In [None]:
# Thin helper: score already-prepared inputs with a trained model
def predict_with_new_data(model, prepared_data):
    """Return the result of ``model.predict`` called on ``prepared_data``."""
    return model.predict(prepared_data)

# Example usage
# The labels are taken from the fitted encoders because LabelEncoder.transform
# raises ValueError for labels it did not see during fit; replace them with the
# office / species of interest.
new_data = pd.DataFrame({
    'SST': [25.5],  # Example sea surface temperature
    'NOMBRE_OFICINA': [office_encoder.classes_[0]],  # Example office name
    'NOMBRE_PRINCIPAL': [species_encoder.classes_[0]]  # Example species name
})

# prepare_new_data needs the SST scaler as its fourth argument; the previous
# call omitted it and raised TypeError.
prepared_data = prepare_new_data(new_data, office_encoder, species_encoder, scaler_sst)
predictions = predict_with_new_data(model, prepared_data)

# The model is trained on the min-max scaled target, so map its output back
# to kilograms before reporting it.
predicted_catch = scaler_weight.inverse_transform(np.asarray(predictions).reshape(-1, 1))
print("Predicted Catch:", predicted_catch)