In [None]:
!pip install deap
!pip install bitstring
!pip install pyngrok  # pyngrok installation

# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras.layers import LSTM, Input, Dense
from keras.models import Model
from deap import base, creator, tools, algorithms
from scipy.stats import bernoulli
from bitstring import BitArray
from matplotlib import pyplot as plt
from pyngrok import ngrok  # Import from pyngrok

# Function to import data from a CSV file
def import_data(file_path):
    """
    Load a tab-separated file and keep a fixed slice of its columns.

    The file is read with Latin-1 decoding and only the columns at
    zero-based positions 2 through 5 are kept (fewer if the file has
    fewer than six columns).

    Args:
        file_path (str): Location of the tab-separated file; handed
            unchanged to ``pandas.read_csv``.

    Returns:
        pandas.DataFrame: The selected columns, in their original order.
    """
    frame = pd.read_csv(file_path, encoding='latin1', sep='\t')
    wanted = frame.columns[2:6]
    return frame[wanted]

# Function to prepare the dataset for the LSTM model
def prepare_dataset(data, window_size, num_features, target_col=3):
    """
    Builds sliding-window samples for a one-step-ahead LSTM forecast.

    For every position ``i`` from ``window_size`` to ``len(data) - 1`` the
    sample is the ``window_size`` rows that precede ``i`` (first
    ``num_features`` columns) and the target is column ``target_col`` of
    row ``i``.

    Args:
        data (pandas.DataFrame | numpy.ndarray): 2-D table of observations,
            one row per time step. It is converted with ``numpy.asarray``
            so both DataFrames and arrays are accepted.
        window_size (int): Number of past rows in each sample.
        num_features (int): Number of leading columns used as inputs.
        target_col (int): Column index of the value to predict.
            Defaults to 3, the column used by the rest of this file.

    Returns:
        numpy.ndarray, numpy.ndarray: ``X`` with shape
        ``(n_samples, window_size, num_features)`` and ``Y`` with shape
        ``(n_samples, 1)``. Two empty arrays are returned when
        ``window_size`` is not positive or when ``data`` has too few rows
        to form a single window.
    """
    # Validate the window size
    if window_size <= 0:
        print("Invalid value for window size:", window_size)
        return np.array([]), np.array([])  # Return empty arrays

    # Positional 2-D slicing below needs an ndarray; a DataFrame would raise.
    values = np.asarray(data)

    n_future = 1  # forecast horizon: predict the row right after the window
    X, Y = [], []
    for end in range(window_size, len(values) - n_future + 1):
        target_row = end + n_future - 1
        X.append(values[end - window_size:end, :num_features])
        # Length-1 slice keeps each target as a 1-element vector -> Y is (n, 1)
        Y.append(values[target_row:target_row + 1, target_col])
    return np.array(X), np.array(Y)

# Function to train and evaluate an individual
def train_evaluate(individual, data):
    """
    Trains and evaluates an individual using an LSTM model.

    The first 5 genes of ``individual`` are decoded as an unsigned integer
    window size and the remaining genes as the number of LSTM units. A
    decoded value of 0 is replaced by 1 for either hyperparameter. The
    model is trained for 5 epochs on 90% of the windows and scored on the
    remaining 10%.

    Args:
        individual (list): Bit sequence encoding the hyperparameters.
        data (numpy.ndarray): Scaled training data, one row per time step.

    Returns:
        tuple: One-element tuple with the validation RMSE (the shape DEAP
        expects for a single-objective fitness). ``(100.0,)`` is returned
        as a penalty when fewer than two windows can be built.
    """
    # Local import: only this function needs the backend handle.
    from keras import backend as keras_backend

    num_features = 4

    window_size = BitArray(individual[:5]).uint
    num_units = BitArray(individual[5:]).uint
    print('\nWindow Size:', window_size, ', Num of Units:', num_units)

    if window_size == 0:
        print("Invalid value for window size:", window_size)
        window_size = 1

    if num_units <= 0:
        print("Invalid value for number of units, setting to default:", num_units)
        num_units = 1

    X, Y = prepare_dataset(data, window_size, num_features)
    # train_test_split needs at least one sample on each side of the split,
    # so a single window is as unusable as none.
    if len(X) < 2 or len(Y) < 2:
        return 100.0,  # Return a high error value if the data is invalid

    # Split the dataset into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.10, random_state=1120)

    # This function runs once per individual per generation; drop the state
    # left behind by previously built models so memory does not keep growing.
    keras_backend.clear_session()

    # Define the LSTM model (the Input layer already fixes the input shape)
    inputs = Input(shape=(window_size, num_features))
    x = LSTM(num_units)(inputs)
    predictions = Dense(1, activation='linear')(x)
    model = Model(inputs=inputs, outputs=predictions)
    model.summary()  # summary() prints by itself; wrapping it in print() adds "None"
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    model.fit(X_train, y_train, epochs=5, batch_size=10, shuffle=True)

    # Predict on the validation set
    y_pred = model.predict(X_val)

    # Calculate the RMSE
    rmse = np.sqrt(mean_squared_error(y_val, y_pred))
    print('Validation RMSE:', rmse, '\n')
    return rmse,

# Function to define individuals and population for the genetic algorithm
def define_individuals_and_population(population_size, gene_length, train_data):
    """
    Defines individuals and population for the genetic algorithm.

    Individuals are lists of ``gene_length`` random bits (Bernoulli, p=0.5)
    and are scored by ``train_evaluate`` on ``train_data``.

    Args:
        population_size (int): Size of the population.
        gene_length (int): Length of the gene (number of bits).
        train_data (numpy.ndarray): Training data passed to the evaluator.

    Returns:
        list, base.Toolbox: Initial population and toolbox.
    """
    # NOTE: the class keeps its historical name 'FitnessMax', but the negative
    # weight means the fitness (RMSE) is MINIMISED.
    creator.create('FitnessMax', base.Fitness, weights=(-1.0,))
    creator.create('Individual', list, fitness=creator.FitnessMax)

    toolbox = base.Toolbox()
    toolbox.register('binary', bernoulli.rvs, 0.5)
    toolbox.register('individual', tools.initRepeat, creator.Individual, toolbox.binary, n=gene_length)
    toolbox.register('population', tools.initRepeat, list, toolbox.individual)

    # Bit-string operators. cxOrdered / mutShuffleIndexes are meant for
    # permutations of indices; on a binary genome shuffling only moves bits
    # around and can never change how many of them are set.
    toolbox.register('mate', tools.cxTwoPoint)
    # About one flipped bit per mutated individual on average.
    flip_probability = 1.0 / gene_length if gene_length > 0 else 0.0
    toolbox.register('mutate', tools.mutFlipBit, indpb=flip_probability)

    # Roulette selection is proportional to the raw fitness value, so with a
    # minimised RMSE it would favour the WORST individuals. Tournament
    # selection compares fitness objects and therefore honours the weights.
    toolbox.register('select', tools.selTournament, tournsize=2)

    toolbox.register('evaluate', train_evaluate, data=train_data)
    return toolbox.population(n=population_size), toolbox

# Function to run the genetic algorithm
def run_genetic_algorithm(population, toolbox, num_generations):
    """
    Evolve the population with DEAP's simple evolutionary algorithm.

    Crossover probability is fixed at 0.6, mutation probability at 0.4,
    and per-generation logging is switched off.

    Args:
        population (list): Individuals to start from.
        toolbox (base.Toolbox): Toolbox holding the registered operators.
        num_generations (int): How many generations to run.

    Returns:
        tuple: Whatever ``algorithms.eaSimple`` returns, passed through
        untouched (per the DEAP documentation: the final population and
        a logbook of the run).
    """
    ea_settings = {
        'cxpb': 0.6,
        'mutpb': 0.4,
        'ngen': num_generations,
        'verbose': False,
    }
    outcome = algorithms.eaSimple(population, toolbox, **ea_settings)
    return outcome

# Function to get the best solution from the population
def get_best_solution(population):
    """
    Gets the best solution from the population.

    The best individual is chosen with ``tools.selBest``; its first 5 genes
    are decoded as the window size and the remaining genes as the number of
    LSTM units. A decoded 0 is reported as 1, because that is the value
    ``train_evaluate`` actually trains with when it scores such an
    individual; returning the raw 0 would describe a model that was never
    evaluated and cannot be built.

    Args:
        population (list): Population of individuals with valid fitness.

    Returns:
        int, int: Best window size and best number of units (both >= 1).
    """
    best_individual = tools.selBest(population, k=1)[0]
    decoded_window = BitArray(best_individual[:5]).uint
    decoded_units = BitArray(best_individual[5:]).uint
    # Mirror the fallback applied during fitness evaluation.
    best_window_size = max(1, decoded_window)
    best_num_units = max(1, decoded_units)
    return best_window_size, best_num_units

# Function to implement the best solution and get real and predicted values
def implement_best_solution(train_data, test_data, best_window_size, best_num_units, scaler):
    """
    Implements the best solution found and gets real and predicted values.

    Trains an LSTM with the given hyperparameters on the training windows
    (5 epochs, 10% held out for validation), predicts the test windows and
    converts targets and predictions back to the original units of
    column 3 with ``scaler``.

    Args:
        train_data (numpy.ndarray): Scaled training data.
        test_data (numpy.ndarray): Scaled test data.
        best_window_size (int): Best window size.
        best_num_units (int): Best number of LSTM units; values <= 0 are
            replaced by 1.
        scaler (StandardScaler): Scaler that was fitted on the training data.

    Returns:
        numpy.ndarray, numpy.ndarray, dict: Real values, predicted values,
        and training history. Two empty arrays and an empty dict are
        returned when the window size is invalid or when either data set
        is too short to yield a single window.
    """
    if best_window_size <= 0:
        print("Invalid value for window size:", best_window_size)
        return np.array([]), np.array([]), {}  # Return empty arrays and an empty dictionary

    if best_num_units <= 0:
        # Same fallback train_evaluate applies while scoring individuals.
        print("Invalid value for number of units, setting to default:", best_num_units)
        best_num_units = 1

    num_features = 4
    target_col = 3  # column predicted by prepare_dataset
    X_train, y_train = prepare_dataset(train_data, best_window_size, num_features)
    X_test, y_test = prepare_dataset(test_data, best_window_size, num_features)

    # A data set with no more rows than the window produces no samples at all.
    if len(X_train) == 0 or len(X_test) == 0:
        print("Not enough rows to build windows of size:", best_window_size)
        return np.array([]), np.array([]), {}

    # Define the LSTM model (the Input layer already fixes the input shape)
    inputs = Input(shape=(best_window_size, num_features))
    x = LSTM(best_num_units)(inputs)
    predictions = Dense(1, activation='linear')(x)
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam', loss='mean_squared_error')

    # Train the model
    history = model.fit(X_train, y_train, epochs=5, batch_size=10, validation_split=0.1, verbose=1, shuffle=True)

    # Predict on the test set
    y_pred = model.predict(X_test)

    # Report the test RMSE (still in scaled units) instead of discarding it
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    print('Test RMSE (scaled):', rmse)

    def to_original_units(column):
        # The scaler was fitted on num_features columns, so the single target
        # column is embedded in a zero matrix of that width before inverting.
        padded = np.zeros((column.shape[0], num_features))
        padded[:, target_col] = np.ravel(column)
        return scaler.inverse_transform(padded)[:, target_col]

    # Inverse transform to get real values
    y_test_real = to_original_units(y_test)
    y_pred_real = to_original_units(y_pred)

    return y_test_real, y_pred_real, history.history

# Function to setup ngrok and start tunnel
def setup_ngrok(n_port):
    """
    Sets up ngrok and starts a tunnel.

    The auth token is read from the ``NGROK_AUTHTOKEN`` environment
    variable. When it is not set, no token is configured here and ngrok
    falls back to whatever configuration it already has.

    Args:
        n_port (int): Port to expose.

    Returns:
        The value returned by ``ngrok.connect`` for the new tunnel
        (NOTE(review): recent pyngrok versions return a tunnel object whose
        ``public_url`` attribute holds the URL string — confirm against the
        installed version).
    """
    import os  # local import: only needed to read the token

    # SECURITY: credentials must never be committed to source. The token
    # that used to be hard-coded here has been exposed and should be revoked.
    token = os.environ.get("NGROK_AUTHTOKEN")
    if token:
        ngrok.set_auth_token(token)
    else:
        print("NGROK_AUTHTOKEN is not set; relying on the existing ngrok configuration.")

    # Start a tunnel for the specified port
    public_url = ngrok.connect(n_port)
    print("Public URL:", public_url)
    try:
        input("Press Enter to open the ngrok URL...")
    except EOFError:
        # No interactive stdin (e.g. batch run): the prompt is only a pause,
        # so continue instead of aborting the whole pipeline.
        pass
    return public_url

# Main function to run the entire process
def MyModel():
    """
    Runs the whole pipeline end to end.

    Steps: open the ngrok tunnel, download the data, split it
    chronologically (last 10% as test), standardise it, search the LSTM
    window size / unit count with the genetic algorithm, retrain with the
    best pair, then plot the loss curves and print real vs. predicted
    values.

    Side effects: sets the module-level name ``train_data`` to the unscaled
    training split, opens a matplotlib figure and prints to stdout.
    """
    import random  # local import: only needed for seeding

    # Kept for backward compatibility: the training split is published as a
    # module global so it can be inspected after the run.
    global train_data

    # NumPy's seed covers the Bernoulli gene initialisation, while DEAP's
    # selection/crossover/mutation draw from Python's `random`, so both
    # generators must be seeded for the search to be repeatable.
    np.random.seed(1120)
    random.seed(1120)

    # Setup ngrok and start tunnel
    setup_ngrok(5000)

    file_path = 'https://raw.githubusercontent.com/kevinmero/Unemployment-rate-prediction/main/data/desempleo.csv'

    # Import the data
    df_train = import_data(file_path)

    # Split the data into training and test sets (no shuffling: time series)
    train_data, test_data = train_test_split(df_train, test_size=0.10, random_state=42, shuffle=False)

    # Scale the training data
    scaler = StandardScaler()
    train_data_scaled = scaler.fit_transform(train_data)

    # Set parameters for the genetic algorithm
    population_size = 4
    num_generations = 4
    gene_length = 9  # 5 bits for the window size + 4 bits for the LSTM units

    # Run the genetic algorithm to find the best hyperparameters
    population, toolbox = define_individuals_and_population(population_size, gene_length, train_data_scaled)
    # Use the population handed back by the run rather than relying on the
    # input list having been modified in place.
    population, _logbook = run_genetic_algorithm(population, toolbox, num_generations)

    # Get the best solution found by the genetic algorithm
    best_window_size, best_num_units = get_best_solution(population)

    # Scale the test data using the same scaler
    test_data_scaled = scaler.transform(test_data)

    # Implement the best solution to get real and predicted values
    real_values, predicted_values, history = implement_best_solution(train_data_scaled, test_data_scaled, best_window_size, best_num_units, scaler)

    # Plot training and validation loss. The history is an empty dict when
    # the final model could not be trained (e.g. invalid window size), so
    # indexing it unconditionally would raise KeyError.
    if history:
        plt.plot(history['loss'], label='Training loss')
        plt.plot(history['val_loss'], label='Validation loss')
        plt.legend()
        plt.title('Training and validation loss')
        plt.ylabel('Loss')
        plt.xlabel('Epoch')
        plt.show()
    else:
        print("No training history available; skipping the loss plot.")

    # Print real and predicted values
    print("\n\n")
    print("Real Values:", real_values)
    print("Predicted Values:", predicted_values)
    print("Best Window Size:", best_window_size)
    print("Best Num of Units:", best_num_units)

# Run the model only when executed directly (script or notebook cell), so
# importing this file does not start a tunnel, a download and a training run.
if __name__ == "__main__":
    MyModel()