# Demo: A simple regression task using JGepRegression
# Here we start by installing the Julia kernel - this may take a few moments 😴

In [13]:
%%shell
# Colab bootstrap cell: when no `julia` executable is found on PATH it downloads
# Julia, installs the packages listed below and registers an IJulia kernel;
# otherwise it only adds/updates the packages.
# `set +e` turns off exit-on-error, so the explicit `if ! ...` checks below
# decide which failures abort the cell (`exit 1`) and which are only reported.
set +e

#---------------------------------------------------#
JULIA_VERSION="1.10.5" # any version ≥ 0.7.0
JULIA_PACKAGES="IJulia BenchmarkTools CSV DataFrames Plots Dates DynamicExpressions FileIO ForwardDiff GZip JSON LineSearches LinearAlgebra Logging Optim OrderedCollections ProgressMeter Random Serialization StaticArrays Statistics Zygote"
JULIA_NUM_THREADS=2
#---------------------------------------------------#

# `which julia` prints nothing when julia is not on PATH, so `-z` selects the
# fresh-install branch.
if [ -z `which julia` ]; then
  # Install Julia
  # Keep only "major.minor" of the version (first two dot-separated fields);
  # it is used as the directory component of the download URL.
  JULIA_VER=`cut -d '.' -f -2 <<< "$JULIA_VERSION"`
  echo "Installing Julia $JULIA_VERSION on the current Colab Runtime..."
  BASE_URL="https://julialang-s3.julialang.org/bin/linux/x64"
  URL="$BASE_URL/$JULIA_VER/julia-$JULIA_VERSION-linux-x86_64.tar.gz"
  if ! wget -nv $URL -O /tmp/julia.tar.gz; then
    echo "Failed to download Julia. Check the URL and your internet connection."
    exit 1
  fi

  # --strip-components 1 drops the archive's top-level directory so that its
  # contents are unpacked directly under /usr/local.
  if ! tar -x -f /tmp/julia.tar.gz -C /usr/local --strip-components 1; then
    echo "Failed to extract Julia archive. Check if you have sufficient permissions."
    exit 1
  fi

  # The tarball is no longer needed once it has been extracted.
  rm /tmp/julia.tar.gz

  # Install packages
  # The two sed calls turn the space-separated package list into a quoted,
  # comma-separated list that is spliced into a Julia array literal.
  # A failure here is only reported; the script keeps going.
  echo "Installing packages..."
  if ! julia -e "using Pkg; Pkg.add([$(echo $JULIA_PACKAGES | sed "s/ /\", \"/g" | sed "s/^/\"/; s/$/\"/")]); Pkg.precompile()"; then
    echo "Failed to install some packages. Please check the output for details."
  fi

  # Install kernel and rename it to "julia"
  # The kernel is registered with JULIA_NUM_THREADS set in its environment.
  echo "Installing IJulia kernel..."
  if ! julia -e 'using Pkg; Pkg.add("IJulia"); using IJulia; IJulia.installkernel("julia", env=Dict("JULIA_NUM_THREADS"=>"'"$JULIA_NUM_THREADS"'"))'; then
    echo "Failed to install IJulia kernel. Check your internet connection and try again."
    exit 1
  fi

  # Move whatever matches "julia*" inside the kernel directory to the plain
  # name "julia".
  # NOTE(review): assumes exactly one entry matches "julia*" — confirm.
  KERNEL_DIR=`julia -e "using IJulia; print(IJulia.kerneldir())"`
  KERNEL_NAME=`ls -d "$KERNEL_DIR"/julia*`
  if ! mv -f $KERNEL_NAME "$KERNEL_DIR"/julia; then
    echo "Failed to rename kernel. Check if you have sufficient permissions."
    exit 1
  fi

  echo ''
  echo "Successfully installed Julia $JULIA_VERSION with the specified packages!"
  echo "Please reload this page (press Ctrl+R, ⌘+R, or the F5 key) then"
  echo "select 'Julia' from the kernel dropdown menu to start using Julia."
else
  # Julia is already present: add any missing packages, update all of them
  # and precompile. A failure is only reported, never fatal.
  echo "Julia is already installed. Version: `julia -v`"
  echo "Updating packages..."
  if ! julia -e "using Pkg; Pkg.add([$(echo $JULIA_PACKAGES | sed "s/ /\", \"/g" | sed "s/^/\"/; s/$/\"/")]); Pkg.update(); Pkg.precompile()"; then
    echo "Failed to update some packages. Please check the output for details."
  fi
fi

## After that, go to the right corner (the small triangle pointing downwards) and change the runtime type to the Julia kernel

## In the next cell we just make sure that Julia has been installed

In [1]:
# Print the version information of the running Julia session to confirm the installation.
versioninfo()

In [2]:
# Load the package manager together with Random (used for seeding further down),
# then fetch GeneExpressionProgramming.jl straight from its Git repository.
# This step takes roughly another minute.
using Pkg, Random
Pkg.add(Pkg.PackageSpec(; url="https://github.com/maxreiss123/GeneExpressionProgramming.jl.git"))

In [3]:
# Then we import everything we need - add further libs if you would like to plot the result
using GeneExpressionProgramming

# Seed the default random number generator so that the results can be reproduced.
# `Random` is available here because of the `using Random` in the installation cell.
Random.seed!(1)

## We now define the data according to a function:

$$
y = x_1^2 + x_1 x_2 - 2 x_2^2
$$

In [9]:
#Generate some data
# Ground-truth function used for both data sets:
#   y = x_1^2 + x_1*x_2 - 2*x_2^2
# `x` holds one feature per row and one sample per column.
target_function(x) = @. x[1, :] * x[1, :] + x[1, :] * x[2, :] - 2 * x[2, :] * x[2, :]

# Training set: 2 features x 1000 samples.
x_data = randn(Float64, 2, 1000);
y_data = target_function(x_data);


# Held-out set: its targets are computed from x_test itself (not from x_data),
# otherwise y_test would merely be a copy of y_data.
x_test = randn(Float64, 2, 1000);
y_test = target_function(x_test);



In [10]:
#Setting number of individuals
population_size = 1000
#Setting number of epochs
epochs = 1000

In [None]:
#define the regressor
# The number of input features is the number of rows of x_data.
number_features = size(x_data, 1)
regressor = GepRegressor(number_features)

# NOTE(review): x_data is laid out as features x samples, yet its adjoint is
# handed over here — confirm which orientation fit! and the regressor expect.
fit!(regressor, epochs, population_size, x_data', y_data; loss_fun="mse")

# Predictions on the training data and on the held-out data.
pred = regressor(x_data')
pred_test = regressor(x_test')

@show regressor.best_models_[1].compiled_function
@show regressor.best_models_[1].fitness

# Creating a few nice plots

In [None]:

# Plots provides scatter/plot/plot!; it is part of the package list of the setup cell.
using Plots

#Making a nice plot - predicted vs actual values on the held-out data
# Actual values go on the x-axis and predictions on the y-axis, matching the labels.
pred_vs_actual = scatter(vec(y_test), vec(pred_test),
    xlabel="Actual Values",
    ylabel="Predicted Values",
    label="Predictions ",
    title="Predictions vs Actual - Symbolic Regression");


# Identity line: a perfect model would place every point on it.
plot!(pred_vs_actual, vec(y_test), vec(y_test),
    label="Prediction Comparison",
    color=:red)

# Only the last value of a cell is rendered automatically, so show this figure explicitly.
display(pred_vs_actual)

#train loss vs validation loss
train_validation = plot(
    regressor.fitness_history_.train_loss,
    label="Training Loss",
    ylabel="Loss",
    xlabel="Epoch",
    linewidth=2
);

# NOTE(review): val_loss is presumably only meaningful when validation data is
# handed to fit! — confirm against the package documentation.
plot!(
    train_validation,
    regressor.fitness_history_.val_loss,
    label="Validation Loss",
    linewidth=2
)
