In [1]:
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# Seed each random number generator used in this notebook (TensorFlow,
# NumPy and the standard library) with the same fixed value so that
# repeated runs give reproducible results.

tf.random.set_seed(42)
np.random.seed(42)
random.seed(42)

In [3]:
# Load the dataset from the CSV file next to the notebook.
df1 = pd.read_csv('./algea.csv')

# Work on a copy without the columns that are not used as model inputs;
# the unmodified frame stays available as `df1`.
df = df1.drop(columns=['Nitrate', 'Temperature', 'pH', 'CO2'])

# Preview the first rows of the reduced frame.
df.head()

Unnamed: 0,Light,Iron,Phosphate,Population
0,1011.0,0.17,0.08,4736.51
1,1206.5,0.14,0.04,4677.34
2,1450.63,0.04,0.02,3388.85
3,1158.4,0.09,0.06,4899.17
4,1159.92,0.15,0.08,4974.25


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.callbacks import EarlyStopping

In [5]:
# Select the predictors and the regression target.
features = df[['Light', 'Iron', 'Phosphate']]
# StandardScaler expects 2-D input, so keep the target as a single column.
target = df['Population'].values.reshape(-1, 1)

# Split BEFORE scaling so that the held-out rows never contribute to the
# mean/variance used for standardization (avoids train/test leakage).
X_train, X_test, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42
)

# Features and target each get their own scaler. Re-fitting one shared
# instance would overwrite the feature statistics with the target's.
feature_scaler = StandardScaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

target_scaler = StandardScaler()
y_train = target_scaler.fit_transform(y_train)
y_test = target_scaler.transform(y_test)

# Keep `X` and `y` available as standardized full arrays, as before, now
# expressed with the statistics learned from the training rows only.
X = feature_scaler.transform(features)
y = target_scaler.transform(target)

# Backward-compatible alias: other cells call `scaler.inverse_transform`
# to map target values back to the original Population scale.
scaler = target_scaler

In [6]:
# Display the shapes of the four splits in the order
# (X_train, X_test, y_train, y_test) as a sanity check.
tuple(split.shape for split in (X_train, X_test, y_train, y_test))

((7827, 3), (1957, 3), (7827, 1), (1957, 1))

In [8]:
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Build the XGBoost regressor with a fixed seed and fit it on the training split.
xgb_model = XGBRegressor(random_state=42)
xgb_model.fit(X_train, y_train)

# Predict the (standardized) target for the held-out rows.
y_pred = xgb_model.predict(X_test)

# Map both the true values and the predictions back to the original target
# scale. The predictions are reshaped to a single column because
# inverse_transform expects 2-D input.
y_test_inv = scaler.inverse_transform(y_test)
y_pred_inv = scaler.inverse_transform(y_pred.reshape(-1, 1))

# Compute the error metrics on the original scale; RMSE is derived from MSE.
mae = mean_absolute_error(y_test_inv, y_pred_inv)
r2 = r2_score(y_test_inv, y_pred_inv)
mse = mean_squared_error(y_test_inv, y_pred_inv)
rmse = np.sqrt(mse)

# Report each metric rounded to four decimal places.
print(f'MSE: {mse:.4f}')
print(f'R²: {r2:.4f}')
print(f'RMSE: {rmse:.4f}')
print(f'MAE: {mae:.4f}')

MSE: 96256.1841
R²: 0.9555
RMSE: 310.2518
MAE: 262.2866
