In [147]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, time
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor
import random
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn

In [148]:
df = pd.read_excel('../data/GNP_Aerial_counting_1969_2022.xlsx')

In [149]:
empty_cols = ['MALE', 'CALVES'] #columns that are empty
zero_cols = ['LINE2002', 'LINE2012', 'COLLAR', 'CONSERVANC', 'SANCTUARY'] #columns that are > 80% just 0s
drop_cols = ['NOTES'] # other columns to drop
df.drop(columns=empty_cols, inplace=True)
df.drop(columns=zero_cols, inplace=True)
df.drop(columns=drop_cols, inplace=True)

In [150]:
df['TIME'] = df['TIME'].apply(lambda x: x.hour * 3600 + x.minute * 60 + x.second if pd.notna(x) else x)
df['TIME'] = df['TIME'].fillna(0)

In [151]:
df['TYPE'] = df['TYPE'].map({'Fixed-wing': 0, 'Helicopter': 1})

In [152]:
#zero_count = (df['NUMBER'] == 0).sum()
#print(zero_count / df.shape[0])

In [153]:
def process_date(val):
    if pd.isna(val):
        return np.nan
    elif isinstance(val, str):
        return float(val.split('/')[1])
    elif isinstance(val, datetime):
        return val.day
    else:
        return float(val)

df['DATE'] = df['DATE'].apply(process_date)

In [154]:
month_mapping = {
    'January': 1, 'February': 2, 'March': 3, 'April': 4,
    'May': 5, 'June': 6, 'July': 7, 'August': 8,
    'September': 9, 'October': 10, 'November': 11, 'December': 12
}

df['MONTH'] = df['MONTH'].map(month_mapping)
df['MONTH'] = pd.to_numeric(df['MONTH'], errors='coerce')


In [155]:
df['lat_lag1'] = df.groupby('SPECIES')['LATITUDE'].shift(1)
df['lon_lag1'] = df.groupby('SPECIES')['LONGITUDE'].shift(1)
df['lat_lag2'] = df.groupby('SPECIES')['LATITUDE'].shift(2)
df['lon_lag2'] = df.groupby('SPECIES')['LONGITUDE'].shift(2)
df['count_lag1'] = df.groupby('SPECIES')['NUMBER'].shift(1)
df['count_lag2'] = df.groupby('SPECIES')['NUMBER'].shift(2)
df['count_lag3'] = df.groupby('SPECIES')['NUMBER'].shift(3)
df['count_lag4'] = df.groupby('SPECIES')['NUMBER'].shift(4)
df['count_lag5'] = df.groupby('SPECIES')['NUMBER'].shift(5)
df['rolling_mean_3'] = (
    df.groupby('SPECIES')['NUMBER']
    .transform(lambda x: x.rolling(window=3, min_periods=1).mean())
)

In [156]:
df['SPECIES'] = df['SPECIES'].str.lower()
df['STRATUM'] = df['STRATUM'].str.lower()
#df['MONTH'] = df['MONTH'].str.lower()
df = pd.get_dummies(df, columns=['SPECIES', 'STRATUM'])

In [157]:
'''
correlation_matrix = df.corr()
plt.figure(figsize=(25, 25)) 
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm")
plt.title("Correlation Matrix Heatmap")
plt.show()
'''

train_drop = ['MONTH', 'COUNT', 'DATE']

### Non-DL Model

#### Run Cross-Val

In [None]:
def run_cv(df, num_splits=5):
    tested_years = [2022]
    ave_mse = 0
    ave_r2 = 0

    for _ in range(num_splits):
        cv_year = random.choice(df['COUNT'].unique())
        while cv_year in tested_years:
            cv_year = random.choice(df['COUNT'].unique())
        
        tested_years.append(cv_year)

        train_df = df[~df['COUNT'].isin([2022, cv_year])]
        test_df = df[df['COUNT'] == cv_year]

        train_df = train_df.drop(columns=train_drop)
        test_df = test_df.drop(columns=train_drop)

        #fillna with mean
        train_df = train_df.fillna(train_df.mean())
        test_df = test_df.fillna(test_df.mean())

        X_train = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
        y_train = train_df[['NUMBER', 'LATITUDE', 'LONGITUDE']]
        X_test = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
        y_test = test_df[['NUMBER', 'LATITUDE', 'LONGITUDE']]

        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

        # Multi-output regressor
        model = RandomForestRegressor()
        model.fit(X_train, y_train)

        # Predictions
        y_pred = model.predict(X_test)

        print(f"year being tested for CV: {cv_year}")

        mse = mean_squared_error(y_test, y_pred)
        print(f"Mean Squared Error: {mse}")

        # Example: R² for predictions
        r2 = r2_score(y_test, y_pred)
        print(f"R² Score: {r2}")

        ave_mse += mse
        ave_r2 += r2

    print(f"Average Mean Squared Error: {ave_mse / num_splits}")
    print(f"Average R² Score: {ave_r2 / num_splits}")

run_cv(df)

#### Train on Full Dataset (Excluding 2022 for testing)

In [13]:
top_features = ['count_lag1', 'count_lag2', 'SESSION', 'lat_lag1', 'lon_lag1', 'TIME', 'LINE2014', 'CNTBLK2014', 'IN_STRIP', 'lat_lag2', 'lon_lag2', 'STRATUM_west', 'RIFTVALLEY', 'NUMBER', 'STRATUM_central', 'RFBLK2014', 'FLDPLN_BLK']

In [163]:
# Get Train Test Split
train_df = df[df['COUNT'] != 2022]
test_df = df[df['COUNT'] == 2022]

#train_df = train_df.drop(columns=train_drop)
#test_df = test_df.drop(columns=train_drop)

#fillna with mean
train_df = train_df.fillna(train_df.mean())
test_df = test_df.fillna(test_df.mean())

X_train = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train = train_df[['NUMBER', 'LATITUDE', 'LONGITUDE']]
X_test = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_test = test_df[['NUMBER', 'LATITUDE', 'LONGITUDE']]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Multi-output regressor
GBmodel = GradientBoostingRegressor()#RandomForestRegressor(max_depth=10, n_estimators=500)
model = MultiOutputRegressor(estimator=GBmodel)
model.fit(X_train, y_train)

# Predictions
predicted = model.predict(X_test)
df_predicted = pd.DataFrame(predicted)

mse = mean_squared_error(y_test.iloc[:, 0], df_predicted.iloc[:, 0])
print(f"Mean Squared Error NUMBER: {mse}")

mae = mean_absolute_error(y_test.iloc[:, 0], df_predicted.iloc[:, 0])
print(f"Mean Absolute Error NUMBER: {mae}")    

r2 = r2_score(y_test.iloc[:, 0], df_predicted.iloc[:, 0])
print(f"R² Score NUMBER: {r2}")

mse = mean_squared_error(y_test.iloc[:, 1], df_predicted.iloc[:, 1])
print(f"Mean Squared Error LATITUDE: {mse}")

mae = mean_absolute_error(y_test.iloc[:, 1], df_predicted.iloc[:, 1])
print(f"Mean Absolute Error LATITUDE: {mae}")    

r2 = r2_score(y_test.iloc[:, 1], df_predicted[1])
print(f"R² Score LATITUDE: {r2}")

mse = mean_squared_error(y_test.iloc[:, 2], df_predicted.iloc[:, 2])
print(f"Mean Squared Error LONGITUDE: {mse}")

mae = mean_absolute_error(y_test.iloc[:, 2], df_predicted.iloc[:, 2])
print(f"Mean Absolute Error LONGITUDE: {mae}")    

r2 = r2_score(y_test.iloc[:, 2], df_predicted.iloc[:, 2])
print(f"R² Score LONGITUDE: {r2}")


KeyboardInterrupt: 

In [167]:
X_train_separate = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
X_test_separate = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train_separate = y_train['NUMBER']
y_test_separate = y_test['NUMBER']

model = GradientBoostingRegressor(max_depth=3, min_samples_leaf=2, min_samples_split=5, n_estimators=500)
model.fit(X_train_separate, y_train_separate)

predicted = model.predict(X_test_separate)
df_predicted = pd.DataFrame(predicted)

mse = mean_squared_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Squared Error NUMBER: {mse}")

mae = mean_absolute_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Absolute Error NUMBER: {mae}")  

r2 = r2_score(y_test_separate, df_predicted.iloc[:, 0])
print(f"R² Score NUMBER: {r2}")

Mean Squared Error NUMBER: 2.254199866979626
Mean Absolute Error NUMBER: 0.49866959283446527
R² Score NUMBER: 0.9868383892972902


In [165]:
X_train_separate = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
X_test_separate = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train_separate = y_train['LATITUDE']
y_test_separate = y_test['LATITUDE']

model = GradientBoostingRegressor(max_depth= 3, min_samples_leaf=2, min_samples_split=5, n_estimators=500)
model.fit(X_train_separate, y_train_separate)

predicted = model.predict(X_test_separate)
df_predicted = pd.DataFrame(predicted)

mse = mean_squared_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Squared Error LATITUDE: {mse}")

mae = mean_absolute_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Absolute Error LATITUDE: {mae}")  

r2 = r2_score(y_test_separate, df_predicted.iloc[:, 0])
print(f"R² Score LATITUDE: {r2}")

Mean Squared Error LATITUDE: 0.000712864676026062
Mean Absolute Error LATITUDE: 0.00955690979974098
R² Score LATITUDE: 0.9617759993573891


In [166]:
X_train_separate = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
X_test_separate = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train_separate = y_train['LONGITUDE']
y_test_separate = y_test['LONGITUDE']

model = GradientBoostingRegressor(max_depth=3, min_samples_leaf=2, min_samples_split=5, n_estimators=500)
model.fit(X_train_separate, y_train_separate)

predicted = model.predict(X_test_separate)
df_predicted = pd.DataFrame(predicted)

mse = mean_squared_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Squared Error LONGITUDE: {mse}")

mae = mean_absolute_error(y_test_separate, df_predicted.iloc[:, 0])
print(f"Mean Absolute Error LONGITUDE: {mae}")  

r2 = r2_score(y_test_separate, df_predicted.iloc[:, 0])
print(f"R² Score LONGITUDE: {r2}")

Mean Squared Error LONGITUDE: 0.0004594865313226678
Mean Absolute Error LONGITUDE: 0.012427469948354345
R² Score LONGITUDE: 0.9585958134657896


In [162]:
from sklearn.model_selection import GridSearchCV

train_df = df[df['COUNT'] != 2022]
test_df = df[df['COUNT'] == 2022]

#fillna with mean
train_df = train_df.fillna(train_df.mean())
test_df = test_df.fillna(test_df.mean())

X_train = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train = train_df['NUMBER']
X_test = test_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_test = test_df['NUMBER']

# Define Random Forest model
gbr = GradientBoostingRegressor(random_state=42)

# Define parameter grid
param_grid = {
    'n_estimators': [50, 100, 200, 500],
    'max_depth': [1, 3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Perform GridSearchCV
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', verbose=2, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Best parameters and score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", -grid_search.best_score_)

Fitting 5 folds for each of 144 candidates, totalling 720 fits
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   2.6s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   2.7s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   2.7s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   2.8s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   2.7s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.1s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.2s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.4s
[CV] END max_depth=1, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   5.4s
[CV] END max_depth=1, min_samples_leaf=1, 

  _data = np.array(data, dtype=dtype, copy=copy,


Best Parameters: {'max_depth': 3, 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 500}
Best Score: 187.7133314232371


In [66]:
print(df_predicted.head())

      0          1          2
0  1.53 -18.697853  34.381200
1  3.52 -18.783694  34.370104
2  2.87 -18.756784  34.413792
3  3.80 -18.758799  34.476623
4  3.30 -18.812879  34.347245


### Train on Actual Full Dataset

In [15]:
# Get Train Test Split
train_df = df.copy()
train_df = train_df.fillna(train_df.mean())

X_train = train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
y_train = train_df[['NUMBER', 'LATITUDE', 'LONGITUDE']]

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# Multi-output regressor
model = RandomForestRegressor()
model.fit(X_train, y_train)

KeyboardInterrupt: 

In [None]:
def get_future_predictions(df, model, scaler, last_year=2022):
    past_year = df[df['COUNT'] == last_year].copy()

    past_year['timestamp'] = pd.to_datetime(past_year['TIME'], unit='s').dt.strftime('%H:%M:%S')

    past_year['timestamp'] = pd.to_datetime(
        past_year['COUNT'].astype(int).astype(str) + '-' + 
        past_year['MONTH'].astype(int).astype(str).str.zfill(2) + '-' +
        past_year['DATE'].astype(float).astype(int).astype(str).str.zfill(2) + ' ' +
        past_year['timestamp']
    )

    species_columns = [col for col in df.columns if col.startswith('SPECIES_')]
    past_year['species'] = past_year[species_columns].idxmax(axis=1).str.replace('SPECIES_', '')
    
    future_rows = past_year.copy()
    future_rows['COUNT'] = last_year + 1

    last_id = past_year['ID'].max()
    future_rows['ID'] = (future_rows.index - future_rows.index[0]) + 1 + last_id

    grouped = future_rows.groupby('species')
    
    for species, group in grouped:
        last_lat = group.iloc[-1]['LATITUDE']
        last_long = group.iloc[-1]['LONGITUDE']
        last_count = group.iloc[-1]['NUMBER']
        
        i = 0
        for row_idx, row in group.iterrows():
            if i == 0:
                row['lat_lag1'] = last_lat
                row['lon_lag1'] = last_long
                row['count_lag1'] = last_count
                row['lat_lag2'] = np.nan
                row['lon_lag2'] = np.nan
                row['count_lag2'] = np.nan
            else:
                row['lat_lag1'] = future_rows.loc[row_idx-1, 'LATITUDE']
                row['lon_lag1'] = future_rows.loc[row_idx-1, 'LONGITUDE']
                row['count_lag1'] = future_rows.loc[row_idx-1, 'NUMBER']
                row['lat_lag2'] = future_rows.loc[row_idx-1, 'lat_lag1']
                row['lon_lag2'] = future_rows.loc[row_idx-1, 'lon_lag1']
                row['count_lag2'] = future_rows.loc[row_idx-1, 'count_lag1']
            
            row['LATITUDE'] = np.nan
            row['LONGITUDE'] = np.nan
            row['NUMBER'] = np.nan

            curr_row_df = row.to_frame().T 
            curr_row = curr_row_df.drop(columns=['ID', 'timestamp', 'species', 'LATITUDE', 'LONGITUDE', 'NUMBER'])
            curr_row_scaled = scaler.transform(curr_row)
            
            predictions = model.predict(curr_row_scaled)
            
            row['NUMBER'] = predictions[0][0]
            row['LATITUDE'] = predictions[0][1]
            row['LONGITUDE'] = predictions[0][2]

            future_rows.loc[row_idx] = row

    combined_df = pd.concat([df, future_rows], ignore_index=True)
    return combined_df

df_copy = df.copy()
get_future_predictions(df_copy, model, scaler)

In [None]:
'''
importances = model.feature_importances_

# Create a DataFrame for better readability
importance_df = pd.DataFrame({
    'Feature': train_df.drop(columns=['ID', 'LATITUDE', 'LONGITUDE', 'NUMBER']).columns,
    'Importance': importances
}).sort_values(by='Importance', ascending=False)

# Select top n features
n = 20
top_features = importance_df.head(n)['Feature'].tolist()

print(top_features)
'''

### Set-Up NN

In [16]:
# Example: Convert features and target to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)

X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [17]:
# Create datasets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [18]:
# Define the neural network
class MultiOutputNN(nn.Module):
    def __init__(self, input_size, output_size=3):
        super(MultiOutputNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.bn2 = nn.BatchNorm1d(32) 
        self.output = nn.Linear(32, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu(x)
        x = self.output(x)
        return x

### Run 1 NN

In [19]:
model = MultiOutputNN(input_size=X_train_tensor.shape[1])

In [20]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        predictions = model(X_batch)
        loss = criterion(predictions, y_batch)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}")

Epoch 1/100, Loss: 206.5083
Epoch 2/100, Loss: 153.9004
Epoch 3/100, Loss: 150.8271
Epoch 4/100, Loss: 149.8558
Epoch 5/100, Loss: 148.9420
Epoch 6/100, Loss: 148.2635
Epoch 7/100, Loss: 147.7699
Epoch 8/100, Loss: 146.0727
Epoch 9/100, Loss: 146.5780
Epoch 10/100, Loss: 147.0846
Epoch 11/100, Loss: 146.1973
Epoch 12/100, Loss: 145.0987
Epoch 13/100, Loss: 145.7440
Epoch 14/100, Loss: 144.7288
Epoch 15/100, Loss: 144.3359
Epoch 16/100, Loss: 144.1295
Epoch 17/100, Loss: 143.9670
Epoch 18/100, Loss: 142.3732
Epoch 19/100, Loss: 142.9205
Epoch 20/100, Loss: 142.6730
Epoch 21/100, Loss: 143.2867
Epoch 22/100, Loss: 140.6564
Epoch 23/100, Loss: 141.2384
Epoch 24/100, Loss: 141.4233
Epoch 25/100, Loss: 139.5535
Epoch 26/100, Loss: 141.5762
Epoch 27/100, Loss: 139.6026
Epoch 28/100, Loss: 139.9572
Epoch 29/100, Loss: 139.0378
Epoch 30/100, Loss: 141.3274
Epoch 31/100, Loss: 138.1859
Epoch 32/100, Loss: 137.7269
Epoch 33/100, Loss: 137.7840
Epoch 34/100, Loss: 137.1505
Epoch 35/100, Loss: 135

In [21]:
with torch.no_grad():
    actual = y_test_tensor.numpy()
    y_preds = model(X_test_tensor)
    #y_preds[:, 0] = torch.round(y_preds[:, 0])
    predicted = y_preds.detach().numpy()
    
    mse = mean_squared_error(actual, predicted)
    print(f"Mean Squared Error: {mse}")

    mae = mean_absolute_error(actual, predicted)
    print(f"Mean Absolute Error: {mae}")    

    r2 = r2_score(actual, predicted)
    print(f"R² Score: {r2}")

Mean Squared Error: 54.033878326416016
Mean Absolute Error: 1.823999047279358
R² Score: -2.2140512466430664


### Ensemble NNs (So far better than 1)

In [25]:
models = [MultiOutputNN(input_size=X_train.shape[1]) for _ in range(3)]

In [None]:
criterion = nn.MSELoss()
epochs = 100

trained_models = []
for i, model in enumerate(models):
    tot_loss = 0.0
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            predictions = model(X_batch)
            loss = criterion(predictions, y_batch)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        
        tot_loss += running_loss / len(train_loader)

    print(f"Loss for Model {i+1}: {tot_loss / 100:.4f}")
    trained_models.append(model)

Loss for Model 1: 0.6375
Loss for Model 2: 0.6034
Loss for Model 3: 0.6245


In [21]:
# Predict with all models
predictions = []

for model in trained_models:
    model.eval()
    with torch.no_grad():
        y_preds = model(X_test_tensor)
        predictions.append(y_preds.detach().numpy())

# Average predictions
predicted = sum(predictions) / len(predictions)

actual = y_test_tensor.numpy()
mse = mean_squared_error(actual, predicted)
print(f"Mean Squared Error: {mse}")

mae = mean_absolute_error(actual, predicted)
print(f"Mean Absolute Error: {mae}")    

r2 = r2_score(actual, predicted)
print(f"R² Score: {r2}")

Mean Squared Error: 0.009994489140808582
Mean Absolute Error: 0.05950450524687767
R² Score: 0.9502697587013245
