# XGBoost model

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

filtered_data = pd.read_csv('source_events_50.csv')
# Step 1: Add additional features (sin(Theta), cos(Theta), distance between planes)
def add_new_features(df):
    df['sin_Theta'] = np.sin(df['Theta'])
    df['cos_Theta'] = np.cos(df['Theta'])
    df['distance'] = np.sqrt((df['Scatter_X'] - df['Absorb_X'])**2 + (df['Scatter_Y'] - df['Absorb_Y'])**2)
    #df = df.drop(columns=['Theta'])  # Drop Theta if not needed
    return df
filtered_data = add_new_features(filtered_data)
# Step 2: Prepare the data for training
X = filtered_data[['Scatter_X', 'Scatter_Y', 'Absorb_X', 'Absorb_Y', 'Theta', 'Energy', 'sin_Theta', 'cos_Theta', 'distance']]
y = filtered_data[['Source_X', 'Source_Y']]
# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the feature data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(np.array(y_train), dtype=torch.float32)
y_test = torch.tensor(np.array(y_test), dtype=torch.float32)


In [13]:
#perform xgboost regression on the data
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import RandomizedSearchCV


# define the model
model = XGBRegressor(objective='reg:squarederror')
# define the evaluation method
cv = KFold(n_splits=10, shuffle=True, random_state=1)
# evaluate the model on the dataset
n_scores = cross_val_score(model, X_train, y_train, scoring='r2', cv=cv, n_jobs=-1)
# report performance
print('mean r2 score: %.3f (%.3f)' % (np.mean(n_scores), np.std(n_scores)))
#print('r2_score: %.3f' % r2_score(y_test, y_pred))
#report r2 score
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('R2 score: %.3f' % r2_score(y_test, y_pred))
print('Mean Squared Error: %.3f' % mean_squared_error(y_test, y_pred))

mean r2 score: -0.055 (0.005)
R2 score: -0.051
Mean Squared Error: 135.360


# XGBoost model with parameters obtained from gridsearch

In [14]:
modelxg = XGBRegressor(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.5,
    colsample_bytree=0.8,
    subsample=0.8,
    min_child_weight=1,
    gamma=0.1,
    reg_alpha=0.5,
    reg_lambda=8,
    random_state=42,
    n_jobs=-1,
    objective='reg:squarederror',
    eval_metric=['rmse', 'mae']
)

cv = KFold(n_splits=10, shuffle=True, random_state=1)
# evaluate the model on the dataset
n_scores = cross_val_score(modelxg, X_train, y_train, scoring='r2', cv=cv, n_jobs=-1)
# report performance
print('mean r2 score: %.3f (%.3f)' % (np.mean(n_scores), np.std(n_scores)))
#print('r2_score: %.3f' % r2_score(y_test, y_pred))
#report r2 score
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print('R2 score: %.3f' % r2_score(y_test, y_pred))
print('Mean Squared Error: %.3f' % mean_squared_error(y_test, y_pred))


mean r2 score: -0.068 (0.007)
R2 score: -0.051
Mean Squared Error: 135.360


# MultiOutputRegressor model

In [15]:
from sklearn.multioutput import MultiOutputRegressor
multi_model = MultiOutputRegressor(model)
cv_scores = cross_val_score(multi_model, X_train, y_train, cv=3, scoring='r2')

print(f"Cross-validation R2 scores: {cv_scores}")
print(f"Mean R2 score: {cv_scores.mean()}")

Cross-validation R2 scores: [-0.06875081 -0.06823931 -0.06976951]
Mean R2 score: -0.06891987808946864
