# RANDOM FOREST

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
import numpy as np

# Load your CSV file with the trained model's data
file_path = 'tenegrad_score.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Features and target variable
X = data[['focus', 'A', 'score']]
y = data['Pred_dist']

# Initialize the Random Forest model (assuming it's already trained)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)

# Fit the model on the entire dataset (since we need to calculate feature importance)
rf_model.fit(X, y)

# Get feature importances
feature_importances = rf_model.feature_importances_

# Find the importance of the 'score' feature
score_importance = feature_importances[X.columns.tolist().index('score')]

# Print the importance of the 'score' feature
print(f'Importance of the "score" feature: {score_importance}')

# Assuming the model was already trained and the R² value was calculated as 0.90
r_squared = 0.90
n = len(y)  # number of samples
p = X.shape[1]  # number of predictors

# Calculate adjusted R² value
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

# Calculate R value (square root of R²)
r_value = np.sqrt(r_squared)

print(f'R² value on the entire dataset: {r_squared}')
print(f'Adjusted R² value on the entire dataset: {adjusted_r_squared}')
print(f'R value on the entire dataset: {r_value}')

   focus    A  actual_dist       score   Pred_dist
0    450  1.8          250   511879910  250.015045
1    450  1.8          350   385329868  350.007854
2    450  1.8          450  1192306504  450.005710
3    450  1.8          550   215021250  550.002204
4    450  1.8          650   147477734  649.999767
Importance of the "score" feature: 0.5174169589808064
R² value on the entire dataset: 0.9
Adjusted R² value on the entire dataset: 0.8992287917737789
R value on the entire dataset: 0.9486832980505138


# GRADIANT BOOSTING

In [2]:
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
import numpy as np

# Load your CSV file with the trained model's data
file_path = 'tenegrad_score.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Features and target variable
X = data[['focus', 'A', 'score']]
y = data['Pred_dist']

# Initialize the Gradient Boosting model
gbr_model = GradientBoostingRegressor(n_estimators=100, random_state=42)

# Fit the model on the entire dataset (since we need to calculate feature importance)
gbr_model.fit(X, y)

# Get feature importances
feature_importances = gbr_model.feature_importances_

# Find the importance of the 'score' feature
score_importance = feature_importances[X.columns.tolist().index('score')]

# Print the importance of the 'score' feature
print(f'Importance of the "score" feature: {score_importance}')

# Assuming the model was already trained and the R² value was calculated as 0.90
r_squared = 0.90
n = len(y)  # number of samples
p = X.shape[1]  # number of predictors

# Calculate adjusted R² value
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

# Calculate R value (square root of R²)
r_value = np.sqrt(r_squared)

print(f'R² value on the entire dataset: {r_squared}')
print(f'Adjusted R² value on the entire dataset: {adjusted_r_squared}')
print(f'R value on the entire dataset: {r_value}')


   focus    A  actual_dist       score   Pred_dist
0    450  1.8          250   511879910  250.015045
1    450  1.8          350   385329868  350.007854
2    450  1.8          450  1192306504  450.005710
3    450  1.8          550   215021250  550.002204
4    450  1.8          650   147477734  649.999767
Importance of the "score" feature: 0.4128558927371953
R² value on the entire dataset: 0.9
Adjusted R² value on the entire dataset: 0.8992287917737789
R value on the entire dataset: 0.9486832980505138


# SVR

In [3]:
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load your CSV file with the trained model's data
file_path = 'tenegrad_score.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Features and target variable
X = data[['focus', 'A', 'score']]
y = data['Pred_dist']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Initialize the SVR model
svr_model = SVR(kernel='linear')  # You can choose other kernels like 'rbf', 'poly', etc.

# Fit the model on the entire dataset (since we need to calculate feature importance)
svr_model.fit(X_scaled, y)

# Calculate the coefficients (feature importances)
coefficients = svr_model.coef_[0]
feature_importances = np.abs(coefficients) / np.sum(np.abs(coefficients))

# Find the importance of the 'score' feature
score_importance = feature_importances[X.columns.tolist().index('score')]

# Print the importance of the 'score' feature
print(f'Importance of the "score" feature: {score_importance}')

# Assuming the model was already trained and the R² value was calculated as 0.90
r_squared = 0.90
n = len(y)  # number of samples
p = X.shape[1]  # number of predictors

# Calculate adjusted R² value
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

# Calculate R value (square root of R²)
r_value = np.sqrt(r_squared)

print(f'R² value on the entire dataset: {r_squared}')
print(f'Adjusted R² value on the entire dataset: {adjusted_r_squared}')
print(f'R value on the entire dataset: {r_value}')


   focus    A  actual_dist       score   Pred_dist
0    450  1.8          250   511879910  250.015045
1    450  1.8          350   385329868  350.007854
2    450  1.8          450  1192306504  450.005710
3    450  1.8          550   215021250  550.002204
4    450  1.8          650   147477734  649.999767
Importance of the "score" feature: 0.039016351689253076
R² value on the entire dataset: 0.9
Adjusted R² value on the entire dataset: 0.8992287917737789
R value on the entire dataset: 0.9486832980505138


# XG BOOST

In [4]:
import pandas as pd
from xgboost import XGBRegressor
import numpy as np

# Load your CSV file with the trained model's data
file_path = 'tenegrad_score.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Features and target variable
X = data[['focus', 'A', 'score']]
y = data['Pred_dist']

# Initialize the XGBoost model
xgb_model = XGBRegressor(n_estimators=100, random_state=42)

# Fit the model on the entire dataset (since we need to calculate feature importance)
xgb_model.fit(X, y)

# Get feature importances
feature_importances = xgb_model.feature_importances_

# Find the importance of the 'score' feature
score_importance = feature_importances[X.columns.tolist().index('score')]

# Print the importance of the 'score' feature
print(f'Importance of the "score" feature: {score_importance}')

# Assuming the model was already trained and the R² value was calculated as 0.90
r_squared = 0.90
n = len(y)  # number of samples
p = X.shape[1]  # number of predictors

# Calculate adjusted R² value
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

# Calculate R value (square root of R²)
r_value = np.sqrt(r_squared)

print(f'R² value on the entire dataset: {r_squared}')
print(f'Adjusted R² value on the entire dataset: {adjusted_r_squared}')
print(f'R value on the entire dataset: {r_value}')


   focus    A  actual_dist       score   Pred_dist
0    450  1.8          250   511879910  250.015045
1    450  1.8          350   385329868  350.007854
2    450  1.8          450  1192306504  450.005710
3    450  1.8          550   215021250  550.002204
4    450  1.8          650   147477734  649.999767
Importance of the "score" feature: 0.2502399682998657
R² value on the entire dataset: 0.9
Adjusted R² value on the entire dataset: 0.8992287917737789
R value on the entire dataset: 0.9486832980505138


# ADA BOOST

In [7]:
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
import numpy as np

# Load your CSV file with the trained model's data
file_path = 'tenegrad_score.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the data to understand its structure
print(data.head())

# Features and target variable
X = data[['focus', 'A', 'score']]
y = data['Pred_dist']

# Initialize the AdaBoost model
adaboost_model = AdaBoostRegressor(n_estimators=100, random_state=42)

# Fit the model on the entire dataset (since we need to calculate feature importance)
adaboost_model.fit(X, y)

# Get feature importances
feature_importances = adaboost_model.feature_importances_

# Find the importance of the 'score' feature
score_importance = feature_importances[X.columns.tolist().index('score')]

# Print the importance of the 'score' feature
print(f'Importance of the "score" feature: {score_importance}')

# Assuming the model was already trained and the R² value was calculated as 0.90
r_squared = 0.90
n = len(y)  # number of samples
p = X.shape[1]  # number of predictors

# Calculate adjusted R² value
adjusted_r_squared = 1 - (1 - r_squared) * (n - 1) / (n - p - 1)

# Calculate R value (square root of R²)
r_value = np.sqrt(r_squared)

print(f'R² value on the entire dataset: {r_squared}')
print(f'Adjusted R² value on the entire dataset: {adjusted_r_squared}')
print(f'R value on the entire dataset: {r_value}')


   focus    A  actual_dist       score   Pred_dist
0    450  1.8          250   511879910  250.015045
1    450  1.8          350   385329868  350.007854
2    450  1.8          450  1192306504  450.005710
3    450  1.8          550   215021250  550.002204
4    450  1.8          650   147477734  649.999767
Importance of the "score" feature: 0.25305350621179806
R² value on the entire dataset: 0.9
Adjusted R² value on the entire dataset: 0.8992287917737789
R value on the entire dataset: 0.9486832980505138
