In [None]:
import pandas as pd
import numpy as np
import itertools
import shap
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from itertools import combinations

# Exhaustive feature-subset benchmark: for every non-empty combination of the
# candidate features, fit each regressor on a 90/10 train/test split and
# record R2, MAE and RMSE on the held-out rows. All rows are written to
# "model_comparison_results.csv" once the sweep has finished.
data = pd.read_csv("mergedData_annotated.num.csv")

all_features = ["seedNumber_1","seedEbest_1","seedNumber_3","Pu1_1","Pu2_1","pumin1_4u","pumin5_8u","pumin1_4d","pumin5_8d","E_diff_12","E_3","E_1","E_hybrid_1"]
y = data['Y'] 

# Regressors under comparison, keyed by the label written to the "Model"
# column. The same estimator objects are refit for every feature subset.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(n_estimators=100, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, random_state=42),
    "Support Vector Machine": SVR(),
    "K-Nearest Neighbors": KNeighborsRegressor(n_neighbors=5),
    "Decision Tree": DecisionTreeRegressor(random_state=42)
}

results = []

# The row partition produced by train_test_split depends only on the number
# of rows, test_size and random_state, never on which columns are present.
# Splitting once on the full feature table and selecting columns per subset
# therefore gives the same train/test rows as re-splitting inside the loop,
# without repeating the shuffle for each of the 2**len(all_features) - 1
# combinations. Selecting all columns up front also surfaces a missing
# feature column immediately.
X = data[all_features]
X_train_full, X_test_full, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42
)

for r in range(1, len(all_features) + 1):
    for feature_subset in combinations(all_features, r):
        subset_columns = list(feature_subset)
        X_train = X_train_full[subset_columns]
        X_test = X_test_full[subset_columns]

        for model_name, model in models.items():
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

            # Held-out metrics; RMSE is the square root of the MSE.
            r2 = r2_score(y_test, y_pred)
            mae = mean_absolute_error(y_test, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test, y_pred))

            results.append({
                "Feature Combination": ', '.join(feature_subset),
                "Model": model_name,
                "R2 Score": r2,
                "MAE": mae,
                "RMSE": rmse
            })

# One row per (feature subset, model) pair.
results_df = pd.DataFrame(results)
results_df.to_csv("model_comparison_results.csv", index=False)
print("Results saved to model_comparison_results.csv")


  from .autonotebook import tqdm as notebook_tqdm


Results saved to model_comparison_results.csv
