In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

# Example dataset
# Replace this with your actual dataset.
# The placeholder values are drawn from a seeded generator so that repeated
# runs produce the same numbers, in line with the fixed random_state used for
# the split and the models below.
# Features and target are drawn independently of each other, so there is no
# real signal to learn here; R² scores at or below zero are expected.
rng = np.random.default_rng(0)
data = {
    'feature1': rng.random(100),
    'feature2': rng.random(100),
    'feature3': rng.random(100),
    'target': rng.random(100)
}
df = pd.DataFrame(data)

# Defining the independent variables (features) and the dependent variable (target)
X = df[['feature1', 'feature2', 'feature3']]
y = df['target']

# Splitting the dataset into the Training set (80%) and Test set (20%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Feature Scaling
# The scaler is fitted on the training split only and then reused on the test
# split, so the test data does not influence the scaling statistics.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Function to predict and calculate R² score
def r2_prediction(regressor, X_test, y_test):
    """Score an already-fitted regressor on held-out data.

    Args:
        regressor: Object exposing ``predict(X)``.
        X_test: Feature rows to predict on.
        y_test: True target values matching ``X_test``.

    Returns:
        The value of ``r2_score(y_test, regressor.predict(X_test))``.
    """
    predictions = regressor.predict(X_test)
    return r2_score(y_test, predictions)

# Functions to train and evaluate different regression models
def Linear(X_train, y_train, X_test, y_test):
    """Fit a ``LinearRegression`` model and return its R² on the test split.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        The R² score computed by ``r2_prediction`` for the fitted model.
    """
    # fit() returns the estimator itself, so construction and training chain.
    fitted_model = LinearRegression().fit(X_train, y_train)
    return r2_prediction(fitted_model, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test):
    """Fit a linear-kernel ``SVR`` and return its R² on the test split.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        The R² score computed by ``r2_prediction`` for the fitted model.
    """
    # fit() returns the estimator itself, so construction and training chain.
    fitted_model = SVR(kernel='linear').fit(X_train, y_train)
    return r2_prediction(fitted_model, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test):
    """Fit a non-linear (RBF-kernel) ``SVR`` and return its R² on the test split.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        The R² score computed by ``r2_prediction`` for the fitted model.
    """
    # fit() returns the estimator itself, so construction and training chain.
    fitted_model = SVR(kernel='rbf').fit(X_train, y_train)
    return r2_prediction(fitted_model, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test):
    """Fit a ``DecisionTreeRegressor`` and return its R² on the test split.

    The tree is built with ``random_state=0``.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        The R² score computed by ``r2_prediction`` for the fitted model.
    """
    # fit() returns the estimator itself, so construction and training chain.
    fitted_model = DecisionTreeRegressor(random_state=0).fit(X_train, y_train)
    return r2_prediction(fitted_model, X_test, y_test)

def random(X_train, y_train, X_test, y_test):
    """Fit a 10-tree ``RandomForestRegressor`` and return its R² on the test split.

    The forest is built with ``n_estimators=10`` and ``random_state=0``.

    NOTE: this function shares its name with the standard-library ``random``
    module; the name is kept as-is for existing callers.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        The R² score computed by ``r2_prediction`` for the fitted model.
    """
    forest = RandomForestRegressor(n_estimators=10, random_state=0)
    # fit() returns the estimator itself, so the result can be passed along directly.
    return r2_prediction(forest.fit(X_train, y_train), X_test, y_test)

# Function to perform RFE and return selected features
def rfeFeature(indep_X, dep_Y, n):
    """Run recursive feature elimination (RFE) once per base estimator.

    Four estimators are tried, in this order: linear regression, a
    linear-kernel SVR, a decision tree, and a 10-tree random forest (the two
    tree-based estimators use ``random_state=0``).

    Args:
        indep_X: Feature matrix. A pandas DataFrame is sliced with ``.iloc``
            so the result keeps its column labels; any other 2-D array-like
            is converted with ``np.asarray`` and sliced by column position.
        dep_Y: Target values passed to ``RFE.fit``.
        n: Number of features to keep (passed as ``n_features_to_select``).

    Returns:
        A list with one entry per estimator, in the order above. Each entry
        contains only the columns of ``indep_X`` that RFE retained: a
        DataFrame when ``indep_X`` is a DataFrame, otherwise a NumPy array.
    """
    estimators = [
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0)
    ]

    # Only DataFrames have .iloc; plain arrays (e.g. the output of a scaler)
    # are normalised once here so they can be column-sliced with the mask.
    is_frame = isinstance(indep_X, pd.DataFrame)
    features = indep_X if is_frame else np.asarray(indep_X)

    selections = []
    for estimator in estimators:
        selector = RFE(estimator=estimator, n_features_to_select=n)
        selector.fit(features, dep_Y)
        # support_ is a boolean mask over the columns that were kept.
        keep_mask = selector.support_
        if is_frame:
            selections.append(features.iloc[:, keep_mask])
        else:
            selections.append(features[:, keep_mask])

    return selections

# Example usage of the functions
# Each entry pairs the printed label with the function that trains and scores
# the corresponding model on the scaled train/test split.
model_runners = [
    ("Linear Regression R² Score:", Linear),
    ("SVM Linear R² Score:", svm_linear),
    ("SVM Non-Linear R² Score:", svm_NL),
    ("Decision Tree R² Score:", Decision),
    ("Random Forest R² Score:", random),
]
for label, run_model in model_runners:
    print(label, run_model(X_train, y_train, X_test, y_test))

# Performing RFE and displaying the selected features for each model
# RFE is given the original (unscaled) DataFrame X and the full target y.
selected_features = rfeFeature(X, y, 2) # Selecting top 2 features
for model_number, chosen in enumerate(selected_features, start=1):
    print(f"Model {model_number} selected features:\n", chosen.head())


Linear Regression R² Score: -0.20760527271023466
SVM Linear R² Score: -0.38665213257301634
SVM Non-Linear R² Score: -0.5852610775949976
Decision Tree R² Score: -1.2112303854632245
Random Forest R² Score: -0.4292194014913364
Model 1 selected features:
    feature1  feature2
0  0.149499  0.690764
1  0.894113  0.340861
2  0.368049  0.918417
3  0.782309  0.422161
4  0.128116  0.659654
Model 2 selected features:
    feature1  feature2
0  0.149499  0.690764
1  0.894113  0.340861
2  0.368049  0.918417
3  0.782309  0.422161
4  0.128116  0.659654
Model 3 selected features:
    feature2  feature3
0  0.690764  0.057520
1  0.340861  0.767984
2  0.918417  0.147355
3  0.422161  0.927336
4  0.659654  0.181711
Model 4 selected features:
    feature2  feature3
0  0.690764  0.057520
1  0.340861  0.767984
2  0.918417  0.147355
3  0.422161  0.927336
4  0.659654  0.181711
