In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import time
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression
import pickle
import matplotlib.pyplot as plt

In [2]:
# RFE feature selection function
def rfeFeature(indep_X, dep_Y, n):
    """Run recursive feature elimination once per base regressor.

    Four estimators are tried in a fixed order: LinearRegression, SVR with a
    linear kernel, DecisionTreeRegressor and RandomForestRegressor. Each one
    drives its own RFE pass that keeps ``n`` features.

    Parameters
    ----------
    indep_X : feature matrix handed to ``RFE.fit`` / ``RFE.transform``.
    dep_Y : target values handed to ``RFE.fit``.
    n : number of features each RFE pass should keep.

    Returns
    -------
    list
        One reduced feature matrix per estimator, in the order listed above.
    """
    from sklearn.linear_model import LinearRegression
    from sklearn.svm import SVR
    from sklearn.tree import DecisionTreeRegressor
    from sklearn.ensemble import RandomForestRegressor

    estimators = (
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0),
    )

    reduced_matrices = []
    for estimator in estimators:
        # Echo the estimator so the cell output shows which pass is running.
        print(estimator)
        selector = RFE(estimator, n_features_to_select=n)
        selector.fit(indep_X, dep_Y)
        reduced_matrices.append(selector.transform(indep_X))
    return reduced_matrices


In [3]:
# Function to split and scale data
def split_scalar(indep_X, dep_Y):
    """Split into train/test (75/25, ``random_state=0``) and standardise the features.

    The scaler is fitted on the training features only and then applied to the
    test features; the targets are returned unscaled.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)`` with both feature parts scaled.
    """
    features_train, features_test, target_train, target_test = train_test_split(
        indep_X,
        dep_Y,
        test_size=0.25,
        random_state=0,
    )
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(features_train)
    scaled_test = scaler.transform(features_test)
    return scaled_train, scaled_test, target_train, target_test

# Function to calculate R-squared
def r2_prediction(regressor, X_test, y_test):
    """Return the R² score of ``regressor``'s predictions on ``X_test`` against ``y_test``.

    ``regressor`` only needs a ``predict`` method; scoring is delegated to
    ``sklearn.metrics.r2_score`` with ``y_test`` as the ground truth.
    """
    predictions = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    return r2_score(y_test, predictions)

# Regression functions
def Linear(X_train, y_train, X_test, y_test=None):
    """Fit a LinearRegression model and return its R² on the test split.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test : features the fitted model predicts on.
    y_test : true targets for ``X_test``. Optional only for backward
        compatibility: when omitted, the module-level ``y_test`` is used,
        which is what the previous implementation relied on implicitly.

    Returns
    -------
    The value returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.linear_model import LinearRegression

    if y_test is None:
        # Legacy three-argument calls: make the dependency on the notebook-level
        # y_test explicit instead of an accidental free-variable lookup.
        if 'y_test' not in globals():
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
        y_test = globals()['y_test']

    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_linear(X_train, y_train, X_test, y_test=None):
    """Fit an SVR with a linear kernel and return its R² on the test split.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test : features the fitted model predicts on.
    y_test : true targets for ``X_test``. Optional only for backward
        compatibility: when omitted, the module-level ``y_test`` is used,
        which is what the previous implementation relied on implicitly.

    Returns
    -------
    The value returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Legacy three-argument calls: make the dependency on the notebook-level
        # y_test explicit instead of an accidental free-variable lookup.
        if 'y_test' not in globals():
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
        y_test = globals()['y_test']

    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def svm_NL(X_train, y_train, X_test, y_test=None):
    """Fit an SVR with an RBF kernel and return its R² on the test split.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test : features the fitted model predicts on.
    y_test : true targets for ``X_test``. Optional only for backward
        compatibility: when omitted, the module-level ``y_test`` is used,
        which is what the previous implementation relied on implicitly.

    Returns
    -------
    The value returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.svm import SVR

    if y_test is None:
        # Legacy three-argument calls: make the dependency on the notebook-level
        # y_test explicit instead of an accidental free-variable lookup.
        if 'y_test' not in globals():
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
        y_test = globals()['y_test']

    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def Decision(X_train, y_train, X_test, y_test=None):
    """Fit a DecisionTreeRegressor (``random_state=0``) and return its R² on the test split.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test : features the fitted model predicts on.
    y_test : true targets for ``X_test``. Optional only for backward
        compatibility: when omitted, the module-level ``y_test`` is used,
        which is what the previous implementation relied on implicitly.

    Returns
    -------
    The value returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.tree import DecisionTreeRegressor

    if y_test is None:
        # Legacy three-argument calls: make the dependency on the notebook-level
        # y_test explicit instead of an accidental free-variable lookup.
        if 'y_test' not in globals():
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
        y_test = globals()['y_test']

    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

def random(X_train, y_train, X_test, y_test=None):
    """Fit a 10-tree RandomForestRegressor (``random_state=0``) and return its R² on the test split.

    NOTE: the function name is kept for existing callers, but it would shadow
    the standard-library ``random`` module if that were imported in the same
    namespace.

    Parameters
    ----------
    X_train, y_train : training features and targets passed to ``fit``.
    X_test : features the fitted model predicts on.
    y_test : true targets for ``X_test``. Optional only for backward
        compatibility: when omitted, the module-level ``y_test`` is used,
        which is what the previous implementation relied on implicitly.

    Returns
    -------
    The value returned by ``r2_prediction`` for the fitted model.

    Raises
    ------
    NameError
        If ``y_test`` is not passed and no module-level ``y_test`` exists.
    """
    from sklearn.ensemble import RandomForestRegressor

    if y_test is None:
        # Legacy three-argument calls: make the dependency on the notebook-level
        # y_test explicit instead of an accidental free-variable lookup.
        if 'y_test' not in globals():
            raise NameError("y_test was not passed and no module-level 'y_test' is defined")
        y_test = globals()['y_test']

    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    return r2_prediction(regressor, X_test, y_test)

In [4]:
# RFE regression function
def rfe_regression(acclog, accsvml, accdes, accrf):
    """Assemble the per-model score lists into one DataFrame.

    Each argument becomes one column, in this order: ``'Linear'`` (acclog),
    ``'SVMl'`` (accsvml), ``'Decision'`` (accdes), ``'Random'`` (accrf).
    Row ``i`` holds the ``i``-th entry of every list.
    """
    column_names = ('Linear', 'SVMl', 'Decision', 'Random')
    score_lists = (acclog, accsvml, accdes, accrf)
    return pd.DataFrame(dict(zip(column_names, score_lists)))


In [21]:
# Read the prepared CSV and one-hot encode it (first level of each category dropped)
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = pd.get_dummies(dataset1, drop_first=True)

# Target is the encoded 'classification_yes' column; every other column is a feature
dep_Y = df2['classification_yes']
indep_X = df2.drop(columns='classification_yes')

# One RFE pass per base estimator, each keeping 5 features
rfelist = rfeFeature(indep_X, dep_Y, n=5)

LinearRegression()
SVR(kernel='linear')
DecisionTreeRegressor(random_state=0)
RandomForestRegressor(n_estimators=10, random_state=0)


In [22]:
# One independent, empty score list per model family
acclin, accsvml, accsvmnl, accdes, accrf = ([] for _ in range(5))


In [23]:

# Train and evaluate models
# Start from empty score lists so that re-running this cell does not append a
# second set of rows onto results left over from a previous execution.
acclin, accsvml, accdes, accrf = [], [], [], []

for features in rfelist:
    # The split results are kept as module-level names on purpose: the regressor
    # helpers called below without a y_test argument score against the
    # module-level y_test.
    X_train, X_test, y_train, y_test = train_test_split(features, dep_Y, test_size=0.25, random_state=0)

    # Fit the scaler on the training part only, then apply it to the test part.
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)

    r2_lin = Linear(X_train, y_train, X_test)
    acclin.append(r2_lin)

    r2_sl = svm_linear(X_train, y_train, X_test)
    accsvml.append(r2_sl)

    r2_d = Decision(X_train, y_train, X_test)
    accdes.append(r2_d)

    r2_r = random(X_train, y_train, X_test)
    accrf.append(r2_r)

# One row per entry of rfelist, one column per evaluated model.
result=rfe_regression(acclin,accsvml,accdes,accrf)

In [24]:
# R² scores with 5 features kept by RFE (the n passed to rfeFeature).
# Columns: model that was evaluated. Rows 0-3: RFE base estimator, in
# rfeFeature order (LinearRegression, linear SVR, DecisionTree, RandomForest).
result

Unnamed: 0,Linear,SVMl,Decision,Random
0,0.620124,0.457136,0.77924,0.780135
1,0.604508,0.456871,0.776474,0.776745
2,0.674403,0.628206,0.696181,0.815538
3,0.686361,0.643365,0.836806,0.845303


In [20]:
# Presumably the same table for 6 features kept by RFE. This output carries an
# earlier execution count than the cells that build `result` -- TODO confirm by
# re-running with n=6.
result

Unnamed: 0,Linear,SVMl,Decision,Random
0,0.624738,0.456874,0.81723,0.814741
1,0.610294,0.530043,0.806415,0.807916
2,0.697365,0.665248,0.782986,0.829427
3,0.705126,0.670093,0.839675,0.875221


In [16]:
# Presumably the same table for 7 features kept by RFE. This output carries an
# earlier execution count than the cells that build `result` -- TODO confirm by
# re-running with n=7.
result

Unnamed: 0,Linear,SVMl,Decision,Random
0,0.622757,0.5373,0.813952,0.814557
1,0.623155,0.5296,0.81284,0.8134
2,0.697704,0.666684,0.913194,0.940972
3,0.705879,0.667997,0.797454,0.850957


In [8]:
# Presumably the same table for 8 features kept by RFE. This output carries an
# earlier execution count than the cells that build `result` -- TODO confirm by
# re-running with n=8.
result

Unnamed: 0,Linear,SVMl,Decision,Random
0,0.709204,0.684292,0.952168,0.932773
1,0.701052,0.679964,0.82978,0.922139
2,0.703917,0.673437,0.782986,0.918403
3,0.712812,0.671713,0.913194,0.945312


In [12]:
# Presumably the same table for 9 features kept by RFE. This output carries an
# earlier execution count than the cells that build `result` -- TODO confirm by
# re-running with n=9.
result

Unnamed: 0,Linear,SVMl,Decision,Random
0,0.716216,0.684977,0.968654,0.958692
1,0.700809,0.682756,0.82978,0.924309
2,0.702878,0.672776,0.826389,0.922309
3,0.71276,0.675771,0.739583,0.904948
