In [7]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import LinearRegression

In [8]:
#!pip install matplotlib==3.8.3

In [9]:
def rfefeature(indep_x, dep_y, n):
    """Run recursive feature elimination (RFE) once per base estimator.

    Four estimators are used, in this order: LinearRegression,
    RandomForestRegressor, DecisionTreeRegressor and a linear-kernel SVR.

    Parameters
    ----------
    indep_x : feature matrix handed to ``RFE.fit`` / ``RFE.transform``.
    dep_y : target values handed to ``RFE.fit``.
    n : value forwarded as ``n_features_to_select``.

    Returns
    -------
    list
        One reduced feature matrix per estimator, in the order listed above.
    """
    base_estimators = (
        LinearRegression(),
        RandomForestRegressor(n_estimators=10, random_state=0),
        DecisionTreeRegressor(random_state=0),
        SVR(kernel='linear'),
    )

    reduced_matrices = []
    for estimator in base_estimators:
        selector = RFE(estimator=estimator, n_features_to_select=n)
        # fit() returns the selector itself, so it can be used for transform().
        selector.fit(indep_x, dep_y)
        reduced_matrices.append(selector.transform(indep_x))
    return reduced_matrices

def split_scalar(indep_x, dep_y):
    """Split the data 75/25 into train/test and standardise the features.

    The split uses ``test_size=0.25`` and ``random_state=0``. The
    StandardScaler is fitted on the training features only and then applied
    to both partitions; the targets are returned unscaled.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` with scaled feature arrays.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        indep_x, dep_y, test_size=0.25, random_state=0
    )
    scaler = StandardScaler().fit(train_x)
    return scaler.transform(train_x), scaler.transform(test_x), train_y, test_y

def R2_prediction(regressor, x_test, y_test):
    """Return the R^2 score of an already fitted regressor on the test set.

    Parameters
    ----------
    regressor : fitted model exposing ``predict``.
    x_test : features passed to ``regressor.predict``.
    y_test : true target values compared against the predictions.
    """
    predictions = regressor.predict(x_test)
    from sklearn.metrics import r2_score
    return r2_score(y_test, predictions)

def linear(x_train, y_train, x_test, y_test):
    """Fit a LinearRegression on the training data and return its test R^2."""
    model = LinearRegression().fit(x_train, y_train)
    return R2_prediction(model, x_test, y_test)

def svm_linear(x_train, y_train, x_test, y_test):
    """Fit a linear-kernel SVR on the training data and return its test R^2."""
    model = SVR(kernel="linear")
    model.fit(x_train, y_train)
    score = R2_prediction(model, x_test, y_test)
    return score

def svm_NL(x_train, y_train, x_test, y_test):
    """Fit an RBF-kernel (non-linear) SVR on the training data and return its test R^2."""
    model = SVR(kernel="rbf").fit(x_train, y_train)
    return R2_prediction(model, x_test, y_test)

def DecisionTree(x_train, y_train, x_test, y_test):
    """Fit a DecisionTreeRegressor (random_state=0) and return its test R^2."""
    tree = DecisionTreeRegressor(random_state=0)
    tree.fit(x_train, y_train)
    return R2_prediction(tree, x_test, y_test)

def RandomForest(x_train, y_train, x_test, y_test):
    """Fit a 10-tree RandomForestRegressor (random_state=0) and return its test R^2."""
    forest = RandomForestRegressor(n_estimators=10, random_state=0)
    forest.fit(x_train, y_train)
    return R2_prediction(forest, x_test, y_test)

In [13]:
def rfe_Regression(acclin, accsvml, accsvmnl, accdes, accrf):
    """Arrange per-model R^2 scores into a comparison table.

    Rows are labelled by the estimator used inside RFE ("Linear", "SVC",
    "DecisionTree", "RandomForest"); columns are labelled by the model that
    was evaluated ("Linear", "SVMl", "SVMnl", "Decision", "Random").

    Parameters
    ----------
    acclin, accsvml, accsvmnl, accdes, accrf : sequences
        Scores for the corresponding column. Element ``k`` of each sequence
        is written to the ``k``-th row, so each needs at least four entries;
        any further entries are ignored.

    Returns
    -------
    pandas.DataFrame
        The 4 x 5 table of scores.

    Raises
    ------
    IndexError
        If any sequence has fewer than four entries.
    """
    rfedataframe = pd.DataFrame(index=["Linear", "SVC", "DecisionTree", "RandomForest"],
                                columns=["Linear", "SVMl", "SVMnl", "Decision", "Random"])
    scores_by_column = {
        "Linear": acclin,
        "SVMl": accsvml,
        "SVMnl": accsvmnl,
        "Decision": accdes,
        "Random": accrf,
    }
    for number, idex in enumerate(rfedataframe.index):
        for column, scores in scores_by_column.items():
            # Single .loc assignment instead of chained df[col][row] = value:
            # chained assignment writes to a temporary under pandas
            # Copy-on-Write and would leave the table filled with NaN.
            rfedataframe.loc[idex, column] = scores[number]
    return rfedataframe

In [14]:
# Read the raw CSV; df2 is kept as a second name for the same frame.
dataset1 = pd.read_csv("50_Startups.csv", index_col=None)
df2 = dataset1

# Dummy-encode categorical columns, dropping the first level of each.
df = pd.get_dummies(df2, drop_first=True)

# Target is the "Profit" column; every other column is a predictor.
dep_y = df["Profit"]
indep_x = df.drop(columns="Profit")

In [30]:
# Reduced feature matrices, one per RFE base estimator, keeping 7 features.
# NOTE(review): the outputs recorded for #5, #6 and #7 are identical, which
# suggests the encoded data may have fewer than 7 columns -- confirm.
rfelist = rfefeature(indep_x, dep_y, 7)

# Score accumulators, one list per evaluated model.
acclin, accsvml, accsvmnl, accdes, accrf = [], [], [], [], []

In [31]:
# Start from empty accumulators so that re-running this cell on its own does
# not append to lists filled by an earlier run (rfe_Regression only reads the
# first four entries, so stale values would otherwise be reported).
acclin, accsvml, accsvmnl, accdes, accrf = [], [], [], [], []

# NOTE(review): rfelist was produced by fitting RFE on the full dataset before
# the train/test split below, so feature selection has seen the test rows.
for i in rfelist:
    x_train, x_test, y_train, y_test = split_scalar(i, dep_y)

    r_lin = linear(x_train, y_train, x_test, y_test)
    acclin.append(r_lin)

    # Linear-kernel SVR score (previously stored under the non-linear name).
    r_svml = svm_linear(x_train, y_train, x_test, y_test)
    accsvml.append(r_svml)

    r_svmnl = svm_NL(x_train, y_train, x_test, y_test)
    accsvmnl.append(r_svmnl)

    r_d = DecisionTree(x_train, y_train, x_test, y_test)
    accdes.append(r_d)

    r_rf = RandomForest(x_train, y_train, x_test, y_test)
    accrf.append(r_rf)

# Rows: estimator used inside RFE; columns: model evaluated on that subset.
result = rfe_Regression(acclin, accsvml, accsvmnl, accdes, accrf)

In [17]:
result
# Run #2 -- presumably n_features_to_select = 2 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,-0.156999,-0.064329,-0.064279,-0.156999,-0.160053
SVC,0.946727,-0.062138,-0.06396,0.946889,0.954316
DecisionTree,0.946727,-0.062138,-0.06396,0.946889,0.954316
RandomForest,0.941134,-0.062816,-0.063969,0.857433,0.953842


In [None]:
#!pip install scikit-learn==0.21.3

In [20]:
result
# Run #3 -- presumably n_features_to_select = 3 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,0.941887,-0.0628,-0.064138,0.879676,0.957957
SVC,0.932548,-0.062178,-0.064093,0.917673,0.955935
DecisionTree,0.946438,-0.062122,-0.064108,0.958583,0.961389
RandomForest,0.934085,-0.062856,-0.064105,0.808723,0.940801


In [23]:
result
# Run #4 -- presumably n_features_to_select = 4 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,0.935093,-0.06284,-0.064175,0.824913,0.939536
SVC,0.932419,-0.06228,-0.064125,0.935656,0.956481
DecisionTree,0.931582,-0.062162,-0.064159,0.918866,0.937709
RandomForest,0.932419,-0.06228,-0.064125,0.935656,0.956481


In [26]:
result
# Run #5 -- presumably n_features_to_select = 5 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,0.931582,-0.062264,-0.064173,0.939642,0.946748
SVC,0.931582,-0.062264,-0.064173,0.939642,0.946748
DecisionTree,0.931582,-0.062264,-0.064173,0.939642,0.946748
RandomForest,0.931582,-0.062264,-0.064173,0.939642,0.946748


In [29]:
result
# Run #6 -- presumably n_features_to_select = 6 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,0.931582,-0.062264,-0.064173,0.939642,0.946748
SVC,0.931582,-0.062264,-0.064173,0.939642,0.946748
DecisionTree,0.931582,-0.062264,-0.064173,0.939642,0.946748
RandomForest,0.931582,-0.062264,-0.064173,0.939642,0.946748


In [32]:
result
# Run #7 -- presumably n_features_to_select = 7 for this output (confirm)

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
Linear,0.931582,-0.062264,-0.064173,0.939642,0.946748
SVC,0.931582,-0.062264,-0.064173,0.939642,0.946748
DecisionTree,0.931582,-0.062264,-0.064173,0.939642,0.946748
RandomForest,0.931582,-0.062264,-0.064173,0.939642,0.946748
