In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [2]:
def split_scalar(indep_X, dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size=0.25, random_state=0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

In [3]:
def r2_prediction(regressor, X_test, y_test):
    y_pred = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    r2 = r2_score(y_test, y_pred)
    return r2

In [4]:
def Linear(X_train, y_train, X_test):       
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2  

In [5]:
def svm_linear(X_train, y_train, X_test):                
    regressor = SVR(kernel='linear')
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2 

In [6]:
def svm_NL(X_train, y_train, X_test):                
    regressor = SVR(kernel='rbf')
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2  

In [7]:
def Decision(X_train, y_train, X_test):        
    regressor = DecisionTreeRegressor(random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2 

In [8]:
def random(X_train, y_train, X_test):       
    regressor = RandomForestRegressor(n_estimators=10, random_state=0)
    regressor.fit(X_train, y_train)
    r2 = r2_prediction(regressor, X_test, y_test)
    return r2 

In [9]:
def rfeFeature(indep_X, dep_Y, n):
    models = [
        LinearRegression(),
        SVR(kernel='linear'),
        DecisionTreeRegressor(random_state=0),
        RandomForestRegressor(n_estimators=10, random_state=0)
    ]

    rfelist = []

    for model in models:
        log_rfe = RFE(model)  # Remove the second argument here
        log_fit = log_rfe.fit(indep_X, dep_Y)
        log_rfe_feature = log_fit.transform(indep_X)
        rfelist.append(log_rfe_feature)

    return rfelist



In [23]:
def rfe_regression(r_2lin,r_2svml,r_2des,r_2rf): 
    rfedataframe = pd.DataFrame(index=['Linear', 'SVM', 'Decision', 'Random'], columns=['Linear', 'SVM', 'Decision', 'Random'])

    for number, idex in enumerate(rfedataframe.index):        
        rfedataframe['Linear'][idex] = r_2lin[number]       
        rfedataframe['SVM'][idex] = r_2svml[number]
        rfedataframe['Decision'][idex] = r_2des[number]
        rfedataframe['Random'][idex] = r_2rf[number]
    
    return rfedataframe

In [24]:
dataset1 = pd.read_csv("prep.csv", index_col=None)
df2 = dataset1
df2 = pd.get_dummies(df2, drop_first=True)

In [25]:
indep_X = df2.drop('classification_yes', 1)
dep_Y = df2['classification_yes']
rfelist = rfeFeature(indep_X, dep_Y, 3)


  """Entry point for launching an IPython kernel.


In [26]:
r_2lin = []
r_2svml = []
r_2svmnl = []
r_2des = []
r_2rf = []


In [27]:
for i in rfelist:   
    X_train, X_test, y_train, y_test=split_scalar(i,dep_Y)  
    r2_lin=Linear(X_train,y_train,X_test)
    r_2lin.append(r2_lin)
    
    r2_sl=svm_linear(X_train,y_train,X_test)    
    r_2svml.append(r2_sl)
    
    r2_NL=svm_NL(X_train,y_train,X_test)
    r_2svmnl.append(r2_NL)
    
    r2_d=Decision(X_train,y_train,X_test)
    r_2des.append(r2_d)
    
    r2_r=random(X_train,y_train,X_test)
    r_2rf.append(r2_r)

In [28]:
result = rfe_regression(r_2lin, r_2svml, r_2des, r_2rf)
print(result)

            Linear       SVM  Decision    Random
Linear    0.708586  0.683656  0.968654  0.955031
SVM       0.708869  0.677961  0.998794  0.974438
Decision  0.696629  0.661806  0.826389  0.923177
Random    0.652204  0.597077  0.782986  0.925347
