In [28]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2

def selectKBest(x_indep,y_dep,n):
    """Keep the ``n`` features that score highest on the chi-squared test.

    Parameters
    ----------
    x_indep : feature table passed to ``SelectKBest.fit`` / ``transform``.
    y_dep : target values the chi-squared scores are computed against.
    n : number of features to keep (forwarded as ``k``).

    Returns
    -------
    The result of ``SelectKBest.transform`` on ``x_indep``, i.e. the input
    reduced to the ``n`` selected columns.
    """
    selector=SelectKBest(score_func=chi2,k=n)
    # fit() returns the fitted selector, so the transform can be chained.
    return selector.fit(x_indep,y_dep).transform(x_indep)

def split_scalar(x_indep,y_dep):
    """Split the data 75/25 into train/test sets and standardise the features.

    The split uses ``random_state=0`` so it is repeatable. The scaler is
    fitted on the training features only and then applied to the test
    features; the targets are returned unscaled.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test)`` with both feature sets scaled.
    """
    parts=train_test_split(x_indep,y_dep,test_size=0.25,random_state=0)
    raw_train,raw_test,y_train,y_test=parts

    scaler=StandardScaler()
    scaled_train=scaler.fit_transform(raw_train)
    # Reuse the statistics learned from the training set for the test set.
    scaled_test=scaler.transform(raw_test)
    return scaled_train,scaled_test,y_train,y_test

def r2_prediction(regressor,x_test,y_test):
    """Return the R² score of ``regressor`` on the given test data.

    Parameters
    ----------
    regressor : fitted estimator exposing ``predict``.
    x_test : features passed to ``regressor.predict``.
    y_test : true target values the predictions are compared with.
    """
    predictions=regressor.predict(x_test)
    from sklearn.metrics import r2_score
    return r2_score(y_test,predictions)

def linear(x_train,y_train,x_test,y_test):
    """Fit an ordinary ``LinearRegression`` and return its R² on the test set."""
    from sklearn.linear_model import LinearRegression
    # fit() returns the estimator itself, so it can be built and trained in one step.
    model=LinearRegression().fit(x_train,y_train)
    return r2_prediction(model,x_test,y_test)

def svm_linear(x_train,y_train,x_test,y_test):
    """Fit a support vector regressor with a linear kernel; return its test R²."""
    from sklearn.svm import SVR
    model=SVR(kernel='linear').fit(x_train,y_train)
    return r2_prediction(model,x_test,y_test)

def svm_NL(x_train,y_train,x_test,y_test):
    """Fit a support vector regressor with the RBF (non-linear) kernel; return its test R²."""
    from sklearn.svm import SVR
    model=SVR(kernel='rbf').fit(x_train,y_train)
    return r2_prediction(model,x_test,y_test)

def decision(x_train,y_train,x_test,y_test):
    """Fit a ``DecisionTreeRegressor`` (``random_state=0``) and return its test R²."""
    from sklearn.tree import DecisionTreeRegressor
    model=DecisionTreeRegressor(random_state=0).fit(x_train,y_train)
    return r2_prediction(model,x_test,y_test)

def random(x_train,y_train,x_test,y_test):
    """Fit a 10-tree ``RandomForestRegressor`` (``random_state=0``); return its test R².

    Note: this function's name shadows the standard-library ``random`` module
    within the notebook namespace.
    """
    from sklearn.ensemble import RandomForestRegressor
    forest=RandomForestRegressor(n_estimators=10,random_state=0)
    forest.fit(x_train,y_train)
    return r2_prediction(forest,x_test,y_test)


def selectk_regression(acclin,accsvm,accsvmnl,accdt,accrf):
    """Collect per-model R² scores into a one-row summary table.

    Parameters
    ----------
    acclin, accsvm, accsvmnl, accdt, accrf : sequences of scores
        Scores for the 'Linear', 'SVMl', 'SVMnl', 'Decision' and 'Random'
        columns respectively. Entry ``i`` of each sequence fills row ``i`` of
        the table; since the table has the single row 'ChiSquare', only the
        first entry of each sequence is read.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``['ChiSquare']`` with one column per model.

    Raises
    ------
    IndexError
        If any score sequence is empty.
    """
    scores_by_column={
        'Linear':acclin,
        'SVMl':accsvm,
        'SVMnl':accsvmnl,
        'Decision':accdt,
        'Random':accrf,
    }
    df=pd.DataFrame(index=['ChiSquare'],columns=list(scores_by_column))
    for number,idex in enumerate(df.index):
        for column,scores in scores_by_column.items():
            # Write through .loc: chained assignment (df[column][idex] = ...)
            # may modify a temporary copy and leave the table unfilled.
            df.loc[idex,column]=scores[number]
    return df

In [29]:
# Load the prepared data set with a default integer index.
dataset=pd.read_csv(
    "prep.csv",
    index_col=None,
)
# Dummy-encode the frame; drop_first=True omits the first level of each
# encoded column.
ds1=pd.get_dummies(
    dataset,
    drop_first=True,
)

In [30]:
# Separate the predictors from the target column.
x_indep=ds1.drop('classification_yes',axis=1)
y_dep=ds1['classification_yes']

# Keep the 5 predictors with the highest chi-squared score.
# NOTE(review): the selector is fitted on the full data set before the
# train/test split below, so test rows influence feature selection — confirm
# this is intended.
kfeatures=selectKBest(x_indep,y_dep,5)

# One score list per model. Each list receives a single entry, matching the
# single 'ChiSquare' row that selectk_regression builds.
acclin=[]
accsvm=[]
accsvmnl=[]
accdt=[]
accrf=[]

x_train,x_test,y_train,y_test=split_scalar(kfeatures,y_dep)

# Evaluate every model exactly once. The previous `for i in kfeatures:` loop
# iterated over the selected-feature matrix itself (once per row for a NumPy
# array) without using `i`, so it retrained identical models repeatedly and
# appended duplicate scores of which only the first was ever read.
r2_lin=linear(x_train,y_train,x_test,y_test)
acclin.append(r2_lin)
r2_svm=svm_linear(x_train,y_train,x_test,y_test)
accsvm.append(r2_svm)
r2_svmnl=svm_NL(x_train,y_train,x_test,y_test)
accsvmnl.append(r2_svmnl)
r2_dt=decision(x_train,y_train,x_test,y_test)
accdt.append(r2_dt)
r2_rf=random(x_train,y_train,x_test,y_test)
accrf.append(r2_rf)

In [31]:
# Assemble the per-model R² scores into the summary table and display it.
result=selectk_regression(
    acclin=acclin,
    accsvm=accsvm,
    accsvmnl=accsvmnl,
    accdt=accdt,
    accrf=accrf,
)
result

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
ChiSquare,0.551985,0.545395,0.749654,0.696181,0.836806
