In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split 
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import pickle
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor


def selectkbest(indep_X,dep_Y,n):
    test=SelectKBest(score_func=chi2, k=n)
    fit1=test.fit(indep_X,dep_Y)
    selectK_features=fit1.transform(indep_X)
    selected_col=indep_X.columns[fit1.get_support(indices=True)]
    return selectK_features, selected_col

def split_scaler(indep_X,dep_Y):
    X_train, X_test, y_train, y_test = train_test_split(indep_X, dep_Y, test_size = 0.30, random_state = 0)
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    return X_train, X_test, y_train, y_test

def r2_prediction(regressor,X_test,y_test):
    y_pred = regressor.predict(X_test)
    r2=r2_score(y_test,y_pred)
    return r2

def Linear(X_train,y_train,X_test):
    regressor = LinearRegression()
    regressor.fit(X_train, y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return  r2

def svm_linear(X_train,y_train,X_test):
    regressor = SVR(kernel = 'linear')
    regressor.fit(X_train, y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return  r2 

def svm_NL(X_train,y_train,X_test):
    regressor = SVR(kernel = 'rbf')
    regressor.fit(X_train, y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return  r2  

def Decision(X_train,y_train,X_test):
    regressor = DecisionTreeRegressor(random_state = 0)
    regressor.fit(X_train, y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return  r2  

def random(X_train,y_train,X_test):
    regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
    regressor.fit(X_train, y_train)
    r2=r2_prediction(regressor,X_test,y_test)
    return  r2 

def selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf): 
    dataframe=pd.DataFrame(index=['ChiSquare'],columns=['Linear','SVMl','SVMnl','Decision','Random'])
    for number,idex in enumerate(dataframe.index):        
        dataframe.loc[idex,'Linear']=acclin[number]       
        dataframe.loc[idex,'SVMl']=accsvml[number]
        dataframe.loc[idex,'SVMnl']=accsvmnl[number]
        dataframe.loc[idex,'Decision']=accdes[number]
        dataframe.loc[idex,'Random']=accrf[number]
    return dataframe

In [4]:
dataset=pd.read_csv('prep.csv',index_col=None)
df=dataset

df=pd.get_dummies(df,drop_first=True)

indep_X=df.drop('classification_yes',axis=1)
dep_Y=df['classification_yes']


In [8]:
kbest,selected=selectkbest(indep_X,dep_Y,7)

X_train, X_test, y_train, y_test=split_scaler(indep_X,dep_Y)

acclin=[]
accsvml=[]
accsvmnl=[]
accdes=[]
accrf=[]

r2_lin=Linear(X_train,y_train,X_test)
acclin.append(r2_lin)

r2_sl=svm_linear(X_train,y_train,X_test)    
accsvml.append(r2_sl)
    
r2_NL=svm_NL(X_train,y_train,X_test)
accsvmnl.append(r2_NL)
    
r2_d=Decision(X_train,y_train,X_test)
accdes.append(r2_d)
    
r2_r=random(X_train,y_train,X_test)
accrf.append(r2_r)
    
    
result=selectk_regression(acclin,accsvml,accsvmnl,accdes,accrf)


In [6]:
#3
result

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
ChiSquare,0.637199,0.612658,0.931856,0.608889,0.906844


In [7]:
#4
result

Unnamed: 0,Linear,SVMl,SVMnl,Decision,Random
ChiSquare,0.637199,0.612658,0.931856,0.608889,0.906844
