In [1]:
#################################### Forward, Backward, Stepwise #######################################
import numpy as np
import pandas as pd
from sklearn import linear_model as lm
from ipynb.fs.full.variable_selection import Variable_selection

In [2]:
####################### Prepare dataset ############################
# Load the Boston housing data bundled with scikit-learn.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run - confirm the
# environment before re-executing.
from sklearn.datasets import load_boston
boston = load_boston()

# Predictors: label the feature matrix with the dataset's feature names, then
# convert to a plain ndarray. Every column is kept; nothing is dropped here.
data = pd.DataFrame(boston.data, columns=boston.feature_names)
input_data = np.array(data)

# Response: price
target_data = np.array(boston.target)
print(input_data.shape)

(506, 13)


In [3]:
# Linear regression estimator handed to the selection helper
reg = lm.LinearRegression()

# Selection helper bound to the estimator, the predictor matrix and the target
selection_tech = Variable_selection(
    model=reg,
    input_data=input_data,
    target_data=target_data,
)

In [4]:
# Forward selection with alpha=0.1; the result is a list of column indices
var_fw = selection_tech.forward_selection(alpha=0.1)
n_selected = len(var_fw)
print(var_fw, "      number of variables : %d" % n_selected)

[12, 5, 10, 7, 4, 3, 11, 1, 0, 8, 9]       number of variables : 11


In [5]:
# Map the forward-selected column indices back to their feature names
names = boston.feature_names
selected_names = [names[col_idx] for col_idx in var_fw]
print(selected_names)

['LSTAT', 'RM', 'PTRATIO', 'DIS', 'NOX', 'CHAS', 'B', 'ZN', 'CRIM', 'RAD', 'TAX']


In [6]:
# R-squared and adjusted R-squared using only the forward-selected columns
X_fw = np.take(input_data, var_fw, axis=1)
Rsq_fw, adj_Rsq_fw = selection_tech.R_sq(model=reg, X=X_fw, Y=target_data)
print("R_sq :", Rsq_fw, "adj_R_sq :", adj_Rsq_fw)

R_sq : 0.740545352046 adj_R_sq : 0.734768021829


In [7]:
# Backward elimination with alpha=0.1; the result is a list of column indices
var_bw = selection_tech.backward_elimination(alpha=0.1)
n_selected = len(var_bw)
print(var_bw, "      number of variables : %d" % n_selected)

[0, 1, 3, 4, 5, 7, 8, 9, 10, 11, 12]       number of variables : 11


In [8]:
# Map the columns kept by backward elimination back to their feature names
names = boston.feature_names
selected_names = [names[col_idx] for col_idx in var_bw]
print(selected_names)

['CRIM', 'ZN', 'CHAS', 'NOX', 'RM', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT']


In [9]:
# R-squared and adjusted R-squared using only the columns kept by backward elimination
X_bw = np.take(input_data, var_bw, axis=1)
Rsq_bw, adj_Rsq_bw = selection_tech.R_sq(model=reg, X=X_bw, Y=target_data)
print("R_sq :", Rsq_bw, "adj_R_sq :", adj_Rsq_bw)

R_sq : 0.740545352046 adj_R_sq : 0.734768021829


In [10]:
# Stepwise selection with alpha=0.1; the result is a list of column indices
var_st = selection_tech.stepwise_selection(alpha=0.1)
n_selected = len(var_st)
print(var_st, "      number of variables : %d" % n_selected)

[12, 5, 10, 7, 4, 11, 0, 8, 9, 1, 3]       number of variables : 11


In [11]:
# Map the stepwise-selected column indices back to their feature names
names = boston.feature_names
selected_names = [names[col_idx] for col_idx in var_st]
print(selected_names)

['LSTAT', 'RM', 'PTRATIO', 'DIS', 'NOX', 'B', 'CRIM', 'RAD', 'TAX', 'ZN', 'CHAS']


In [12]:
# R-squared and adjusted R-squared using only the stepwise-selected columns
X_st = np.take(input_data, var_st, axis=1)
Rsq_st, adj_Rsq_st = selection_tech.R_sq(model=reg, X=X_st, Y=target_data)
print("R_sq :", Rsq_st, "adj_R_sq :", adj_Rsq_st)

R_sq : 0.740545352046 adj_R_sq : 0.734768021829


In [13]:
###################################### Genetic Algorithm #####################################
import os
import sys

# Directory added to the module search path (presumably where
# Genetic_Algorithm.py lives - confirm). The default is the original
# hard-coded location; set the BA_TUTORIAL_DIR environment variable to use a
# different directory without editing the notebook.
GA_MODULE_DIR = os.environ.get('BA_TUTORIAL_DIR', 'C:/Users/MJ/Desktop/workspace/BA_tutorial')

# Guard against duplicates: re-running this cell must not keep growing sys.path.
if GA_MODULE_DIR not in sys.path:
    sys.path.append(GA_MODULE_DIR)

In [14]:
from Genetic_Algorithm import Genetic_algorithm

In [15]:
# Fresh linear regression estimator for the genetic-algorithm search
# (this rebinds `reg`; the selector built earlier keeps its own estimator object)
reg = lm.LinearRegression()

# Genetic-algorithm searcher over the same predictors and target
Genetic_al = Genetic_algorithm(
    model=reg,
    X=input_data,
    Y=target_data,
    chrom_num=100,
    eval_metric='adj_Rsq',
    chrom_ratio=0.5,
)

In [21]:
# Run the genetic algorithm for up to 100 iterations. The call returns two
# values: the selected column indices and the final value of the evaluation
# metric.
# NOTE(review): `eval` shadows the Python builtin of the same name; the name is
# kept because a later cell prints it. No random seed is set in this notebook,
# so the result presumably varies between runs - confirm against Do_GA.
var_GA, eval = Genetic_al.Do_GA(max_iter=100)

Finished 10th generation !!
Finished 20th generation !!
Finished 30th generation !!
Finished 40th generation !!
Finished 50th generation !!
Finished 60th generation !!
Finished 70th generation !!
Finished 80th generation !!
Finished 90th generation !!
Finished 100th generation !!


In [22]:
# Column indices chosen by the genetic algorithm, and how many there are
n_selected = len(var_GA)
print(var_GA, "      number of variables : %d" % n_selected)

[2, 4, 5, 10, 12]       number of variables : 5


In [19]:
# Map the GA-selected column indices back to their feature names.
# The names depend on the current `var_GA`, so re-run this cell whenever the
# genetic algorithm has been run again.
names = boston.feature_names
selected_names = [names[col_idx] for col_idx in var_GA]
print(selected_names)

['INDUS', 'NOX', 'RM', 'PTRATIO', 'LSTAT']


In [20]:
# Report the second value returned by Do_GA. With eval_metric='adj_Rsq' passed
# to the constructor this is presumably the adjusted R-squared of the selected
# subset - confirm against Genetic_algorithm.
# NOTE(review): `eval` here is the notebook variable, not the Python builtin.
print("Final eval metric : ",eval)

Final eval metric :  0.72317090865
