In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets

%matplotlib inline

In [0]:
iris = datasets.load_iris()

In [0]:

data = pd.DataFrame(data = np.c_[iris.data, iris.target], columns=iris["feature_names"] + ["target"])

In [0]:
X = data.drop("target", axis=1)
y = (data.target!=0) * 1

In [0]:
class LogisticRegression:
    def __init__(self, learning_rate = 0.0001, n_iters = 1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        self.weights = np.zeros(n_features)
        self.bias = 0
        
        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.weights) + self.bias
            yprediction = self.sigmoid(linear_model)
            
            #gradient descent
            dw = 1/n_samples * np.dot(X.T, (yprediction - y))
            db = 1/n_samples * np.sum(yprediction-y)
            
            #update
            self.weights -= self.lr * dw
            self.bias -= self.lr * db
    
    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        yprediction = self.sigmoid(linear_model)
        self.ypred_cls = [1 if i >= 0.5 else 0 for i in yprediction]
        return
    
    def accuracy(self, y_true):
        return round(np.sum(y_true == self.ypred_cls) / len(y_true) * 100, 2)

            
    def sigmoid(self, x):
        return 1/(1 + np.exp(-x))

In [0]:
def splitter_x_y(data):
    X = data.drop("target", axis=1)
    y = (data.target!=0) * 1
   
    order = np.random.permutation(len(X))
    portion = int(len(X)*0.2)

    X_train = X.iloc[order[portion:],:]
    X_test = X.iloc[order[:portion],:]
    y_train = y.iloc[order[portion:]]
    y_test = y.iloc[order[:portion]]
    
    return X_train, y_train, X_test, y_test
    

In [0]:
def modeller(X_train, y_train, X_test, y_test):
    regressor = LogisticRegression(learning_rate=.001, n_iters= 10)
    regressor.fit(X_train,y_train)
    regressor.predict(X_test)
    return regressor.accuracy(y_test)
    

In [0]:
def forward_selection(X, y, key):
    initial_features = X.columns.tolist()
    model_performance = {}
    feature_selected = []
    
    if len(initial_features) < key:
        return "Specified key out of the range of features within the data"
    
    while len(feature_selected) < key:
        for i in initial_features:
            if len(feature_selected) != 0:
                selected = [j for j in feature_selected]
                selected += [i]
                data = pd.concat((X[selected], y), axis = 1)                    
            else:
                data = pd.concat((X[i], y), axis = 1)
            X_train, y_train, X_test, y_test = splitter_x_y(data)
            model_performance[i] = modeller(X_train, y_train, X_test, y_test)
        feature_selected.append(sorted(model_performance,key=model_performance.get)[-1])
        initial_features = list(set(initial_features) - set(feature_selected))   
        model_performance = {}
    print("==================================================RESULT BOARD==========================================================\n")       
    print(" Selected Best Feature(s):", feature_selected)
    print("========================================================================================================================")
    return

In [11]:
forward_selection(X,y,3)


 Selected Best Feature(s): ['sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [0]:
def backward_selection(X, y, key):
    features_selected = X.columns.tolist()
    model_performance = {}
    poor_feature = []
    
    if key >= len(features_selected):
        return "Specified key out of the range of features within the data"
    
    while len(poor_feature) < key: 
        for i in features_selected:
            data = pd.concat((X[i],y), axis = 1)
            X_train, y_train, X_test, y_test = splitter_x_y(data)
            model_performance[i] = modeller(X_train, y_train, X_test, y_test)
        poor_feature.append(sorted(model_performance,key=model_performance.get)[0])

        features_selected = list(set(features_selected) - set(poor_feature))
        model_performance = {}
    print("==================================================RESULT BOARD==========================================================\n")       
    print(" Selected Best Feature(s):", features_selected)
    print("========================================================================================================================")
    
    return

In [14]:
backward_selection(X,y,1)


 Selected Best Feature(s): ['petal length (cm)', 'sepal width (cm)', 'sepal length (cm)']
