2. Consider the Iris dataset described in class. Using PyTorch or just NumPy, implement an algorithm that can classify whether an iris is of type 1 (setosa) -- see http://github.com/atcemgil/notes/DataSets.ipynb

Use the logistic regression method (http://github.com/atcemgil/notes/LogisticRegression.ipynb). Even if you cannot finish your program, describe carefully what needs to be done. Compare with the results of your naive Bayes implementation.

In [1]:
import numpy as np
import pandas as pd

def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated element-wise.

    The value is computed from exp(-|z|), which never exceeds 1, so large
    negative inputs do not overflow inside np.exp (the direct formula
    emits an overflow RuntimeWarning there).

    Args:
        z: A real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray shaped like z.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))
    # Both expressions equal 1 / (1 + exp(-z)); each one is selected on the
    # side of zero where it is numerically safe.
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays as they are.
    return result[()]


def likelihood(features, target, weights):
    """Return the Bernoulli log-likelihood of a logistic-regression model.

    Computes sum(target * s - log(1 + exp(s))) with s = features . weights.
    The log(1 + exp(s)) term is evaluated with np.logaddexp(0, s) so that
    large scores do not overflow to inf (which would turn the result
    into -inf).

    Args:
        features: Array of inputs; np.dot(features, weights) must be defined.
        target: Array of 0/1 labels, one per score.
        weights: Weight vector.

    Returns:
        The summed log-likelihood as a scalar.
    """
    scores = np.dot(features, weights)
    # logaddexp(0, s) == log(exp(0) + exp(s)) == log(1 + exp(s)), computed stably.
    return np.sum(target * scores - np.logaddexp(0, scores))


def logistic_regression(features, target, steps, eta, add_intercept = False):
    """Fit logistic-regression weights with fixed-step gradient ascent.

    Starting from an all-zero weight vector, performs `steps` updates of
    the form weights += eta * X^T (target - sigmoid(X . weights)).
    To monitor convergence, likelihood(X, target, weights) can be printed
    every few iterations.

    Args:
        features: 2-D array with one row per sample.
        target: Array of labels, one per row of `features`.
        steps: Number of update iterations.
        eta: Step size applied to each update.
        add_intercept: When true, a column of ones is appended to
            `features`, so the returned vector has one extra trailing
            entry for the bias.

    Returns:
        The learned weight vector as a 1-D array.
    """
    n_samples, n_features = features.shape

    design = features
    if add_intercept:
        # A trailing column of ones lets the last weight act as the bias.
        design = np.hstack((design, np.ones((n_samples, 1))))
        n_features = n_features + 1

    weights = np.zeros(n_features)

    for _ in range(steps):
        residual = target - sigmoid(np.dot(design, weights))
        # X^T . residual is the update direction for the weights.
        weights += eta * np.dot(design.T, residual)

    return weights


def test_logistic_regression(weights, features, targets, add_intercept = False):
    """Print the predicted probability for every row beside its true label.

    For each row the score weights . row is passed through sigmoid, and
    the row is flagged as correct when the probability lies within 0.02
    of the target value.

    Args:
        weights: Weight vector as returned by logistic_regression.
        features: 2-D array with one row per sample.
        targets: Sequence of labels indexed in step with `features`.
        add_intercept: When true, a constant 1 is appended to each row
            before scoring, matching a model trained with an intercept.
    """
    n_rows, _ = features.shape

    for idx in range(n_rows):
        sample = features[idx]
        label = targets[idx]

        if add_intercept:
            # Mirror the column of ones that training appended.
            sample = np.hstack((sample, 1))

        probability = sigmoid(np.dot(weights, sample))
        within_tolerance = np.abs(label - probability) < 0.02

        print(f'Data: {idx} Target: {label} Prediction: {probability} Check: {within_tolerance}')


def iris_logistic_regression(path='../data/iris.txt', target_class=1, steps=200, eta=0.02):
    """Train and report a one-vs-rest logistic regression on the Iris data.

    Reads a space-separated table, turns the class column 'c' into a 0/1
    target (1 where it equals `target_class`), then fits and prints the
    model twice: first without and then with an intercept term. For each
    fit the weight vector is printed, followed by the per-row report of
    test_logistic_regression on the same data used for training.

    Args:
        path: Location of the data file; defaults to the original
            hard-coded '../data/iris.txt'.
        target_class: Value of column 'c' treated as the positive class.
        steps: Number of gradient-ascent iterations.
        eta: Gradient-ascent step size.
    """
    target_label = 'c'
    # Presumably sepal/petal length and width -- confirm against the data file.
    feature_labels = ['sl', 'sw', 'pl', 'pw']

    df_iris = pd.read_csv(path, sep=' ')

    features = df_iris[feature_labels].values
    targets = (df_iris[target_label] == target_class).astype(int).values

    # Same train/evaluate sequence for both model variants.
    for add_intercept in (False, True):
        weights = logistic_regression(features, targets, steps, eta, add_intercept)
        print(weights)
        test_logistic_regression(weights, features, targets, add_intercept)
   

# Run the Iris experiment when this cell/script is executed.
iris_logistic_regression()

[  2.6024471    7.16234658 -10.41157236  -4.74773754]
Data: 0 Target: 1 Prediction: 0.9999999998765203 Check: True
Data: 1 Target: 1 Prediction: 0.9999999925368012 Check: True
Data: 2 Target: 1 Prediction: 0.9999999989415063 Check: True
Data: 3 Target: 1 Prediction: 0.9999999774521254 Check: True
Data: 4 Target: 1 Prediction: 0.9999999999217362 Check: True
Data: 5 Target: 1 Prediction: 0.999999999810683 Check: True
Data: 6 Target: 1 Prediction: 0.9999999985073496 Check: True
Data: 7 Target: 1 Prediction: 0.9999999990713673 Check: True
Data: 8 Target: 1 Prediction: 0.9999999438828049 Check: True
Data: 9 Target: 1 Prediction: 0.9999999935754091 Check: True
Data: 10 Target: 1 Prediction: 0.9999999999617544 Check: True
Data: 11 Target: 1 Prediction: 0.9999999955735202 Check: True
Data: 12 Target: 1 Prediction: 0.9999999939777853 Check: True
Data: 13 Target: 1 Prediction: 0.9999999990264374 Check: True
Data: 14 Target: 1 Prediction: 0.9999999999999307 Check: True
Data: 15 Target: 1 Predicti