In [1]:
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
with open('train.npy', 'rb') as fin:
    X = np.load(fin)#features
    
with open('target.npy', 'rb') as fin:
    y = np.load(fin)#labels


In [3]:
X_train = X[:660, :]
y_train = y[:660,]

In [4]:
X_test = X[660:, :]
y_test = y[660:,]

In [5]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape,X.shape,y.shape

((660, 2), (660,), (166, 2), (166,), (826, 2), (826,))

In [6]:
def expand(X):
    """
    Adds quadratic features. 
    This expansion allows your linear model to make non-linear separation.
    
    For each sample (row in matrix), compute an expanded row:
    [feature0, feature1, feature0^2, feature1^2, feature0*feature1, 1]
    
    :param X: matrix of features, shape [n_samples,2]
    :returns: expanded features of shape [n_samples,6]
    """
    X_expanded = np.zeros((X.shape[0], 6))
    X_expanded[:,0] = X[ :, 0 ]
    X_expanded[:,1] = X[ :, 1 ]
    X_expanded[:,2] = X[ :, 0 ] **2
    X_expanded[:,3] = X[ :, 1 ] **2
    X_expanded[:,4] = X[ :, 0 ] * X[ :, 1 ]
    X_expanded [:,5]= np.ones(X.shape[0])
    
    return X_expanded

In [7]:
def probability(X, w):
    """
    Given input features and weights
    return predicted probabilities of y==1 given x, P(y=1|x), see description above
        
    Don't forget to use expand(X) function (where necessary) in this and subsequent functions.
    
    :param X: feature matrix X of shape [n_samples,6] (expanded)
    :param w: weight vector w of shape [6] for each of the expanded features
    :returns: an array of predicted probabilities in [0,1] interval.
    """
    
    return 1 / (1 + (np.exp(-np.dot(X,w))))

In [8]:
def compute_loss(X, y, w):
    """
    Given feature matrix X [n_samples,6], target vector [n_samples] of 1/0,
    and weight vector w [6], compute scalar loss function L using formula above.
    Keep in mind that our loss is averaged over all samples (rows) in X.
    """
    
    y_prime = probability(X,w)
    error = -(y* np.log(y_prime) + ( 1 - y) * np.log(1 - y_prime))
    
    
    return np.mean(error) 
    

In [9]:
def compute_grad(X, y, w):
    global y_train
    """
    Given feature matrix X [n_samples,6], target vector [n_samples] of 1/0,
    and weight vector w [6], compute vector [6] of derivatives of L over each weights.
    Keep in mind that our loss is averaged over all samples (rows) in X.
    """
    y_prime = probability(X,w)
    diff = np.expand_dims(y_prime - y, -1)
    
    return np.mean( X *diff , axis = 0)

In [10]:
def sgd(X, y):

    X_expanded = expand(X)

    np.random.seed(42)
    w = np.array([0, 0, 0, 0, 0, 1])

    eta= 0.1 # learning rate

    n_iter = 100
    batch_size = 4
    loss = np.zeros(n_iter)
   

    for i in range(n_iter):
         ind = np.random.choice(X_expanded.shape[0], batch_size)
    #     loss[i] = compute_loss(X_expanded, y_train, w)

        # Keep in mind that compute_grad already does averaging over batch for you!
        # TODO:<your code here>

         w = w - eta *compute_grad(X_expanded[ind, :], y[ind], w)
    return w

In [26]:
hit_matrix = np.array([])
hit_class_a = np.array([])
hit_class_b = np.array([])

In [38]:
#calculation of fees (i) average, (ii) minimum and (iii) maximum. And calculation of the standard deviation.

for i in range(100):
    
    np.random.shuffle(X)
    np.random.shuffle(y)
    
    X_train = X[:660, :]
    y_train = y[:660,]

    X_test = X[660:, :]
    y_test = y[660:,]
    
    w = sgd(X_train, y_train)
    X_test = expand(X_test)
    y_test_prime = probability(X_test, w)
    
    y_test_prime[y_test_prime > 0.5 ]  = 1
    y_test_prime[y_test_prime < 0.5] = 0
    
    average = y_test_prime == y_test
    
    hit_matrix = np.append(hit_matrix, np.count_nonzero(average))
    
    average_class_a = y_test_prime == 0
    average_class_a = average_class_a == y_test
   
    hit_class_a = np.append(hit_class_a, np.count_nonzero(average_class_a))
   
    average_class_b = y_test_prime == 1
    average_class_b = average_class_b == y_test
   
    hit_class_b = np.append(hit_class_b, np.count_nonzero(average_class_b))
    

In [39]:
print ("Media de acertos: ")
print("-Média: ", np.mean(hit_matrix))
print ("-Mínima", np.min(hit_matrix))
print ("-Máxima: ", np.max(hit_matrix))
print ("-Desvio padrão: ", np.std(hit_matrix))


Media de acertos: 
-Média:  99.8069306931
-Mínima 79.0
-Máxima:  116.0
-Desvio padrão:  6.08336080618


In [40]:
print ("Media de acertos da classe A(0): ")
print("-Média: ", np.mean(hit_class_a))
print ("-Mínima", np.min(hit_class_a))
print ("-Máxima: ", np.max(hit_class_a))
print ("-Desvio padrão: ", np.std(hit_class_a))

Media de acertos da classe A(0): 
-Média:  66.2139303483
-Mínima 50.0
-Máxima:  87.0
-Desvio padrão:  6.09126321361


In [41]:
print ("Media de acertos da classe B(1): ")
print("-Média: ", np.mean(hit_class_b))
print ("-Mínima", np.min(hit_class_b))
print ("-Máxima: ", np.max(hit_class_b))
print ("-Desvio padrão: ", np.std(hit_class_b))

Media de acertos da classe B(1): 
-Média:  99.7860696517
-Mínima 79.0
-Máxima:  116.0
-Desvio padrão:  6.09126321361
