This notebook applies a least-squares optimizer, using `curve_fit` from `scipy.optimize`, to the smiley-frowny Gaussian-noise data.

# Libraries

In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import cv2
import time
from datetime import date
from sklearn.metrics import accuracy_score, confusion_matrix

from scipy.optimize import curve_fit

In [4]:
def model_smiley(i, a, b, c):
  """
  Quadratic "smiley" model evaluated on the scaled positions i/n.

  Variables:
  - i: array of positions; its length n is used as the scaling factor
  - a, b, c: quadratic, linear and constant coefficients

  Returns a * t**2 + b * t + c with t = i / n.
  """
  # Scale the positions by the number of points once, then reuse.
  t = i / len(i)
  return a * t ** 2 + b * t + c


def model_frowny(i, a, b, c):
  """
  Quadratic "frowny" model evaluated on the shifted, scaled positions
  (i - n - 1)/n.

  Variables:
  - i: array of positions; its length n sets both the shift and the scale
  - a, b, c: quadratic, linear and constant coefficients

  Returns a * t**2 + b * t + c with t = (i - n - 1) / n.
  """
  size = len(i)
  # Shift by n + 1 before scaling, so positions 1..n map to -1..-1/n.
  t = (i - size - 1) / size
  return a * t ** 2 + b * t + c


def jac_smiley(i, a, b, c):
  """
  Jacobian of model_smiley with respect to (a, b, c).

  The model is linear in its parameters, so the Jacobian depends only on
  the positions i; a, b and c are accepted (and ignored) so the signature
  matches the model's.

  Returns an array of shape (len(i), 3) whose columns are
  (i/n)**2, i/n and 1.
  """
  size = len(i)
  t = i / size

  # One column per parameter: d/da, d/db, d/dc.
  return np.column_stack((t ** 2, t, np.ones(size)))


def jac_frowny(i, a, b, c):
  """
  Jacobian of model_frowny with respect to (a, b, c).

  The model is linear in its parameters, so the Jacobian depends only on
  the positions i; a, b and c are accepted (and ignored) so the signature
  matches the model's.

  Returns an array of shape (len(i), 3) whose columns are
  ((i - n - 1)/n)**2, (i - n - 1)/n and 1.
  """
  size = len(i)
  t = (i - size - 1) / size

  # One column per parameter: d/da, d/db, d/dc.
  return np.column_stack((t ** 2, t, np.ones(size)))


def _fit_quadratic_coeffs(rows, n, model, jac, p0):
  """
  Fit `model` to every row of `rows` and collect the fitted 'a' values.

  Variables:
  - rows: table whose first n columns are the measurements and whose next
    n columns are the corresponding errors
  - n: number of measurement points per row
  - model, jac: model function and its Jacobian, as passed to curve_fit
  - p0: initial guess on the parameters (a, b, c)

  Returns an array with the optimal 'a' of each row, in row order.
  """
  i = np.arange(1, n+1)
  fitted_a = []

  for j in range(len(rows)):
    # mean values (measurements) and their errors
    ydata = np.array(rows.iloc[j, :n])
    dy    = np.array(rows.iloc[j, n:n*2])

    # a fresh copy of the initial guess is handed to every fit
    popt, _ = curve_fit(model, i, ydata,
                        p0=np.array(p0), jac=jac, sigma=dy)

    # save only parameter 'a'
    fitted_a.append(popt[0])

  return np.array(fitted_a)


def run_LeastSquares(data, n):

  """
  Fit each row of `data` with the quadratic model of its own class and
  return the fitted leading coefficient 'a' of every row.

  Variables:
  - 1 labels smiley, 0 labels frowny (column 'type' of data)
  - data: table with measurements (first n columns) and errors (next n
    columns), plus the 'type' column
  - n: number of measurement points per row

  Smiley rows are fitted with model_smiley starting from (1, 0, 10),
  frowny rows with model_frowny starting from (-1, 0, 10).

  Returns:
  - array of 'a' values for the smiley rows, array of 'a' values for the
    frowny rows, each in the row order of its subset of data
  """

  x1 = data[data['type'] == 1]
  x0 = data[data['type'] == 0]

  # Repeat for all smiley instances
  smiley_array = _fit_quadratic_coeffs(x1, n, model_smiley, jac_smiley,
                                       (1., 0., 10.))

  # Repeat for all frowny instances
  frowny_array = _fit_quadratic_coeffs(x0, n, model_frowny, jac_frowny,
                                       (-1., 0., 10.))

  return smiley_array, frowny_array

# Load data and run least squares

In [5]:
# Load data: read the test-set CSV (path is relative to the working
# directory) into a DataFrame. Its 'type' column is used further down as
# the true label (1 = smiley, 0 = frowny).
file_name = 'smileyfrowny/datasets/test_m100_n20_sig5_dg5.csv'

data = pd.read_csv(file_name)

In [6]:
# Run least squares: s and f receive the fitted 'a' coefficients of the
# smiley (type 1) and frowny (type 0) rows, respectively.
# n=20 is the number of measurement points per row
# (presumably the "n20" in the file name -- confirm).
s, f = run_LeastSquares(data, n=20)

In [7]:
# Re-attach every fitted 'a' to the row of `data` it was fitted from
smiley = pd.DataFrame(s, index=data[data['type']==1].index)
frowny = pd.DataFrame(f, index=data[data['type']==0].index)

# Stack both classes and restore the original row order of the data set
preds = pd.concat([smiley, frowny]).sort_index()

# The sign of 'a' is the predicted label: negative -> 0, positive -> 1
preds[preds < 0] = 0
preds[preds > 0] = 1

# Get true label
true = data['type']

# Compute accuracy and both confusion matrices
accuracy = accuracy_score(true, preds)
cm_counts = confusion_matrix(true, preds)
cm_normalized = confusion_matrix(true, preds, normalize='true')

# Report the results
print('Accuracy: ', accuracy)
print('\nConfusion Matrix:\n\n', cm_counts)
print('\nNormalized Confusion Matrix:\n\n', cm_normalized)

Accuracy:  0.92563

Confusion Matrix:

 [[46328  3672]
 [ 3765 46235]]

Normalized Confusion Matrix:

 [[0.92656 0.07344]
 [0.0753  0.9247 ]]
