<a href="https://colab.research.google.com/github/riddlemeS4m/machine-learning-scientist-datacamp/blob/dev-google/loss_functions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Loss Functions for Linear Classifiers (Logistic Regression and SVM)

In [None]:
import pandas as pd
import numpy as np
import warnings

warnings.filterwarnings('ignore')

In [None]:
!pip install scikit-learn



In [None]:
# linear classifiers predict categories based on a decision boundary
# logistic regression and (linear-kernel) svm are types of linear classifiers, both aim to draw a straight line to separate classes in training data (just using different methods)
# nonlinear classifiers are needed when data is not linearly separable

In [None]:
# Two small vectors used to illustrate the dot product.
x = np.arange(3)       # array([0, 1, 2])
y = np.arange(3, 6)    # array([3, 4, 5])

# Element-wise product, then the sum of those products.
# (The original multiplied an undefined capital `X`; the lowercase vector is meant.)
elementwise = x * y
summed = np.sum(elementwise)
# same as vv
dot = x @ y

# ^^ that's a dot product
# A cell only renders its final expression, so show all three results together.
elementwise, summed, dot

In [None]:
# predict function for linear classifiers vv
# raw model output = coefficients * features + intercept
# compute output then check the sign: if positive, predict one, if negative, predict the other
# logistic regression and svm have the same predict function, it's just that their fit functions are different

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# NOTE(review): `X` is never defined in the visible notebook, and `y` is still the
# 3-element array from the dot-product cell above. Presumably a dataset (features X,
# labels y) was meant to be loaded before this cell; TODO confirm and add the load step.
lr = LogisticRegression()
svm = SVC()

lr.fit(X, y)

# Predict once and show examples 10 and 20 together: only a cell's last expression is
# rendered, so two separate bare `lr.predict(X)[i]` lines would display just the second.
predictions = lr.predict(X)
predictions[10], predictions[20]

In [None]:
# Raw model output = coefficients @ features + intercept, for examples 10 and 20.
# Both values are collected into one tuple because a cell only renders its final
# expression; as two bare lines the first result would be silently discarded.
raw_output_10 = lr.coef_ @ X[10] + lr.intercept_
raw_output_20 = lr.coef_ @ X[20] + lr.intercept_
raw_output_10, raw_output_20

In [None]:
# the sign of the raw model output tells you what side of the decision boundary you're on

In [None]:
# loss functions vv

In [None]:
# loss function is a penalty score that tells us how poorly the model is doing
# minimizing the loss function is jiggling the parameters until it's as small as possible
# the fit function is basically running the minimization of the loss function
# model.score() isn't usually the same as the loss function value

In [None]:
# can't use least squares as a loss function for classification problems, because output is categorical (not numeric)
# might be tempted to use number of errors (0 being not an error, 1 being an error) as a loss function
# however, logistic regression and svc don't use this because it's hard to minimize: the 0-1 loss is piecewise constant (flat almost everywhere), so it gives an optimizer no gradient to follow, and it isn't convex

In [None]:
from scipy.optimize import minimize

# Minimize f(x) = x**2 from two different starting guesses (0 and 2).
# Both results are shown as one tuple because only the last expression of a cell is
# displayed; as two bare lines the first minimizer would never be seen.
x_min_from_0 = minimize(np.square, 0).x
x_min_from_2 = minimize(np.square, 2).x
x_min_from_0, x_min_from_2

Exercise

In [None]:
# The squared error, summed over training examples
def my_loss(w):
    """Return the squared error of the prediction ``w @ X[i]``, summed over examples.

    Reads the notebook-level ``X`` (features) and ``y`` (true targets); one squared
    residual is accumulated for every index in ``range(y.size)``.
    """
    return sum((y[i] - w @ X[i]) ** 2 for i in range(y.size))

# LinearRegression is used below but is not imported in the visible notebook
# (only LogisticRegression and SVC are), so import it here to avoid a NameError.
from sklearn.linear_model import LinearRegression

# Returns the w that makes my_loss(w) smallest (X[0] is only the starting guess)
w_fit = minimize(my_loss, X[0]).x
print(w_fit)

# Compare with scikit-learn's LinearRegression coefficients
lr = LinearRegression(fit_intercept=False).fit(X,y)
print(lr.coef_)

In [None]:
# loss function diagrams vv

In [None]:
# the idea is, for a classifier model, you need a loss function that is high for very incorrect predictions, and low (at or near 0) for confidently correct predictions
# the least squares loss function doesn't work for classifier models, because its loss function is a quadratic function, which also grows for confidently correct predictions (large raw model output with the right sign)
# instead, logistic regression and svm use different loss functions
# two of them are called logistic and hinge loss functions

Exercise

In [None]:
# Mathematical functions for logistic and hinge losses
def log_loss(raw_model_output):
   """Logistic loss ``log(1 + exp(-z))`` of the raw model output ``z``.

   Evaluated as ``np.logaddexp(0, -z)``, which is the same quantity but does not
   overflow: the naive form computes ``exp(-z)`` first and returns ``inf`` for a
   very negative ``z`` (e.g. z = -1000), where the true value is about ``-z``.
   Works element-wise on arrays as well as on scalars.
   """
   return np.logaddexp(0, -raw_model_output)
def hinge_loss(raw_model_output):
   """Hinge loss of the raw model output ``z``: ``1 - z`` where positive, else 0."""
   shortfall = 1 - raw_model_output
   return np.maximum(0, shortfall)

# matplotlib is used below but is not imported in the visible notebook, so
# import it here; without this the cell raises NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Create a grid of values and plot
grid = np.linspace(-2,2,1000)
fig, ax = plt.subplots()
ax.plot(grid, log_loss(grid), label='logistic')
ax.plot(grid, hinge_loss(grid), label='hinge')
# Label the figure so it can be read on its own when the notebook is skimmed.
ax.set(xlabel='raw model output', ylabel='loss', title='Logistic vs. hinge loss')
ax.legend()
plt.show()

Exercise

In [None]:
# The logistic loss, summed over training examples
def my_loss(w):
    """Return the logistic loss of ``w`` summed over every row of ``X``.

    For each example the raw output ``w @ X[i]`` is multiplied by its label ``y[i]``
    and passed to ``log_loss``. Reads the notebook-level ``X``, ``y`` and ``log_loss``.
    NOTE(review): multiplying by the label presumably assumes ``y`` is coded as
    -1/+1 - confirm against the data actually loaded.
    """
    return sum(log_loss((w @ X[i]) * y[i]) for i in range(len(X)))

# Find the weight vector that minimizes my_loss, starting the search from X[0]
fit_result = minimize(my_loss, X[0])
w_fit = fit_result.x
print(w_fit)

# Reference fit from scikit-learn's LogisticRegression: no intercept, and a very
# large C, for comparison with the hand-rolled result
lr = LogisticRegression(fit_intercept=False, C=1000000)
lr.fit(X, y)
print(lr.coef_)