# OOP Logistic Regression Implementation

This Jupyter Notebook provides a basic implementation of Logistic Regression for classification. The data set contains measurements of a single species of insect captured on two continents. We want to predict an insect's continent given the other measurements we have.

#### Step 1 - Import Dependencies

In [None]:
import numpy as np
import pandas as pd

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')

from sklearn.linear_model import LogisticRegression

from sklearn.metrics import mean_absolute_error,mean_squared_error 

#### Step 2 - Import And Visualise The Data

In [None]:
# Load the tab-separated insect measurements into a DataFrame
raw_insects = pd.read_csv('./data/insects.csv', sep='\t')

# Rebuild the frame from only the four columns used in this notebook
kept_columns = ['continent', 'latitude', 'sex', 'wingsize']
insects = pd.DataFrame({column: raw_insects[column] for column in kept_columns})

# Feature matrix (the model inputs)
feature_columns = ['wingsize', 'latitude', 'sex']
X_insects = insects[feature_columns]
# Target vector (the value to predict)
y_insects = insects['continent']

insects.head()

In [None]:
# Plot wingsize against continent, colour coded by sex.
# The axis labels name the quantities actually passed to scatter():
# continent on the x-axis and wingsize on the y-axis.
# scatter() returns the plotted collection; it is kept under the original
# name `ax` so the cell still binds the same variable.
ax = plt.scatter(insects.continent, insects.wingsize, c=insects['sex'])
plt.xlabel("Continent")
plt.ylabel("Wingsize")
# Label the colour bar so the colour coding can be read off the figure
plt.colorbar(label="Sex")
plt.show()

#### Step 3: Instantiate a LogisticRegression Object

In [None]:
# Create the classifier; no arguments are passed, so scikit-learn's defaults apply
insects_regression = LogisticRegression()

#### Step 4: Fit the model

In [None]:
# Train the classifier on the feature columns (X_insects) against the
# continent labels (y_insects)
insects_regression.fit(X_insects, y_insects)

Now that the regression has been fit, we can use the `predict` method to predict which continent our model thinks an insect was captured on.

In [None]:
# Class predictions for every row of the feature matrix
insects_predictions = insects_regression.predict(X_insects)

# A single new insect, described by the same three feature columns
new_insect = pd.DataFrame(
    [[1, 40, 0]],
    columns=['wingsize', 'latitude', 'sex'],
)
# Attach the predicted continent as an extra column before printing
predicted_continent = insects_regression.predict(new_insect)
new_insect['continent'] = predicted_continent
print(f"New insect is:\n{new_insect}")

Additionally (and in many cases more usefully), `LogisticRegression` has a `predict_proba` method that produces a predicted **probability** for each class.

In [None]:
# Class probabilities for every row of the feature matrix
insects_probabilities = insects_regression.predict_proba(X_insects)

# Re-create the new insect with only the three feature columns
new_insect = pd.DataFrame(
    [[1, 40, 0]],
    columns=['wingsize', 'latitude', 'sex'],
)
new_insect_probabilities = insects_regression.predict_proba(new_insect)
print(f"Probabilities for new insect are:\n{new_insect_probabilities}")

#### Step 5: Plot the Predictions

In [None]:
# Plot the actual labels, the predicted probabilities and the predicted
# labels side by side, all against wingsize and colour coded by sex.
fig, axs = plt.subplots(1, 3, figsize=(14, 5))

# predict_proba orders its columns like insects_regression.classes_, so
# column 1 is the probability of the second class listed there. Naming that
# class in the axis label makes clear which continent the probability is for.
positive_class = insects_regression.classes_[1]

# One (y-values, title, y-label) entry per subplot, left to right
panels = [
    (insects.continent, "Actual Data", "Continent"),
    (
        insects_probabilities[:, 1],
        "Prediction Probabilities",
        f"Probability of continent = {positive_class}",
    ),
    (insects_predictions, "Predicted Data", "Continent Prediction"),
]
for panel_ax, (y_values, title, y_label) in zip(axs, panels):
    panel_ax.scatter(insects.wingsize, y_values, s=40, c=insects['sex'])
    panel_ax.set_title(title)
    panel_ax.set_xlabel("Wingsize")
    panel_ax.set_ylabel(y_label)

# Stop neighbouring subplots' labels from overlapping, then render the figure
# explicitly (as the first plot does) instead of ending on a Text(...) repr
fig.tight_layout()
plt.show()

#### Step 6: Evaluate The Model

In [None]:
# Error metrics between the true labels and the predicted probability of the
# second class (column 1 of predict_proba).
# NOTE(review): this arithmetic assumes the continent labels are encoded as
# 0/1 — confirm against the data file.
positive_probabilities = insects_probabilities[:, 1]
mae = mean_absolute_error(y_true=y_insects, y_pred=positive_probabilities)
mse = mean_squared_error(y_true=y_insects, y_pred=positive_probabilities)
print("MAE:", mae)
print("MSE:", mse)
print()

# Calculate the loss and cost of the model.
# The per-row loss is 1 when the predicted class differs from the target and
# 0 otherwise; the classifier's own predictions are used rather than rounded
# probabilities.
predicted_classes = insects_regression.predict(X_insects)
model_loss = pd.DataFrame({
    'Target': y_insects,
    'Predicted result': predicted_classes,
    'Loss': (y_insects != predicted_classes).astype(int),
})

n_predictions = model_loss.shape[0]
n_incorrect = int(model_loss['Loss'].sum())
# The cost is the mean loss: divide the total by the number of predictions
# exactly once (dividing twice under-reports it by a factor of n).
model_cost = n_incorrect / n_predictions

print(f"The cost of this model is  {model_cost:.5f}")
print(f"The model predicted incorrectly {n_incorrect} times out of {n_predictions} predictions")
print(model_loss)