<font color="green">*To start working on this notebook, or any other notebook that we will use in the Moringa Data Science Course, we will need to save our own copy of it. We can do this by clicking File > Save a Copy in Drive. We will then be able to make edits to our own copy of this notebook.*</font>

# Python Programming: Hyperparameter Tuning

## Example 

In [1]:
# Loading libraries
import numpy as np
from sklearn import linear_model, datasets
from sklearn.model_selection import GridSearchCV

In [2]:
# Loading data
iris = datasets.load_iris()
X = iris.data
y = iris.target
X

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

In [3]:
# Creating the logistic regression
logistic = linear_model.LogisticRegression()

In [4]:
# Creating regularization penalty space
penalty = ['l1', 'l2']

# Creating regularization hyperparameter space
C = np.logspace(0, 4, 10)

# Creating hyperparameter options
hyperparameters = dict(C=C, penalty=penalty)

In [5]:
# Creating grid search using 5-fold cross validation
clf = GridSearchCV(logistic, hyperparameters, cv=5, verbose=0)

In [6]:
# Fitting grid search
best_model = clf.fit(X, y)

Traceback (most recent call last):
  File "C:\Users\ronal\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 531, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ronal\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 1304, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "C:\Users\ronal\anaconda3\lib\site-packages\sklearn\linear_model\_logistic.py", line 442, in _check_solver
    raise ValueError("Solver %s supports only 'l2' or 'none' penalties, "
ValueError: Solver lbfgs supports only 'l2' or 'none' penalties, got l1 penalty.

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _che

In [7]:
# Viewing best hyperparameters
print('Best Penalty:', best_model.best_estimator_.get_params()['penalty'])
print('Best C:', best_model.best_estimator_.get_params()['C'])

Best Penalty: l2
Best C: 7.742636826811269


In [8]:
# Predicting target vector
best_model.predict(X)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

## <font color="green">Challenge 1</font>

In [9]:
# Challenge 1
# ---
# Create a linear logistic regression model to predict the gender. 
# Perform hyperparameter tuning as well. 
# ---
# Dataset url = http://bit.ly/HeightsWeightsDataset
# --
#
OUR CODE GOES HERE

SyntaxError: invalid syntax (<ipython-input-9-7ad69335442d>, line 9)

## <font color="green">Challenge 2</font>

In [None]:
# Challenge 2
# ---
# Use the breast cancer dataset to create a logistic regression machine learning model.
# To improve the accuracy of the model, perform hyperparameter tuning.
# ---
# Dataset source = https://bit.ly/breast_cancer_dataset1
# ---
# 
OUR CODE GOES HERE

## <font color="green">Challenge 3</font>

In [None]:
# Challenge 3
# ---
# Build a regression model to determine whether a patient has diabetes or not.
# To improve the accuracy of the model, perform hyperparameter tuning.
# ---
# Dataset source = https://bit.ly/Diabetes_dataset
# ---
#
OUR CODE GOES HERE

## <font color="green">Challenge 4</font>

In [None]:
# Challenge 4
# ---
# Build a model to determine whether a patient will be admitted. 
# To improve the accuracy of the model, perform hyperparameter tuning.
# ---
# Dataset url = https://bit.ly/patientAdmissionDataset
# ---
# 
OUR CODE GOES HERE

## <font color="green">Challenge 5</font>

In [None]:
# Challenge 5
# ---
# You have a telecom firm which has collected data of all its customers. 
# The main types of attributes are: 
# 1.Demographics (age, gender etc.) 
# 2.Services availed (internet packs purchased, special offers etc) 
# 3.Expenses (amount of recharge done per month etc.) 
# Based on all this past information, build an accurate model which will predict 
# whether a particular customer will churn or not. 
# ---
# Dataset source = https://bit.ly/churnPredictDataset
# ---
# 
OUR CODE GOES HERE