# Python Programming: Random Search

## Example

In [None]:
## Example 1
# ---
# Perform hyperparameter tuning using Random Search, then predict the quality of wine.
# ---
# Dataset url = http://bit.ly/TuningDataset
# ---
# OUR CODE GOES BELOW 

In [None]:
# Importing the required libraries
# ---
#
import pandas as pd
import numpy as np

In [None]:
# Loading our Dataset
# ---
# The file is semicolon-delimited, so the separator is passed explicitly.
#
dataset_url = "http://bit.ly/TuningDataset"
dataset = pd.read_csv(dataset_url, sep=";")

In [None]:
# Previewing our Dataset
# ---
# head() is called without arguments, so the first rows
# (five by default in pandas) are displayed as the cell output.
#
dataset.head()

In [None]:
# Separating the features from the target
# ---
# The columns at positions 0-10 form the feature matrix X and the
# column at position 11 forms the target vector y
# (presumably the wine quality label - confirm against the dataset preview).
# .values converts both selections to NumPy arrays.
#
feature_columns = slice(0, 11)
target_column = 11
X = dataset.iloc[:, feature_columns].values
y = dataset.iloc[:, target_column].values

In [None]:
# Splitting the data into training and test sets
# ---
# No test_size is given, so scikit-learn's default split applies
# (25% of the rows are held out for testing).
# random_state is fixed so that the same split - and therefore the same
# scores in the cells below - is obtained every time the notebook is run.
#
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
# Standardising our features
# ---
# The scaler is fitted on the training set only and then applied to both
# sets, so nothing from the test set is used while fitting the scaler.
#
from sklearn.preprocessing import StandardScaler
feature_scaler = StandardScaler().fit(X_train)
X_train = feature_scaler.transform(X_train)
X_test = feature_scaler.transform(X_test)

In [None]:
# Creating the baseline model
# ---
# A random forest made of 300 trees; random_state is fixed
# so that the forest is built the same way on every run.
#
from sklearn.ensemble import RandomForestClassifier
classifier = RandomForestClassifier(
    n_estimators=300,
    random_state=0,
)

In [None]:
# Cross-validating the baseline model
# ---
# Cross validation is carried out with the cross_val_score function
# from sklearn.model_selection, which returns one score per fold
# (for a classifier the default score is the accuracy).
# Four arguments are passed to it:
#   1. the estimator, i.e. the algorithm to cross-validate;
#   2. X, the training features (X_train);
#   3. y, the training labels (y_train);
#   4. cv, the number of folds (5 here).
# ---
#
from sklearn.model_selection import cross_val_score
all_accuracies = cross_val_score(classifier, X_train, y_train, cv=5)

In [None]:
# Printing the accuracies returned for the five folds
# by the cross_val_score method, followed by their mean
# ---
# The mean is the single baseline figure to compare against
# the score obtained after hyperparameter tuning.
#
print(all_accuracies)
print(all_accuracies.mean())

In [None]:
# Step 1: Hyperparameters: Getting Started with Random Search
# ---
# Random search differs from grid search in that we no longer
# have to provide a discrete set of values to explore for each hyperparameter;
# instead, we may provide a statistical distribution
# from which values are randomly sampled.
# Below, the integer-valued hyperparameters get a sampling distribution,
# while the remaining ones get a list of candidate values to pick from.
# ---
#

# parameters and the distributions / candidate lists to sample from
# (sp_randint(low, high) draws integers from low up to, but excluding, high)
from scipy.stats import randint as sp_randint
param_dist = dict(
    max_depth=[3, None],
    max_features=sp_randint(1, 11),
    min_samples_split=sp_randint(2, 11),
    bootstrap=[True, False],
    criterion=["gini", "entropy"],
)

In [None]:
# Step 2: Instantiating RandomizedSearchCV object
# ---
# n_iter is not given, so scikit-learn's default number of sampled
# parameter settings (10) is used; each one is scored with 5-fold cross validation.
# random_state is fixed so that the same candidate settings are drawn
# every time the notebook is run, which keeps the results comparable.
#
from sklearn.model_selection import RandomizedSearchCV
random_sr = RandomizedSearchCV(classifier, param_dist, cv=5, random_state=0)

In [None]:
# Step 3: Calling the fit method
# ---
# Fitting runs the search on the training data only:
# each sampled parameter setting is cross-validated on X_train / y_train.
#
random_sr.fit(X_train, y_train)

In [None]:
# Step 4: Checking the parameters that return the highest accuracy
# ---
# best_params_ holds the sampled parameter setting that gave
# the best result during the search.
#
best_parameters = random_sr.best_params_
print(best_parameters)

In [None]:
# Finding the obtained accuracy
# ---
# best_score_ is the mean cross-validated score of the best parameter setting.
#
best_result = random_sr.best_score_
print(best_result)

# Compare this with the mean cross-validation accuracy of the untuned model printed earlier.

## <font color="green">Challenges</font>

In [None]:
## Challenge 1
# ---
# Question: Implement hyperparameter tuning using random search while building a model 
# that classifies people's incomes, using the following dataset.
# ---
# Dataset url = http://bit.ly/HyperParameterTuningDataset
# ---
# OUR CODE GOES BELOW
#

In [None]:
## Challenge 2
# ---
# Perform hyperparameter tuning by applying random search to the challenges that you worked on during Week 8.
# ---  