#Hyperparameters Tuning

In [None]:
url='https://raw.githubusercontent.com/niranjanmeegammana/ML-course/main/projects/fruit%20data.csv'
import pandas as pd
dataframe = pd.read_csv(url)
dataframe

Unnamed: 0,fruit_label,fruit_name,fruit_subtype,mass,width,height,color_score
0,1,apple,granny_smith,192,8.4,7.3,0.55
1,1,apple,granny_smith,180,8.0,6.8,0.59
2,1,apple,granny_smith,176,7.4,7.2,0.6
3,2,mandarin,mandarin,86,6.2,4.7,0.8
4,2,mandarin,mandarin,84,6.0,4.6,0.79
5,2,mandarin,mandarin,80,5.8,4.3,0.77
6,2,mandarin,mandarin,80,5.9,4.3,0.81
7,2,mandarin,mandarin,76,5.8,4.0,0.81
8,1,apple,braeburn,178,7.1,7.8,0.92
9,1,apple,braeburn,172,7.4,7.0,0.89


SVM

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Load the data (replace 'data' with your actual data)
data = pd.read_csv(url)

# Preprocess the data
label_encoder = LabelEncoder()
data['fruit_name'] = label_encoder.fit_transform(data['fruit_name'])
data['fruit_subtype'] = label_encoder.fit_transform(data['fruit_subtype'])

# Split the data into features (X) and target (y)
X = data[['fruit_name', 'fruit_subtype', 'mass', 'width', 'height', 'color_score']]
y = data['fruit_label']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the SVM model
model = SVC(kernel='linear')  # You can choose other kernels as well
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [None]:
import pandas as pd
import svm

#read data
dataframe = pd.read_fwf('tweets.txt')
X = dataframe[['tweet']]
y = dataframe[['sentiment']]

C_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
gamma_values = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]

best_score = 0
best_params = {'C': None, 'gamma': None}

for C in C_values:
    for gamma in gamma_values:
        svc = svm.SVC(C=C, gamma=gamma)
        svc.fit(X, y)
        score = svc.score(Xval, yval)

        if score > best_score:
            best_score = score
            best_params['C'] = C
            best_params['gamma'] = gamma

best_score, best_params

## Hyperparameters tuning with GridSearchCV

tries all possible combinations of hyperparameters from the provided grid.

In [None]:
from sklearn.model_selection import GridSearchCV


#Set up the parameter grid for grid search
param_grid = {
    'C': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100],
    'gamma': [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30, 100]
}

# Create the SVM model
model = SVC(kernel='rbf')

# Perform grid search with cross-validation
grid_search = GridSearchCV(model, param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters and the corresponding score
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Score:", best_score)
print("Best Parameters:", best_params)





Best Score: 0.8066666666666666
Best Parameters: {'C': 3, 'gamma': 0.1}


## Hyperparameters tuning with Random Search
 randomly samples a subset of hyperparameter combinations

In [None]:
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

# Set up the parameter distributions for random search
param_dist = {
    'C': np.logspace(-3, 2, 100),
    'gamma': np.logspace(-3, 2, 100)
}

# Create the SVM model
model = SVC(kernel='rbf')

# Perform random search with cross-validation
random_search = RandomizedSearchCV(model, param_distributions=param_dist, n_iter=20, cv=5)
random_search.fit(X_train, y_train)

# Get the best parameters and the corresponding score
best_params = random_search.best_params_
best_score = random_search.best_score_

print("Best Score:", best_score)
print("Best Parameters:", best_params)



Best Score: 0.8355555555555554
Best Parameters: {'gamma': 0.0012618568830660211, 'C': 31.25715849688235}


## Bayesian optimization
uses probabilistic models to model the target function and guides the search towards promising regions in the hyperparameter space.

In [None]:
pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.9.0-py2.py3-none-any.whl (100 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/100.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m92.2/100.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-23.7.0-py3-none-any.whl (17 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-23.7.0 scikit-optimize-0.9.0


In [None]:
from skopt import BayesSearchCV

# Define the hyperparameter search space
param_space = {
    'C': (1e-6, 1e+6, 'log-uniform'),
    'gamma': (1e-6, 1e+1, 'log-uniform')
}

# Create the SVM model
model = SVC(kernel='rbf')

# Initialize the Bayesian optimizer
opt = BayesSearchCV(model, param_space, n_iter=20, cv=5)

# Run the optimization
opt.fit(X_train, y_train)

# Get the best hyperparameters
best_params = opt.best_params_

# Build the final model with the best hyperparameters
final_model = SVC(kernel='rbf', C=best_params['C'], gamma=best_params['gamma'])
final_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = final_model.predict(X_test)

# Evaluate the final model's performance
accuracy = accuracy_score(y_test, y_pred)
print("Best Score:", opt.best_score_)
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)



Best Score: 0.96
Best Parameters: OrderedDict([('C', 1000000.0), ('gamma', 5.1296781709517165e-06)])
Accuracy: 1.0
