### Syed Muhammad Zaid | 20B-052-SE

##### Task - 1

###### Data Loading:

In [None]:
import pandas as pd

# Location of the raw Iris data file on the UCI repository.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

# Labels to attach to the columns, in file order; the last one is the class label.
columns = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]

# header=None tells pandas not to consume the first record as column names;
# the labels above are supplied through `names` instead.
df = pd.read_csv(url, names=columns, header=None)

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [None]:
# Per-column summary statistics (count, mean, std, min, quartiles, max);
# held in a name first, then shown as the cell's output.
summary_stats = df.describe()
summary_stats

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [None]:
# Number of null entries in each column (summing the boolean null mask column-wise).
missing_per_column = df.isnull().sum(axis=0)
missing_per_column

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [None]:
# Row count per class label, to see how balanced the target variable is.
species_column = df['species']
species_column.value_counts()

Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: species, dtype: int64

###### Data Preprocessing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode the target variable (species) as integer class ids.
# The encoded column goes into a *new* frame instead of overwriting
# df['species']: `df` keeps the original species names, so earlier cells that
# display `df` stay accurate and re-running this cell always fits the encoder
# on the names (keeping le.classes_ / le.inverse_transform meaningful).
le = LabelEncoder()
df_encoded = df.assign(species=le.fit_transform(df['species']))

# Split the dataset into features (X) and target (y)
X = df_encoded.drop(columns='species')
y = df_encoded['species']

# Hold out 20% of the rows for testing; random_state fixes the shuffle so the
# split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the shapes of the training and testing sets
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

###### Hyperparameter Tuning with Grid Search

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV

# Define the Random Forest model.
# random_state=42 pins the estimator's seed; every other setting is left at
# its default here and is what the hyperparameter grid below varies.
model = RandomForestClassifier(random_state=42)

# Candidate values for each tuned hyperparameter. Every combination is a
# candidate: 3 * 4 * 3 * 3 = 108 settings in total.
param_grid = dict(
    n_estimators=[50, 100, 200],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

In [None]:
# Exhaustive search over param_grid, scoring each candidate with 5-fold
# cross-validation on the training split. n_jobs=-1 requests all available
# cores; verbose=2 prints progress for the individual fits.
grid_search = GridSearchCV(
    model,
    param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Hyperparameter combination with the best cross-validated score.
grid_search.best_params_

Fitting 5 folds for each of 108 candidates, totalling 540 fits


{'max_depth': None,
 'min_samples_leaf': 2,
 'min_samples_split': 2,
 'n_estimators': 200}

In [None]:
from sklearn.metrics import accuracy_score

# Take the final model straight from the search. GridSearchCV was created
# without overriding `refit` (default True), so best_estimator_ has already
# been re-trained on all of X_train with the winning hyperparameters; calling
# .fit() on it again would only repeat that work.
best_model = grid_search.best_estimator_

# Evaluate the model on the held-out testing set
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy of the final model: {accuracy*100:.2f}%")

Accuracy of the final model: 100.00%


##### Task 2: Hyperparameter Tuning with Another Technique

###### Model Selection

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

# Define the SVM model.
# Only random_state is set here; kernel, C, gamma and degree are left at their
# defaults and are the settings the randomized search below samples.
svm_model = SVC(random_state=42)

# Define the hyperparameter distributions sampled by the randomized search.
param_distributions = {
    # scipy's uniform(loc, scale) spans [loc, loc + scale], not [low, high].
    # A scale of 9.9 gives the intended continuous range 0.1 to 10
    # (a scale of 10 would reach 10.1).
    'C': uniform(loc=0.1, scale=9.9),
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'gamma': ['scale', 'auto'],
    # randint's upper bound is exclusive, so this draws 2, 3 or 4.
    # Intended for the polynomial kernel.
    'degree': randint(2, 5),
}

###### Hyperparameter Tuning with Randomized Search

In [None]:
# Randomized search: draw 50 hyperparameter settings from param_distributions
# and score each with 5-fold cross-validation on the training split.
# random_state=42 makes the draws repeatable; n_jobs=-1 requests all available
# cores; verbose=2 prints progress for the individual fits.
random_search = RandomizedSearchCV(
    svm_model,
    param_distributions,
    n_iter=50,
    cv=5,
    random_state=42,
    verbose=2,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)

# Sampled setting with the best cross-validated score.
random_search.best_params_

Fitting 5 folds for each of 50 candidates, totalling 250 fits


{'C': 3.4370861113902182, 'degree': 4, 'gamma': 'auto', 'kernel': 'linear'}

In [None]:
# Take the final model straight from the search. RandomizedSearchCV was
# created without overriding `refit` (default True), so best_estimator_ has
# already been re-trained on all of X_train with the winning hyperparameters;
# calling .fit() on it again would only repeat that work.
best_svm_model = random_search.best_estimator_

# Evaluate the model on the held-out testing set
y_pred = best_svm_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Accuracy of the final model: {accuracy*100:.2f}%")

Accuracy of the final model: 96.67%


In [None]:
# Baseline for comparison: an SVC with no tuned hyperparameters (only the seed
# is set), trained on the same training split. fit() returns the estimator
# itself, so construction and training are chained.
default_svm_model = SVC(random_state=42).fit(X_train, y_train)

# Score the baseline on the same held-out test split as the tuned model.
default_y_pred = default_svm_model.predict(X_test)
default_accuracy = accuracy_score(y_true=y_test, y_pred=default_y_pred)

print(f"Accuracy with default hyperparameters: {default_accuracy*100:.2f}%")

Accuracy with default hyperparameters: 100.00%
