# **Hyperparameter Tuning with GridSearchCV and RandomizedSearchCV**

# **Import the necessary Python libraries and install prerequisites**

In [11]:
# Data Processing
import pandas as pd

# Models
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

# Training and evaluation metrics
from sklearn.metrics import mean_squared_error


# **Step 1: Load and prepare the Dataset**


We'll use the **Abalone** dataset, available on Kaggle:
https://www.kaggle.com/datasets/rodolfomendes/abalone-dataset?resource=download

In [14]:
# Build the modelling table in one non-mutating chain:
#   1. read the CSV,
#   2. one-hot encode 'Sex' (dropping the first level),
#   3. derive 'age' as rings + 1.5,
#   4. remove the now-redundant 'Rings' column.
data = (
    pd.read_csv('abalone_train.csv')
    .pipe(pd.get_dummies, columns=['Sex'], drop_first=True)
    .assign(age=lambda frame: frame['Rings'] + 1.5)
    .drop(columns='Rings')
)

# Define features and target variable
X = data.drop(columns='age')            # Features
y = data['age']                         # Target

# **Step 2: Train a Model Without Hyperparameter Tuning**

First, establish a baseline by training a model without any hyperparameter tuning.

In [15]:
# Baseline: an SVR left at its default hyperparameters.
# fit() returns the fitted estimator, so construction and training are chained.
model = SVR().fit(X, y)

# Predict on the same X the model was fitted on (in-sample predictions)
predictions = model.predict(X)

In [16]:
# Baseline: mean squared error of the untuned model's predictions against y
baseline_mse = mean_squared_error(y_true=y, y_pred=predictions)
print(f'Baseline MSE: {baseline_mse}')

Baseline MSE: 4.939047132023894


# **Step 3: Hyperparameter Tuning with GridSearchCV**

Use GridSearchCV to find the optimal hyperparameters.

In [18]:
# Candidate values per hyperparameter; the grid search evaluates every
# combination (3 C values x 3 epsilon values x 2 kernels = 18 candidates).
param_grid = dict(
    C=[0.1, 1, 10],
    epsilon=[0.01, 0.1, 0.5],
    kernel=['linear', 'rbf'],
)

# Each candidate is scored by negated MSE under 5-fold cross-validation
grid_search = GridSearchCV(
    estimator=SVR(),
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
)
grid_search.fit(X, y)

In [19]:
# Pull the winning hyperparameter combination and the estimator chosen by the grid search
best_params_grid = grid_search.best_params_
best_model_grid = grid_search.best_estimator_
print(f'Best parameters from Grid Search: {best_params_grid}')

Best parameters from Grid Search: {'C': 10, 'epsilon': 0.01, 'kernel': 'rbf'}


# **Step 4: Hyperparameter Tuning with RandomizedSearchCV**

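Now, use RandomizedSearchCV, which evaluates only a random sample of the hyperparameter combinations (here 10 of the 18) instead of all of them, making the search cheaper.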

In [20]:
# Candidate values per hyperparameter that the random search samples from
param_dist = {
    'C': [0.1, 1, 10],
    'epsilon': [0.01, 0.1, 0.5],
    'kernel': ['linear', 'rbf']
}

# n_iter=10 draws 10 of the 18 possible combinations. The draw is random, so
# random_state is fixed: without it, a fresh kernel may sample a different
# subset and report different best parameters on every run.
random_search = RandomizedSearchCV(
    SVR(),
    param_distributions=param_dist,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
)
random_search.fit(X, y)

In [21]:
# Pull the winning hyperparameter combination and the estimator chosen by the random search
best_params_random = random_search.best_params_
best_model_random = random_search.best_estimator_
print(f'Best parameters from Random Search: {best_params_random}')

Best parameters from Random Search: {'kernel': 'rbf', 'epsilon': 0.01, 'C': 10}


# **Step 5: Compare Model Performance**

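Finally, compare the performance of the models with and without tuning. Note that every MSE in this notebook is computed on the same data the models were fitted on, so the comparison reflects in-sample fit rather than performance on unseen data.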

In [22]:
# Evaluate tuned models: for each search's best estimator, predict on X
# and score those predictions against y with mean squared error.
grid_predictions = best_model_grid.predict(X)
grid_mse = mean_squared_error(y_true=y, y_pred=grid_predictions)

random_predictions = best_model_random.predict(X)
random_mse = mean_squared_error(y_true=y, y_pred=random_predictions)

In [23]:
# Report the MSE of both tuned models, one per line
print(
    f'Grid Search MSE: {grid_mse}',
    f'Random Search MSE: {random_mse}',
    sep='\n',
)

Grid Search MSE: 4.455123208938927
Random Search MSE: 4.455123208938927
