<a href="https://colab.research.google.com/github/karthikyandrapu/Blackbucks_IIDT_AI-ML-DS_Internship/blob/main/Creating_a_machine_learning_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Task 1: Classify Wine Varieties**

1. Load the Dataset

In [None]:
from sklearn.datasets import load_wine
import pandas as pd

# Load the wine dataset from scikit-learn, then expose the feature matrix as a
# DataFrame (columns labelled with the dataset's feature names) and the class
# labels as a Series.
wine = load_wine()
X_wine = pd.DataFrame(data=wine.data, columns=wine.feature_names)
y_wine = pd.Series(data=wine.target)

2. Data Preprocessing and Feature Engineering

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable between runs.
(
    X_train_wine,
    X_test_wine,
    y_train_wine,
    y_test_wine,
) = train_test_split(
    X_wine,
    y_wine,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train_wine_scaled = scaler.fit_transform(X_train_wine)
X_test_wine_scaled = scaler.transform(X_test_wine)

3. Pipeline Creation

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
# RandomForestClassifier is first used in this cell, so it is imported here;
# otherwise a fresh "Restart & Run All" stops with a NameError on the
# pipeline definition below.
from sklearn.ensemble import RandomForestClassifier
# Create a pipeline: standardise features -> project with PCA -> random forest.
# n_components=2 is only the starting value for the PCA step.
pipeline_wine = Pipeline([
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=2)),
    ('classifier', RandomForestClassifier(random_state=42))
])

4. Model Training and Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Candidate settings, addressed as <pipeline step name>__<parameter name>.
param_grid = dict(
    pca__n_components=[2, 3, 4],
    classifier__n_estimators=[50, 100, 200],
    classifier__max_depth=[None, 10, 20, 30],
)

# Exhaustive search over the grid with 5-fold cross-validation, ranked by
# accuracy. The raw (unscaled) training frame is passed in; the pipeline's own
# scaler step handles standardisation.
grid_search_wine = GridSearchCV(
    estimator=pipeline_wine,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search_wine.fit(X_train_wine, y_train_wine)

5. Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Take the best estimator found by the grid search and predict the held-out
# test rows with it.
best_model_wine = grid_search_wine.best_estimator_
y_pred_wine = best_model_wine.predict(X_test_wine)

# Report overall accuracy first, then the per-class precision/recall table.
wine_accuracy = accuracy_score(y_test_wine, y_pred_wine)
print("Accuracy:", wine_accuracy)
wine_report = classification_report(y_test_wine, y_pred_wine)
print("Classification Report:\n", wine_report)

Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



**Task 2: Predict California Housing Prices**

1. Load the Dataset

In [None]:
from sklearn.datasets import fetch_california_housing

# Fetch the California housing dataset, then expose the feature matrix as a
# DataFrame (columns labelled with the dataset's feature names) and the target
# values as a Series.
housing = fetch_california_housing()
X_housing = pd.DataFrame(data=housing.data, columns=housing.feature_names)
y_housing = pd.Series(data=housing.target)

2. Data Preprocessing and Feature Engineering

In [None]:
# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split repeatable between runs.
(
    X_train_housing,
    X_test_housing,
    y_train_housing,
    y_test_housing,
) = train_test_split(
    X_housing,
    y_housing,
    test_size=0.2,
    random_state=42,
)

# Standardise the features with a fresh scaler (this rebinds the `scaler`
# name): fitted on the training rows only, then applied to the test rows.
scaler = StandardScaler()
X_train_housing_scaled = scaler.fit_transform(X_train_housing)
X_test_housing_scaled = scaler.transform(X_test_housing)

3. Pipeline Creation

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor

# Two-step regression pipeline: standardise the features, then fit a random
# forest regressor with a fixed seed.
pipeline_housing = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('regressor', RandomForestRegressor(random_state=42)),
    ]
)

4. Model Training and Hyperparameter Tuning

In [27]:
# Candidate settings for the 'regressor' pipeline step, addressed as
# <pipeline step name>__<parameter name>. This rebinds `param_grid`.
param_grid = dict(
    regressor__n_estimators=[50, 100, 200],
    regressor__max_depth=[None, 10, 20, 30],
)

# Exhaustive search over the grid with 5-fold cross-validation, ranked by
# negated mean squared error (higher is better).
grid_search_housing = GridSearchCV(
    estimator=pipeline_housing,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search_housing.fit(X_train_housing, y_train_housing)

5. Model Evaluation

In [28]:
from sklearn.metrics import mean_squared_error
# Best model evaluation: take the best estimator found by the grid search and
# predict the held-out test rows with it.
best_model_housing = grid_search_housing.best_estimator_
y_pred_housing = best_model_housing.predict(X_test_housing)
# Compute the MSE once and derive the RMSE as its square root. The previous
# `mean_squared_error(..., squared=False)` call is deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the root here works on both old
# and new releases.
mse_housing = mean_squared_error(y_test_housing, y_pred_housing)
print("Mean Squared Error:", mse_housing)
print("Root Mean Squared Error:", mse_housing ** 0.5)

Mean Squared Error: 0.2545042828477844
Root Mean Squared Error: 0.5044841750221551
