In [11]:
# Wine Classification Task

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, accuracy_score


In [12]:
# Fetch the bundled Wine data and wrap it in pandas objects:
# X gets one named column per feature, y holds the target values.
wine = load_wine()
y = pd.Series(wine.target)
X = pd.DataFrame(data=wine.data, columns=wine.feature_names)


In [13]:
# Data Preprocessing and Feature Engineering
# Tally the null entries in each feature column and print the counts.
missing_per_feature = X.isna().sum()
print(missing_per_feature)

alcohol                         0
malic_acid                      0
ash                             0
alcalinity_of_ash               0
magnesium                       0
total_phenols                   0
flavanoids                      0
nonflavanoid_phenols            0
proanthocyanins                 0
color_intensity                 0
hue                             0
od280/od315_of_diluted_wines    0
proline                         0
dtype: int64


In [14]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
wine_split = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = wine_split


In [15]:
# Pipeline Creation
# Two named stages: standardise the features, then classify.
# The step names ('scaler', 'classifier') are what parameter grids address.
scaling_step = ('scaler', StandardScaler())
model_step = ('classifier', LogisticRegression())
pipe = Pipeline(steps=[scaling_step, model_step])

In [17]:
# Model Training and Hyperparameter Tuning
# Each dict is one sub-grid: it places an estimator into the pipeline's
# 'classifier' step and lists the hyperparameter values to try for it.
param_grid = [
    {'classifier': [LogisticRegression()], 'classifier__C': [0.1, 1, 10]},
    # Random forests are randomised; pin the seed so that re-running the
    # notebook reproduces the same cross-validation scores and model choice.
    {'classifier': [RandomForestClassifier(random_state=42)], 'classifier__n_estimators': [10, 50, 100]},
    {'classifier': [SVC()], 'classifier__C': [0.1, 1, 10], 'classifier__kernel': ['linear', 'rbf']},
    {'classifier': [KNeighborsClassifier()], 'classifier__n_neighbors': [3, 5, 7]}
]
# 5-fold cross-validation on the training split, ranking candidates by accuracy.
grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='accuracy')
grid_search.fit(X_train, y_train)

In [18]:
# Model Evaluation
# Take the winning pipeline from the search and score it on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Compute the metrics first, then report them together.
test_accuracy = accuracy_score(y_test, y_pred)
report_text = classification_report(y_test, y_pred)

print("Best Model Parameters:", grid_search.best_params_)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", report_text)

Best Model Parameters: {'classifier': LogisticRegression(), 'classifier__C': 0.1}
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      1.00      1.00        14
           2       1.00      1.00      1.00         8

    accuracy                           1.00        36
   macro avg       1.00      1.00      1.00        36
weighted avg       1.00      1.00      1.00        36



In [None]:
# Predict California Housing Prices Task

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, PowerTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [None]:
# Fetch the California Housing data and wrap it in pandas objects:
# X gets one named column per feature, y holds the target values.
housing = fetch_california_housing()
y = pd.Series(housing.target)
X = pd.DataFrame(data=housing.data, columns=housing.feature_names)

In [None]:
# Data Preprocessing and Feature Engineering
# Check for missing values
print(X.isnull().sum())

In [None]:
# Transform skewed features
pt = PowerTransformer()
X_transformed = pt.fit_transform(X)

In [None]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_transformed, y, test_size=0.2, random_state=42)

In [None]:
# Pipeline Creation
pipe = Pipeline([
    ('scaler', StandardScaler()),
    ('regressor', LinearRegression())
])

In [None]:
# Model Training and Hyperparameter Tuning
param_grid = [
    {'regressor': [LinearRegression()]},
    {'regressor': [DecisionTreeRegressor()], 'regressor__max_depth': [3, 5, 7]},
    {'regressor': [RandomForestRegressor()], 'regressor__n_estimators': [50, 100, 200]},
    {'regressor': [GradientBoostingRegressor()], 'regressor__n_estimators': [50, 100, 200]}
]

grid_search = GridSearchCV(pipe, param_grid, cv=5, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

In [None]:
# Model Evaluation
# Take the winning pipeline from the search and score it on the held-out rows.
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Compute the error metrics first, then report them together.
test_mse = mean_squared_error(y_test, y_pred)
test_rmse = np.sqrt(test_mse)
test_mae = mean_absolute_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)

print("Best Model Parameters:", grid_search.best_params_)
print("RMSE:", test_rmse)
print("MAE:", test_mae)
print("R²:", test_r2)