<a href="https://colab.research.google.com/github/rutujapalatkar27/upgraded-octo-invention/blob/main/Copy_of_MCQ_Coding_questions_(Model_Evaluation).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler

**Question 11**

In [None]:
def classify_imbalanced_data(X, y, test_size=0.2, random_state=42):
    """
    Fit an unweighted Logistic Regression on a stratified split and score it.

    The samples are split with stratification on ``y``, the features are
    standardized using statistics learned from the training rows only, and the
    fitted classifier is evaluated on the held-out rows.

    Args:
        X (pd.DataFrame or np.array): Feature matrix.
        y (pd.Series or np.array): Target labels.
        test_size (float): Fraction of the samples held out for evaluation.
        random_state (int): Seed forwarded to the train/test split.

    Returns:
        dict: ``{'accuracy': ..., 'f1_score': ...}`` measured on the test split.
    """
    # Stratified hold-out split so both parts keep the class ratio of `y`
    features_fit, features_eval, labels_fit, labels_eval = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Standardize: the scaler only ever sees the training rows when fitting
    standardizer = StandardScaler().fit(features_fit)
    features_fit = standardizer.transform(features_fit)
    features_eval = standardizer.transform(features_eval)

    # Deliberately no class re-weighting (class_weight=None)
    classifier = LogisticRegression(class_weight=None).fit(features_fit, labels_fit)
    predictions = classifier.predict(features_eval)

    # Both metrics are computed with scikit-learn's default settings
    scores = {}
    scores['accuracy'] = accuracy_score(labels_eval, predictions)
    scores['f1_score'] = f1_score(labels_eval, predictions)
    return scores

# Example usage:
# Generate synthetic imbalanced binary data: 1000 samples, 10 features,
# class weights 0.9 / 0.1, flip_y=0, fixed seed for reproducibility

X, y = make_classification(n_samples=1000, n_features=10, n_classes=2,
                           weights=[0.9, 0.1], flip_y=0, random_state=42)
# Wrap the raw arrays in pandas objects with named feature columns and a named target
X = pd.DataFrame(X, columns=[f'feature_{i}' for i in range(10)])
y = pd.Series(y, name='target')

# Run the model and report both metrics to two decimal places
result = classify_imbalanced_data(X, y)
print(f"Accuracy: {result['accuracy']:.2f}")
print(f"F1-Score: {result['f1_score']:.2f}")


Accuracy: 0.92
F1-Score: 0.48


**Question 12**

In [None]:
def kfold_cross_validation_model(k=5):
    """
    Cross-validate a Logistic Regression on a small synthetic dataset.

    A 50-sample, 5-feature binary classification problem is generated with a
    fixed seed, then scored with shuffled K-Fold cross-validation (also with a
    fixed seed).

    Args:
        k (int): Number of folds.

    Returns:
        dict: ``'accuracy_scores'`` holds the per-fold accuracies and
        ``'mean_accuracy'`` their mean.
    """
    # Small synthetic binary problem; seed fixed so repeated calls agree
    features, labels = make_classification(
        n_samples=50, n_features=5, n_classes=2, random_state=42
    )

    # Shuffled K-Fold splitter with its own fixed seed
    splitter = KFold(n_splits=k, shuffle=True, random_state=42)

    # One accuracy value per fold, using a default-configured classifier
    fold_accuracies = cross_val_score(
        LogisticRegression(), features, labels, cv=splitter, scoring='accuracy'
    )

    summary = {'accuracy_scores': fold_accuracies}
    summary['mean_accuracy'] = np.mean(fold_accuracies)
    return summary

# Example usage: 5-fold cross-validation on the function's built-in synthetic dataset
# (this rebinds the notebook-level name `result`)
result = kfold_cross_validation_model(k=5)

# Print the per-fold accuracies, then their mean to two decimal places
print(f"Accuracy scores for each fold: {result['accuracy_scores']}")
print(f"Mean accuracy: {result['mean_accuracy']:.2f}")

Accuracy scores for each fold: [1.  1.  1.  0.9 0.9]
Mean accuracy: 0.96


**Question 13**

In [None]:
def logistic_regression(test_size=0.2, random_state=42):
    """
    Fit a Logistic Regression on synthetic data and report per-class metrics.

    A 200-sample, 5-feature binary dataset is generated, split with
    stratification, standardized using training-set statistics, and used to
    fit a default Logistic Regression. The held-out predictions are summarized
    with ``classification_report``.

    Args:
        test_size (float): Fraction of the samples held out for evaluation.
        random_state (int): Seed used for both data generation and the split.

    Returns:
        dict: The classification report in dictionary form
        (``output_dict=True``).
    """
    # Synthetic binary classification problem
    features, labels = make_classification(
        n_samples=200, n_features=5, n_classes=2, random_state=random_state
    )

    # Stratified hold-out split
    features_fit, features_eval, labels_fit, labels_eval = train_test_split(
        features, labels, test_size=test_size, random_state=random_state, stratify=labels
    )

    # Standardize with statistics learned from the training rows only
    standardizer = StandardScaler().fit(features_fit)
    features_fit = standardizer.transform(features_fit)
    features_eval = standardizer.transform(features_eval)

    # Fit the classifier and score the held-out rows
    classifier = LogisticRegression().fit(features_fit, labels_fit)
    predictions = classifier.predict(features_eval)

    return classification_report(labels_eval, predictions, output_dict=True)

# Example usage: build the report with the default test_size and random_state
report = logistic_regression()

# Display the classification report as a table: the report dict is turned into a
# DataFrame and transposed so each class / average becomes a row.
# NOTE: `display` is not imported in this notebook; it relies on the one Jupyter/IPython provides.
display(pd.DataFrame(report).transpose())

Unnamed: 0,precision,recall,f1-score,support
0,0.818182,0.9,0.857143,20.0
1,0.888889,0.8,0.842105,20.0
accuracy,0.85,0.85,0.85,0.85
macro avg,0.853535,0.85,0.849624,40.0
weighted avg,0.853535,0.85,0.849624,40.0


**Question 14**

In [None]:
def regression_with_metrics(test_size=0.2, random_state=42):
    """
    Fit a Linear Regression on synthetic data and report its test-set errors.

    A 200-sample, 5-feature regression problem (noise=0.1) is generated, split
    into train and test parts, and fitted with ordinary Linear Regression.

    Args:
        test_size (float): Fraction of the samples held out for evaluation.
        random_state (int): Seed used for both data generation and the split.

    Returns:
        dict: Test-set metrics keyed by ``'MAE'``, ``'MSE'``, ``'RMSE'`` and
        ``'R-squared'``.
    """
    # Synthetic regression problem
    features, targets = make_regression(
        n_samples=200, n_features=5, noise=0.1, random_state=random_state
    )

    # Plain (non-stratified) hold-out split
    features_fit, features_eval, targets_fit, targets_eval = train_test_split(
        features, targets, test_size=test_size, random_state=random_state
    )

    # Fit and predict on the held-out rows
    regressor = LinearRegression().fit(features_fit, targets_fit)
    predictions = regressor.predict(features_eval)

    # RMSE is derived from the MSE, so compute the MSE once up front
    squared_error = mean_squared_error(targets_eval, predictions)

    return {
        'MAE': mean_absolute_error(targets_eval, predictions),
        'MSE': squared_error,
        'RMSE': np.sqrt(squared_error),
        'R-squared': r2_score(targets_eval, predictions),
    }

# Example usage: evaluate with the default test_size and random_state
metrics = regression_with_metrics()

# Print the evaluation metrics, one "name: value" line each, to four decimal places
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")

MAE: 0.0774
MSE: 0.0085
RMSE: 0.0922
R-squared: 1.0000


**Question 15**

In [None]:
def logistic_regression_hyperparameter_tuning(test_size=0.2, random_state=42):
    """
    Tune a Logistic Regression with GridSearchCV and score it on held-out data.

    Synthetic classification data is generated and split into training and
    validation parts. Scaling and the classifier are combined in a Pipeline so
    that, during the 5-fold grid search, the scaler is refit on the training
    portion of every fold; the fold being scored never contributes to the
    scaling statistics. The best configuration is refit on the full training
    part by GridSearchCV and evaluated once on the validation part.

    Side effect: prints the selected hyperparameters (without the pipeline
    step prefix).

    Args:
        test_size (float): Proportion of the data held out for validation (default is 0.2).
        random_state (int): Seed for data generation, the split and the classifier (default is 42).

    Returns:
        float: Accuracy of the best estimator on the held-out validation set.
        Note this is not the cross-validated ``best_score_``.
    """
    # 1. Generate synthetic classification data
    X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
                               n_redundant=5, random_state=random_state, class_sep=0.7)

    # 2. Split the data into training and validation sets
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=random_state)

    # 3-4. Scale + classify inside one estimator. Fitting the scaler on all of
    #      X_train before cross-validation would leak each CV validation fold
    #      into the preprocessing; the pipeline refits the scaler per fold.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('clf', LogisticRegression(solver='liblinear', random_state=random_state)),
    ])

    # 5. Set up hyperparameter grid for tuning (keys address the 'clf' step)
    param_grid = {
        'clf__penalty': ['l1', 'l2'],           # L1 (Lasso) or L2 (Ridge) regularization
        'clf__C': [0.01, 0.1, 1, 10, 100],      # Inverse regularization strength
        'clf__max_iter': [100, 200, 300]        # Iteration cap for the solver
    }

    # 6. Grid search with 5-fold CV, selecting on accuracy
    grid_search = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy')
    grid_search.fit(X_train, y_train)

    # 7. The best pipeline (refit on all of X_train) scales X_val itself
    best_model = grid_search.best_estimator_
    y_val_pred = best_model.predict(X_val)

    # 8. Calculate accuracy on the validation set
    accuracy = accuracy_score(y_val, y_val_pred)

    # Strip the 'clf__' step prefix so the printed dict keeps the plain
    # LogisticRegression parameter names
    best_params = {name.split('__', 1)[1]: value
                   for name, value in grid_search.best_params_.items()}
    print(f"Best Hyperparameters: {best_params}")
    return accuracy

# Example usage: run the grid search with the default test_size and random_state.
# The call itself prints the selected hyperparameters; the returned value is the
# accuracy on the held-out validation split, printed here to two decimal places.
best_accuracy = logistic_regression_hyperparameter_tuning()
print(f"Best validation accuracy: {best_accuracy:.2f}")

Best Hyperparameters: {'C': 1, 'max_iter': 100, 'penalty': 'l2'}
Best validation accuracy: 0.76
