In [11]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV

In [12]:
# Step 1: Load Data
def load_data():
    from sklearn.datasets import load_iris
    data = load_iris()
    X = pd.DataFrame(data.data, columns=data.feature_names)
    y = pd.Series(data.target)
    return X, y

In [13]:
# Step 2: Preprocessing
def preprocess_data(X, y):
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    return X_scaled, y

In [14]:
# Step 3: Train-Test Split
def split_data(X, y, test_size=0.2, random_state=42):
    return train_test_split(X, y, test_size=test_size, random_state=random_state, stratify=y)

In [15]:
# Step 4: Train LDA Model
def train_lda_model(X_train, y_train):
    lda = LinearDiscriminantAnalysis()
    lda.fit(X_train, y_train)
    return lda

In [16]:
# Step 5: Optimize Hyperparameters
def optimize_hyperparameters(X_train, y_train):
    param_grid = [
        {'solver': ['svd']},  # SVD cannot have shrinkage
        {'solver': ['lsqr', 'eigen'], 'shrinkage': ['auto', None]}  # Only these solvers support shrinkage
    ]
    
    lda = LinearDiscriminantAnalysis()
    grid_search = GridSearchCV(lda, param_grid, cv=5, scoring='accuracy', error_score='raise')  # Debugging enabled
    grid_search.fit(X_train, y_train)
    
    print("Best Parameters:", grid_search.best_params_)
    return grid_search.best_estimator_

In [17]:
# Step 6: Compute Predictions
def compute_predictions(model, X_test):
    return model.predict(X_test)

In [18]:
# Step 7: Evaluate Model
def evaluate_model(y_test, y_pred):
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [19]:
# Step 8: Predict New Data
def predict_new_data(model, new_data):
    return model.predict(new_data)

In [20]:
# Main Execution
if __name__ == "__main__":
    X, y = load_data()
    X, y = preprocess_data(X, y)
    X_train, X_test, y_train, y_test = split_data(X, y)
    
    lda_model = train_lda_model(X_train, y_train)
    best_lda = optimize_hyperparameters(X_train, y_train)
    
    y_pred = compute_predictions(best_lda, X_test)
    evaluate_model(y_test, y_pred)
    
    # Example new data prediction
    new_data = np.array([X_test[0]])  # Using first test sample as example
    print("New Data Prediction:", predict_new_data(best_lda, new_data))

Best Parameters: {'solver': 'svd'}
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10
           2       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]
New Data Prediction: [0]
