In [None]:
# LDA is particularly useful for dimensionality reduction in supervised learning scenarios where the categories (labels) of the output variable are known.

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import OneHotEncoder, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Load dataset
data = pd.read_csv('https://raw.githubusercontent.com/datasciencedojo/datasets/master/titanic.csv')

# Preview data
print(data.head())

# Handle missing values: median for 'Age', most frequent value for 'Embarked'.
# Assigning through .loc writes into `data` itself (no copy is made), which
# avoids chained-assignment warnings.
data.loc[:, 'Age'] = data['Age'].fillna(data['Age'].median())
data.loc[:, 'Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

# Drop columns that may not be useful for this analysis
data = data.drop(['Name', 'Ticket', 'Cabin'], axis=1)

# One-hot encode the categorical columns
encoder = OneHotEncoder()
categorical_features = ['Pclass', 'Sex', 'Embarked']
encoded_features = encoder.fit_transform(data[categorical_features]).toarray()
encoded_feature_names = encoder.get_feature_names_out(categorical_features)

# Build a DataFrame from the encoded features and concatenate it with the
# remaining columns. Reusing data.index makes the column-wise concat align
# row-for-row even if `data` ever stops having a default RangeIndex.
encoded_df = pd.DataFrame(encoded_features, columns=encoded_feature_names, index=data.index)
data_encoded = pd.concat([data.drop(categorical_features, axis=1), encoded_df], axis=1)

# Feature matrix: every column except the target ('Survived') and 'PassengerId'.
# PassengerId is a row identifier, not a passenger attribute, so it must not be
# fed to the model. It is kept in `data_encoded` and only excluded here.
non_feature_columns = ['Survived', 'PassengerId']
feature_frame = data_encoded.drop(columns=non_feature_columns)

# Standardize the features before applying LDA
scaler = StandardScaler()
data_scaled = scaler.fit_transform(feature_frame)

# data_scaled holds the standardized numeric + one-hot encoded features;
# 'Survived' is the target. With two classes LDA yields at most one
# discriminant axis, hence n_components=1.
lda = LinearDiscriminantAnalysis(n_components=1)
lda_result = lda.fit_transform(data_scaled, data['Survived'])

# Plot the 1-D projection; the y coordinate is a constant placeholder so the
# points lie on a single line, coloured/marked by survival.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    x=lda_result[:, 0],
    y=[0] * len(lda_result),
    hue=data['Survived'],
    style=data['Survived'],
    palette='viridis',
    ax=ax,
)
ax.set_title('LDA Result on Titanic Dataset')
ax.set_xlabel('LD1')
ax.set_yticks([])
plt.show()


In [None]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Features and target, taken from the preprocessing cell above.
# X is the *unscaled* encoded feature frame: scaling is fitted after the split
# (below) so that test-set statistics never influence preprocessing.
# 'PassengerId' is a row identifier rather than a feature; errors='ignore'
# keeps this cell working whether or not the column is still present.
X = data_encoded.drop(columns=['Survived']).drop(columns=['PassengerId'], errors='ignore')
y = data['Survived']  # Target variable

# Split data into training and testing sets; stratify to keep the class
# proportions of 'Survived' the same in both splits.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training split only, then apply it to both splits.
# A separate name is used so the `scaler` fitted on the full data earlier in
# the notebook is not overwritten.
split_scaler = StandardScaler()
X_train = split_scaler.fit_transform(X_train_raw)
X_test = split_scaler.transform(X_test_raw)

# Fit LDA. The 1-D projections are not needed for prediction; they are kept
# for use as a reduced feature set.
lda = LinearDiscriminantAnalysis(n_components=1)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)

# Using LDA as classifier: predict() takes the scaled features, not the
# projected ones.
y_pred = lda.predict(X_test)

# Evaluate the model on the held-out split
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


# **Best Use Cases for LDA**

**Binary or Multiclass Classification Problems:** LDA is a natural fit for classification problems, especially when the classes are well-separated and the features within each class are approximately normally distributed with similar covariance across classes.

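**Dimensionality Reduction for Supervised Learning:** Unlike PCA, LDA takes class labels into account, making it more suitable when you need to preserve class-discriminatory information. Note that LDA can produce at most (number of classes − 1) components, so a binary target such as `Survived` yields a single discriminant axis.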

**Pattern Recognition and Feature Extraction:** In fields such as facial recognition, speech recognition, and biometrics, LDA helps in extracting features that convey the most class-discriminatory information.

**Preprocessing for Other Machine Learning Models:** Reduced feature sets obtained via LDA can be used as inputs to other machine learning models, potentially increasing their effectiveness or reducing overfitting.