# 📘 Linear Discriminant Analysis (LDA)

Use LDA for supervised dimensionality reduction: it requires known class labels and finds the projection that maximizes the separation between classes.

### 🔧 Step 1: Import Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.preprocessing import StandardScaler
import seaborn as sns

### 📂 Step 2: Load Dataset and Class Labels

In [None]:
# Load the gene expression data from a CSV file, using the first column as the index
df = pd.read_csv('your_expression_data.csv', index_col=0)

# Load the sample labels from a CSV file (first column = index, one remaining column = label).
# Squeeze only the column axis: a bare .squeeze() also collapses the row axis, so a
# one-row label file would turn into a scalar instead of a Series.
# NOTE(review): nothing here checks that the label rows are in the same order as the
# rows of `df` — confirm the two files are aligned before fitting.
labels = pd.read_csv('your_labels.csv', index_col=0).squeeze("columns")

### ⚖️ Step 3: Standardize the Data

In [None]:
# Learn per-feature mean and standard deviation from the expression matrix
scaler = StandardScaler().fit(df)

# Apply the learned scaling so every feature has zero mean and unit variance
scaled_data = scaler.transform(df)

### 📉 Step 4: Apply LDA

In [None]:
# Initialize the Linear Discriminant Analysis (LDA) model with 2 components.
# scikit-learn allows at most min(n_features, n_classes - 1) components, so
# requesting 2 needs at least 3 distinct classes in `labels`.
lda = LinearDiscriminantAnalysis(n_components=2)

# Fit the LDA model to the standardized data and labels, and transform the data
lda_result = lda.fit_transform(scaled_data, labels)

# Store the projection as 'LD1'/'LD2', keeping the row identifiers of `df` as the
# index so each projected point can still be traced back to its sample
lda_df = pd.DataFrame(lda_result, columns=['LD1', 'LD2'], index=df.index)

# Add the labels as a new column for visualization purposes.
# `.values` assigns by position, not by index.
# NOTE(review): this assumes `labels` is in the same row order as `df` — confirm.
lda_df['Label'] = labels.values

### 📊 Step 5: Visualize the LDA Results

In [None]:
# Create an 8x6 inch figure with a single Axes to draw on
fig, ax = plt.subplots(figsize=(8, 6))

# Scatter the two discriminants, one colour per class label
sns.scatterplot(
    data=lda_df,
    x='LD1',
    y='LD2',
    hue='Label',
    palette='Set2',
    ax=ax,
)

# Title and axis labels
ax.set_title('LDA of Gene Expression Data')
ax.set_xlabel('Linear Discriminant 1')
ax.set_ylabel('Linear Discriminant 2')

# Grid lines make it easier to read off coordinates
ax.grid(True)

# Show the class legend
ax.legend()

# Render the figure
plt.show()

### 🏷️ Optional: Load Labels for Coloring

In [None]:
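# Labels are already loaded in Step 2 (LDA cannot be fitted without them).
# Uncomment only if you want to reload them from a label file.
# NOTE: adding a 'Label' column to `df` makes it part of the data passed to the
# scaler in Step 3, so do this only after the data has been standardized.
# labels = pd.read_csv('your_labels.csv', index_col=0).squeeze()
# df['Label'] = labels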

### 💾 Optional: Save Transformed Data

In [None]:
# Save the result of the Linear Discriminant Analysis (LDA) transformation
# lda_df.to_csv('lda_result.csv')

### 🌐 Optional: 3D Visualization

In [None]:
# Pull in Matplotlib's 3D toolkit (the name itself is not used below;
# presumably imported so the '3d' projection is available on older Matplotlib)
from mpl_toolkits.mplot3d import Axes3D

# Start a 10x7 inch figure
fig = plt.figure(figsize=(10, 7))

# Place a single 3D Axes on it (1 row, 1 column, first slot)
ax = fig.add_subplot(1, 1, 1, projection='3d')


# Template: uncomment and adapt if you have 3D data.
# NOTE(review): `embedded_df` is not defined in the cells shown here; it must be
# replaced with a DataFrame that actually has 'X', 'Y' and 'Z' columns.
# ax.scatter(embedded_df['X'], embedded_df['Y'], embedded_df['Z'], c=labels, cmap='viridis')
# ax.set_title('3D Visualization')
# plt.show()

### 🎛️ Optional: Interactive Parameters (Requires ipywidgets)

In [None]:
# Import the `interact` function from the ipywidgets library for creating interactive widgets
from ipywidgets import interact

# LDA yields at most min(n_features, n_classes - 1) discriminants, so the slider
# must not offer more than that. The floor of 2 is kept because the plot below
# always draws two components (with fewer than 3 classes the fit will raise,
# exactly as the fixed 2-component fit in Step 4 would).
max_lda_components = max(2, min(scaled_data.shape[1], labels.nunique() - 1))


def update(n_components=2):
    """Refit LDA with `n_components` discriminants and plot the first two.

    Reads the notebook-level `scaled_data` (standardized features) and `labels`
    (class label per row) on every call.

    Args:
        n_components: Number of linear discriminants to compute. Must be at
            least 2 (two columns are plotted) and no larger than
            min(n_features, n_classes - 1).

    Raises:
        ValueError: Raised by scikit-learn when `n_components` exceeds the
            number of discriminants LDA can produce for this data.
    """
    # LDA is supervised: unlike an unsupervised projection it needs the labels to fit
    model = LinearDiscriminantAnalysis(n_components=n_components)
    result = model.fit_transform(scaled_data, labels)

    # Name the columns 'Component 1' .. 'Component n'
    df_plot = pd.DataFrame(result, columns=['Component {}'.format(i+1) for i in range(n_components)])

    # Set the figure size for the plot
    plt.figure(figsize=(8, 6))

    # Plot the first two discriminants, coloured by class as in the static plot
    sns.scatterplot(
        x=df_plot.iloc[:, 0],
        y=df_plot.iloc[:, 1],
        hue=labels.values,
        palette='Set2',
    )

    # Title and axis labels
    plt.title('Linear Discriminant Analysis (LDA) Interactive')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')

    # Add a grid to the plot for better readability
    plt.grid(True)

    # Display the plot
    plt.show()


# Create an interactive slider for `n_components`, limited to what LDA can produce
interact(update, n_components=(2, max_lda_components))