# 📘 Autoencoder (Neural Network)

Use an unsupervised deep learning model (autoencoder) to learn compressed representations of gene expression data.

### 🔧 Step 1: Import Required Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.optimizers import Adam

### 📂 Step 2: Load and Standardize Dataset

In [None]:
# Read the expression matrix; the first CSV column supplies the row index
df = pd.read_csv('your_expression_data.csv', index_col=0)

# Keep the scaler object around under its own name so it can be reused later
scaler = StandardScaler()

# Learn per-column mean/scale from df, then apply them to the same data
# (zero mean, unit variance per column)
scaled_data = scaler.fit(df).transform(df)

### 🤖 Step 3: Build and Train Autoencoder

In [None]:
# Number of input features (columns of the standardized matrix)
input_dim = scaled_data.shape[1]

# Input layer: one unit per feature
input_layer = Input(shape=(input_dim,))

# Encoder: 64-unit ReLU hidden layer followed by a 2-unit linear bottleneck.
# The bottleneck output is the 2D representation plotted in the next step.
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(2, activation='linear')(encoded)

# Decoder: mirror of the encoder, expanding 2 -> 64 -> input_dim.
# The output layer is linear because the targets are standardized values,
# which can be negative.
decoded = Dense(64, activation='relu')(encoded)
decoded = Dense(input_dim, activation='linear')(decoded)

# Full autoencoder: reconstructs its own input
autoencoder = Model(inputs=input_layer, outputs=decoded)

# Adam optimizer with mean squared reconstruction error
autoencoder.compile(optimizer=Adam(), loss='mse')

# Train with the data as both input and target (50 epochs, batches of 32, silent)
autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=32, verbose=0)

# The encoder shares the trained layers and stops at the bottleneck
encoder = Model(inputs=input_layer, outputs=encoded)

# Compressed 2D coordinates for every row of the standardized dataset
encoded_data = encoder.predict(scaled_data)

# Wrap the coordinates in a DataFrame. Reuse df's index so each encoded row
# keeps the identifier of the row it came from; without it the result gets a
# fresh 0..n-1 index and can no longer be aligned with labels or traced back
# to the input rows after saving.
auto_df = pd.DataFrame(encoded_data, columns=['Enc1', 'Enc2'], index=df.index)

### 📊 Step 4: Visualize Encoded 2D Representation

In [None]:
# Create the figure together with its single axes so that every call below
# targets that axes explicitly instead of relying on pyplot's "current" axes
enc_fig, enc_ax = plt.subplots(figsize=(8, 6))

# Scatter the two encoded coordinates against each other
sns.scatterplot(data=auto_df, x='Enc1', y='Enc2', ax=enc_ax)

# Title and axis labels
enc_ax.set_title('2D Representation from Autoencoder')
enc_ax.set_xlabel('Encoded Feature 1')
enc_ax.set_ylabel('Encoded Feature 2')

# Grid lines for easier reading of point positions
enc_ax.grid(True)

# Render the figure
plt.show()

### 🏷️ Optional: Load Labels for Coloring

In [None]:
# Uncomment and use if you have a label file
# labels = pd.read_csv('your_labels.csv', index_col=0).squeeze()
# df['Label'] = labels

### 💾 Optional: Save Transformed Data

In [None]:
# Save the result of Autoencoder transformation
# auto_df.to_csv('autoencoder_result.csv')

### 🌐 Optional: 3D Visualization

In [None]:
# Import the 3D plotting toolkit from Matplotlib
from mpl_toolkits.mplot3d import Axes3D

# Create the figure and its single 3D axes in one call; `fig` and `ax` are the
# names the commented-out scatter example below expects
fig, ax = plt.subplots(figsize=(10, 7), subplot_kw={'projection': '3d'})

### extra for 3D visualization
# Uncomment and modify if you have 3D data
# ax.scatter(embedded_df['X'], embedded_df['Y'], embedded_df['Z'], c=labels, cmap='viridis')
# ax.set_title('3D Visualization')
# plt.show()

### 🎛️ Optional: Interactive Parameters (Requires ipywidgets)

In [None]:
# Import the interact function from ipywidgets for creating interactive widgets
from ipywidgets import interact

# Define a function 'update' that takes 'n_components' as an argument (default is 2)
def update(n_components=2):
    """Fit PCA on the standardized data and scatter-plot its first two components.

    Note that this callback runs PCA on the module-level ``scaled_data``; it
    does not retrain or query the autoencoder, even though the plot title
    says 'Autoencoder Interactive'.

    Args:
        n_components: Number of principal components to compute. Values
            larger than ``min(n_samples, n_features)`` are reduced to that
            limit, since PCA cannot extract more components than that.
    """
    # PCA is not imported at the top of the notebook, so bring it into scope
    # here; without this the callback fails with NameError on first use.
    from sklearn.decomposition import PCA

    # Clamp the request to what the data can support so the upper end of the
    # slider does not raise on small datasets. Only the first two components
    # are plotted, so clamping does not change the picture.
    n_components = min(n_components, *scaled_data.shape)

    # Fit PCA and project the data in one step
    model = PCA(n_components=n_components)
    result = model.fit_transform(scaled_data)

    # Name the columns 'Component 1', 'Component 2', ...
    df_plot = pd.DataFrame(
        result,
        columns=[f'Component {i + 1}' for i in range(n_components)],
    )

    # Scatter the first component against the second
    plt.figure(figsize=(8, 6))
    sns.scatterplot(x=df_plot.iloc[:, 0], y=df_plot.iloc[:, 1])

    # Title, axis labels and grid
    plt.title('Autoencoder Interactive')
    plt.xlabel('Component 1')
    plt.ylabel('Component 2')
    plt.grid(True)

    # Render the figure
    plt.show()


# Integer slider from 2 to 10 that re-runs `update` whenever it changes
interact(update, n_components=(2, 10))