In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping

# Reduce the feature table in master_file.csv to a 2-D embedding with a small
# dense autoencoder and write the embedding, keyed by filename, to a new CSV.

# Make weight initialisation and batch shuffling repeatable between runs.
# set_random_seed seeds Python's `random`, NumPy and the TensorFlow backend.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Load the CSV file
file_path = "master_file.csv"  # Replace with your CSV file path
df = pd.read_csv(file_path)

# Separate filenames and feature columns
filenames = df.iloc[:, 0]  # First column is treated as the filename
features = df.iloc[:, 1:]  # Every remaining column is treated as a feature

# Standardize the feature columns (the MSE loss below is computed in this
# scaled space, so each feature contributes on a comparable scale)
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Define the Autoencoder structure
input_dim = features_scaled.shape[1]  # Number of original features
encoding_dim = 2  # Number of dimensions to reduce to (e.g., 2 for visualization)

# Encoder: input -> 64 -> 32 -> bottleneck
input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
encoded = Dense(32, activation='relu')(encoded)
# Linear bottleneck so the embedding is not clipped at zero by a ReLU
bottleneck = Dense(encoding_dim, activation='linear')(encoded)

# Decoder: mirror of the encoder, linear output to match standardized inputs
decoded = Dense(32, activation='relu')(bottleneck)
decoded = Dense(64, activation='relu')(decoded)
output_layer = Dense(input_dim, activation='linear')(decoded)

# Build and compile the Autoencoder model
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer='adam', loss='mse')

# Stop once the *training* loss has not improved for 10 consecutive epochs and
# roll back to the best weights seen. NOTE: no validation data is passed to
# fit(), so this bounds training time but does not by itself detect overfitting.
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the Autoencoder to reconstruct its own (scaled) input
autoencoder.fit(features_scaled, features_scaled,
                epochs=100,
                batch_size=32,
                shuffle=True,
                callbacks=[early_stopping],
                verbose=1)

# The encoder shares the trained layers up to the bottleneck; its output is
# the reduced representation
encoder = Model(inputs=input_layer, outputs=bottleneck)
features_encoded = encoder.predict(features_scaled)

# Create a new DataFrame with filenames and encoded features
encoded_df = pd.DataFrame(features_encoded, columns=[f'Encoded_{i+1}' for i in range(encoding_dim)])
encoded_df.insert(0, 'Filename', filenames)  # Insert filenames as the first column

# Save the resulting DataFrame to a new CSV file
output_path = "master_file_autoencoder_1.csv"
encoded_df.to_csv(output_path, index=False)

print(f"Autoencoder-transformed features saved to {output_path}")


Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - loss: 0.9951
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 1.0187 
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9371 
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9372
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8941
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8090 
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7760 
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7832
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 946us/step - loss: 0.7242
Epoch 10/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.7150 
Epoch 11/10

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import silhouette_score

# Reduce the feature table in master_file.csv to a 5-D embedding with a deeper
# LeakyReLU autoencoder and write the embedding, keyed by filename, to a new CSV.

# Make weight initialisation and batch shuffling repeatable between runs.
# set_random_seed seeds Python's `random`, NumPy and the TensorFlow backend.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Load the CSV file
file_path = "master_file.csv"  # Replace with your CSV file path
df = pd.read_csv(file_path)

# Separate filenames and feature columns
filenames = df.iloc[:, 0]  # First column is treated as the filename
features = df.iloc[:, 1:]  # Every remaining column is treated as a feature

# Standardize the feature columns
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Define the Autoencoder structure
input_dim = features_scaled.shape[1]  # Number of original features
encoding_dim = 5  # Reduced dimensionality
leaky_slope = 0.1  # Slope applied to negative pre-activations

input_layer = Input(shape=(input_dim,))

# Encoder: Dense -> LeakyReLU stacks of decreasing width.
# Keras 3 renamed LeakyReLU's `alpha` argument to `negative_slope`; the old
# name still works there but emits a deprecation warning on every layer.
encoded = input_layer
for units in (256, 128, 64):
    encoded = Dense(units)(encoded)
    encoded = LeakyReLU(negative_slope=leaky_slope)(encoded)

# Bottleneck layer (no activation, i.e. linear)
bottleneck = Dense(encoding_dim)(encoded)

# Decoder: mirror of the encoder
decoded = bottleneck
for units in (64, 128, 256):
    decoded = Dense(units)(decoded)
    decoded = LeakyReLU(negative_slope=leaky_slope)(decoded)

# Linear output to match the standardized inputs
output_layer = Dense(input_dim, activation='linear')(decoded)

# Build and compile the Autoencoder model
autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer='adam', loss='mse')

# Stop once the *training* loss has not improved for 10 consecutive epochs and
# roll back to the best weights seen. NOTE: no validation data is passed to
# fit(), so this bounds training time but does not by itself detect overfitting.
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the Autoencoder to reconstruct its own (scaled) input
autoencoder.fit(features_scaled, features_scaled,
                epochs=100,
                batch_size=32,
                shuffle=True,
                callbacks=[early_stopping],
                verbose=1)

# The encoder shares the trained layers up to the bottleneck; its output is
# the reduced representation
encoder = Model(inputs=input_layer, outputs=bottleneck)
features_encoded = encoder.predict(features_scaled)

# Create a new DataFrame with filenames and encoded features
encoded_df = pd.DataFrame(features_encoded, columns=[f'Encoded_{i+1}' for i in range(encoding_dim)])
encoded_df.insert(0, 'Filename', filenames)  # Insert filenames as the first column

# Save the resulting DataFrame to a new CSV file
output_path = "master_file_autoencoder_5d_deep.csv"
encoded_df.to_csv(output_path, index=False)

print(f"Autoencoder-transformed features saved to {output_path}")


Epoch 1/100




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 17ms/step - loss: 0.9911
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9698 
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8732 
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.8029 
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7426 
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.7303 
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6947 
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.6861 
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.6735 
Epoch 10/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.6688 
Epoch 11/100
[1m4/4

In [7]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import silhouette_score

# Reduce the feature table in master_file.csv to a 5-D embedding with a
# single-hidden-layer autoencoder and write the embedding, keyed by filename,
# to a new CSV.

# Make weight initialisation and batch shuffling repeatable between runs.
# set_random_seed seeds Python's `random`, NumPy and the TensorFlow backend.
from tensorflow.keras.utils import set_random_seed

set_random_seed(42)

# Load the CSV file
file_path = "master_file.csv"
df = pd.read_csv(file_path)

# Separate filenames and feature columns
filenames = df.iloc[:, 0]  # First column is treated as the filename
features = df.iloc[:, 1:]  # Every remaining column is treated as a feature

# Standardize features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

# Define simpler Autoencoder structure: input -> 64 -> bottleneck -> 64 -> output
input_dim = features_scaled.shape[1]
encoding_dim = 5

input_layer = Input(shape=(input_dim,))
encoded = Dense(64, activation='relu')(input_layer)
bottleneck = Dense(encoding_dim, activation='linear')(encoded)  # Bottleneck layer
decoded = Dense(64, activation='relu')(bottleneck)
output_layer = Dense(input_dim, activation='linear')(decoded)

autoencoder = Model(inputs=input_layer, outputs=output_layer)
autoencoder.compile(optimizer='adam', loss='mse')

# Stop once the *training* loss has not improved for 10 consecutive epochs and
# roll back to the best weights seen. NOTE: no validation data is passed to
# fit(), so this bounds training time but does not by itself detect overfitting.
early_stopping = EarlyStopping(monitor='loss', patience=10, restore_best_weights=True)

# Train the Autoencoder to reconstruct its own (scaled) input
autoencoder.fit(features_scaled, features_scaled,
                epochs=100, batch_size=32, shuffle=True,
                callbacks=[early_stopping], verbose=1)

# Extract encoder: shares the trained layers up to the bottleneck
encoder = Model(inputs=input_layer, outputs=bottleneck)
features_encoded = encoder.predict(features_scaled)

# Save to CSV. The path is held in one variable so the message printed below
# always names the file that was actually written.
output_path = "master_file_autoencoder_simple_5.csv"
encoded_df = pd.DataFrame(features_encoded, columns=[f'Encoded_{i+1}' for i in range(encoding_dim)])
encoded_df.insert(0, 'Filename', filenames)
encoded_df.to_csv(output_path, index=False)
print(f"Results saved to {output_path}")


Epoch 1/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 1.0098
Epoch 2/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1.0168 
Epoch 3/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 1.0059 
Epoch 4/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.9499
Epoch 5/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.9046
Epoch 6/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.8794 
Epoch 7/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.8206
Epoch 8/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.7974
Epoch 9/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.7550 
Epoch 10/100
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0s/step - loss: 0.7323
Epoch 11/100
[1