In [1]:
## Heavy-decoder search space: 4 decoder layers vs. 2 encoder layers (commented-out copy of the trial grid)
# encoder_dense_layers_trial = [[10, 8], [12, 10], [14, 12], [16, 14], [18, 16], [20, 18],[22, 20]]
# decoder_dense_layers_trial = [[6, 8, 10, 12], [8, 10, 12, 14], [10, 12, 14, 16], [12, 14, 16, 18], [14, 16, 18, 20], 
#                               [16, 18, 20, 22], [18, 20, 22, 24]]
# bottle_neck_trial = [8, 10, 12, 14, 16, 18]

In [2]:
#pip install tf2onnx
from tensorflow import keras
import time
import pickle
import pandas as pd

In [3]:
# CSV file that every training run reads its data from.
FILE = "Combined_AM_Data.csv"

# Accumulates one [model name, train loss, validation loss] row per trained model.
run_summary = []

In [4]:
def _build_autoencoder(input_shape, **kwargs):
    """Assemble a fully connected autoencoder together with its two halves.

    Parameters
    ----------
    input_shape : int
        Number of features in one flat input sample.
    **kwargs
        encoder_dense_layers : list of int, default ``[]``
            Units of each ReLU dense layer placed before the bottleneck.
        bottle_neck : int, default ``input_shape // 2``
            Units of the ReLU bottleneck layer (the encoder output).
        decoder_dense_layers : list of int, default ``[]``
            Units of each ReLU dense layer placed after the bottleneck.
        decoder_activation : str, default ``'sigmoid'``
            Activation of the final reconstruction layer.

    Returns
    -------
    tuple
        ``(autoencoder, encoder, decoder)`` Keras models; the autoencoder is
        the decoder applied to the encoder's output.
    """
    encoder_dense_layers = kwargs.get('encoder_dense_layers', [])
    bottle_neck = kwargs.get('bottle_neck', input_shape // 2)
    decoder_dense_layers = kwargs.get('decoder_dense_layers', [])
    decoder_activation = kwargs.get('decoder_activation', 'sigmoid')

    # Encoder: input -> dense stack -> bottleneck
    encoder_input = keras.Input(shape=(input_shape,), name="encoder")
    x = keras.layers.Flatten()(encoder_input)
    for units in encoder_dense_layers:
        x = keras.layers.Dense(units, activation="relu")(x)
    encoder_output = keras.layers.Dense(bottle_neck, activation="relu")(x)
    encoder = keras.Model(encoder_input, encoder_output, name="encoder")

    # Decoder: bottleneck -> dense stack -> reconstruction
    decoder_input = keras.Input(shape=(bottle_neck,), name="decoder")
    x = decoder_input
    for units in decoder_dense_layers:
        x = keras.layers.Dense(units, activation="relu")(x)
    decoder_output = keras.layers.Dense(input_shape, activation=decoder_activation)(x)
    decoder = keras.Model(decoder_input, decoder_output, name="decoder")

    # End-to-end model: chain the two sub-models
    autoencoder_input = keras.Input(shape=(input_shape,), name="input")
    encoded = encoder(autoencoder_input)
    decoded = decoder(encoded)
    autoencoder = keras.Model(autoencoder_input, decoded, name="autoencoder")

    return autoencoder, encoder, decoder


def generate_autoencoder_reports(encoder_dense_layers, bottle_neck, decoder_dense_layers):
    """Train one autoencoder configuration on the class-0 rows and write its reports.

    The data is read from ``FILE``, restricted to a fixed column list, shuffled
    with ``random_state=42`` and stripped of rows containing NaN. Only rows whose
    ``class`` equals 0 are used for training (the code treats them as the
    minority class). The model is trained for 125 epochs (batch size 16,
    25 % validation split, Adam with learning rate 0.001, MSE loss).

    Files written, all prefixed with a name derived from the architecture
    (``L<features>_E<encoder>_B<bottleneck>_D<decoder>``):

    * ``<name>_history.pickle``  - pickled Keras training history dict
    * ``<name>mean_std.csv``     - per-column mean/std of original vs. reconstructed data
    * ``<name>_Original_minority_data.csv``  - the training rows plus ``class = 0.0``
    * ``<name>_Synthetic_minority_data.csv`` - the reconstructed rows plus ``class = 0.0``

    Side effects: prints the name and final train/validation loss, and appends
    ``[name, train_loss, val_loss]`` to the module-level ``run_summary`` list.

    Parameters
    ----------
    encoder_dense_layers : list of int
        Units of the encoder's hidden dense layers.
    bottle_neck : int
        Units of the bottleneck layer.
    decoder_dense_layers : list of int
        Units of the decoder's hidden dense layers.

    Returns
    -------
    None
    """
    # This function is called once per grid point; without clearing, Keras keeps
    # global state for every model ever built and memory grows across the sweep.
    keras.backend.clear_session()

    # Load Data
    df_am = pd.read_csv(FILE)

    columns_needed = ['income', 'road_dist', 'cooking', 'y_am_pef', 'tempin',
                      'humidin', 'pm25in', 'co2in', 'tempdiffin', 'humidiffin',
                      'pm25diffin', 'pm10', 'pm25', 'o3', 'no2', 'co', 'so2', 'temp',
                      'windsd', 'humid', 'varp', 'dewpt', 'airp', 'seap', 'solrhr', 'solramnt', 'grdt', 'class']

    cleaned_df = df_am.filter(columns_needed)
    df = cleaned_df.sample(frac=1, random_state=42).reset_index(drop=True)

    df = df.dropna()
    minority_class = df[df['class'] == 0]
    X_train = minority_class.drop('class', axis=1)

    # Define the Input shape
    INPUT_SHAPE = X_train.shape[1]
    FILE_NAME = f"L{INPUT_SHAPE}_E{'_'.join(map(str, encoder_dense_layers))}_B{bottle_neck}_D{'_'.join(map(str, decoder_dense_layers))}"

    autoencoder, encoder, decoder = _build_autoencoder(INPUT_SHAPE,
                                                       encoder_dense_layers=encoder_dense_layers,
                                                       bottle_neck=bottle_neck,
                                                       decoder_dense_layers=decoder_dense_layers)
    opt = keras.optimizers.Adam(learning_rate=0.001)
    autoencoder.compile(opt, loss="mse")

    history = autoencoder.fit(X_train, X_train, epochs=125, batch_size=16, validation_split=0.25, verbose=0)

    # Final-epoch losses are what gets reported for this configuration
    last_epoch_loss = history.history['loss'][-1]
    last_epoch_val_loss = history.history['val_loss'][-1]

    # Saving history
    with open(FILE_NAME + '_history.pickle', 'wb') as file:
        pickle.dump(history.history, file)

    # Generate synthetic data.
    # The "synthetic" rows are reconstructions of the training rows themselves,
    # not samples decoded from random latent vectors.
    num_samples = len(X_train)
    generated_data = autoencoder.predict(X_train)

    reshaped_data = generated_data.reshape(num_samples, -1)
    df_generated = pd.DataFrame(reshaped_data, columns=X_train.columns)

    # Calculate mean and standard deviation of original and synthetic datasets.
    # Walk the columns in frame order (not via a set) so the report's column
    # order is the same on every run.
    common_columns = [column for column in X_train.columns if column in df_generated.columns]
    results = {}

    for column in common_columns:
        results[column] = {'Mean_df1': X_train[column].mean(),
                           'Std_df1': X_train[column].std(),
                           'Mean_df2': df_generated[column].mean(),
                           'Std_df2': df_generated[column].std()}

    comparison_df = pd.DataFrame(results)
    # NOTE(review): unlike the other outputs there is no '_' before 'mean_std.csv';
    # kept unchanged so existing output file names stay the same.
    comparison_df.to_csv(FILE_NAME + 'mean_std.csv', index=True)

    # Add back the class label
    X_train['class'] = 0.0
    df_generated['class'] = 0.0
    X_train.to_csv(FILE_NAME + '_Original_minority_data.csv', index=False)
    df_generated.to_csv(FILE_NAME + '_Synthetic_minority_data.csv', index=False)
    print(FILE_NAME, last_epoch_loss, last_epoch_val_loss)

    run_summary.append([FILE_NAME, last_epoch_loss, last_epoch_val_loss])

In [5]:
def runner(encoder_dense_layers, bottle_neck, decoder_dense_layers):
    """Run a single grid-search trial.

    Forwards the three architecture settings unchanged to
    ``generate_autoencoder_reports`` and returns nothing.
    """
    generate_autoencoder_reports(
        encoder_dense_layers=encoder_dense_layers,
        bottle_neck=bottle_neck,
        decoder_dense_layers=decoder_dense_layers,
    )

In [6]:
# Encoder candidates: two layers, [n, n - 2] for n = 10, 12, ..., 22
encoder_dense_layers_trial = [[first, first - 2] for first in range(10, 23, 2)]

# Decoder candidates: four layers widening by 2, starting at 6, 8, ..., 18
decoder_dense_layers_trial = [
    [first, first + 2, first + 4, first + 6] for first in range(6, 19, 2)
]

# Bottleneck candidates: 8, 10, ..., 18
bottle_neck_trial = list(range(8, 19, 2))

In [7]:
# Size of the full grid: every bottleneck x every encoder x every decoder.
total_iterations = (
    len(bottle_neck_trial)
    * len(encoder_dense_layers_trial)
    * len(decoder_dense_layers_trial)
)
print("Total Model in Pipeline:", total_iterations)

# List each combination in the order the nested loops visit them.
for bn in bottle_neck_trial:
    for enc_layers in encoder_dense_layers_trial:
        for dec_layers in decoder_dense_layers_trial:
            print(enc_layers, bn, dec_layers)

Total Model in Pipeline: 294
[10, 8] 8 [6, 8, 10, 12]
[10, 8] 8 [8, 10, 12, 14]
[10, 8] 8 [10, 12, 14, 16]
[10, 8] 8 [12, 14, 16, 18]
[10, 8] 8 [14, 16, 18, 20]
[10, 8] 8 [16, 18, 20, 22]
[10, 8] 8 [18, 20, 22, 24]
[12, 10] 8 [6, 8, 10, 12]
[12, 10] 8 [8, 10, 12, 14]
[12, 10] 8 [10, 12, 14, 16]
[12, 10] 8 [12, 14, 16, 18]
[12, 10] 8 [14, 16, 18, 20]
[12, 10] 8 [16, 18, 20, 22]
[12, 10] 8 [18, 20, 22, 24]
[14, 12] 8 [6, 8, 10, 12]
[14, 12] 8 [8, 10, 12, 14]
[14, 12] 8 [10, 12, 14, 16]
[14, 12] 8 [12, 14, 16, 18]
[14, 12] 8 [14, 16, 18, 20]
[14, 12] 8 [16, 18, 20, 22]
[14, 12] 8 [18, 20, 22, 24]
[16, 14] 8 [6, 8, 10, 12]
[16, 14] 8 [8, 10, 12, 14]
[16, 14] 8 [10, 12, 14, 16]
[16, 14] 8 [12, 14, 16, 18]
[16, 14] 8 [14, 16, 18, 20]
[16, 14] 8 [16, 18, 20, 22]
[16, 14] 8 [18, 20, 22, 24]
[18, 16] 8 [6, 8, 10, 12]
[18, 16] 8 [8, 10, 12, 14]
[18, 16] 8 [10, 12, 14, 16]
[18, 16] 8 [12, 14, 16, 18]
[18, 16] 8 [14, 16, 18, 20]
[18, 16] 8 [16, 18, 20, 22]
[18, 16] 8 [18, 20, 22, 24]
[20, 18] 8 [6

In [8]:
# Train every (bottleneck, encoder, decoder) combination and report the elapsed time.
# perf_counter() is a monotonic clock, so the measured duration cannot be skewed
# by system clock adjustments during a long sweep (time.time() can be).
start_time = time.perf_counter()

for bn in bottle_neck_trial:
    for enc_layers in encoder_dense_layers_trial:
        for dec_layers in decoder_dense_layers_trial:
            runner(enc_layers, bn, dec_layers)

end_time = time.perf_counter()
total_time = end_time - start_time
total_time_minutes = total_time / 60
print(f"Total time: {total_time_minutes} minutes")

L27_E10_8_B8_D6_8_10_12 0.019324323162436485 0.020469218492507935
L27_E10_8_B8_D8_10_12_14 0.01884509064257145 0.02001008950173855
L27_E10_8_B8_D10_12_14_16 0.016048550605773926 0.017023390159010887
L27_E10_8_B8_D12_14_16_18 0.0183480866253376 0.019687261432409286
L27_E10_8_B8_D14_16_18_20 0.018244164064526558 0.019582808017730713
L27_E10_8_B8_D16_18_20_22 0.018875591456890106 0.02061733789741993
L27_E10_8_B8_D18_20_22_24 0.01777852140367031 0.018931332975625992
L27_E12_10_B8_D6_8_10_12 0.0217658169567585 0.023579465225338936
L27_E12_10_B8_D8_10_12_14 0.019422801211476326 0.020270273089408875
L27_E12_10_B8_D10_12_14_16 0.021136095747351646 0.02320871502161026
L27_E12_10_B8_D12_14_16_18 0.01743827573955059 0.018786119297146797
L27_E12_10_B8_D14_16_18_20 0.013849608600139618 0.01530672237277031
L27_E12_10_B8_D16_18_20_22 0.015996498987078667 0.017534691840410233
L27_E12_10_B8_D18_20_22_24 0.0160363856703043 0.01717158779501915
L27_E14_12_B8_D6_8_10_12 0.019638551399111748 0.0203648991882

In [9]:
# Display the collected [model name, train loss, validation loss] rows.
run_summary

[['L27_E10_8_B8_D6_8_10_12', 0.019324323162436485, 0.020469218492507935],
 ['L27_E10_8_B8_D8_10_12_14', 0.01884509064257145, 0.02001008950173855],
 ['L27_E10_8_B8_D10_12_14_16', 0.016048550605773926, 0.017023390159010887],
 ['L27_E10_8_B8_D12_14_16_18', 0.0183480866253376, 0.019687261432409286],
 ['L27_E10_8_B8_D14_16_18_20', 0.018244164064526558, 0.019582808017730713],
 ['L27_E10_8_B8_D16_18_20_22', 0.018875591456890106, 0.02061733789741993],
 ['L27_E10_8_B8_D18_20_22_24', 0.01777852140367031, 0.018931332975625992],
 ['L27_E12_10_B8_D6_8_10_12', 0.0217658169567585, 0.023579465225338936],
 ['L27_E12_10_B8_D8_10_12_14', 0.019422801211476326, 0.020270273089408875],
 ['L27_E12_10_B8_D10_12_14_16', 0.021136095747351646, 0.02320871502161026],
 ['L27_E12_10_B8_D12_14_16_18', 0.01743827573955059, 0.018786119297146797],
 ['L27_E12_10_B8_D14_16_18_20', 0.013849608600139618, 0.01530672237277031],
 ['L27_E12_10_B8_D16_18_20_22', 0.015996498987078667, 0.017534691840410233],
 ['L27_E12_10_B8_D18_20

In [10]:
# Tabulate the per-model losses and persist them next to the notebook.
run_summary_df = pd.DataFrame(
    run_summary,
    columns=['Model', 'Train Loss', 'Validation Loss'],
)
run_summary_df.to_csv('run_summary.csv', index=False)