In [None]:
from collections import Counter

import numpy as np
from imblearn.over_sampling import SMOTE
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from utils import get_train_split,get_test_split

Preprocessing

In [None]:
# Load the training features/labels and the held-out test features/labels
# from the project's split helpers.
x, y = get_train_split()
X_test, y_test = get_test_split()

In [None]:
# Class distribution of the raw training labels (before resampling)
counter = Counter(y)
print(counter)

# Oversample with SMOTE. The seed is fixed so that the synthetic samples --
# and every model trained on them -- are reproducible across
# "Restart Kernel -> Run All".
smote = SMOTE(random_state=42)

# Only the training split (x, y) is resampled; the test split is not touched here
X_train, y_train = smote.fit_resample(x, y)

# Class distribution after resampling
counter = Counter(y_train)
print(counter)

In [None]:
# Standardise the features. The scaler is fitted on the training data only and
# the same fitted transformation is then applied to the test data.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)

# Sanity check: every training feature should now have ~zero mean and ~unit variance
mean = np.mean(X_scaled, axis=0)
variance = np.var(X_scaled, axis=0)

if np.allclose(mean, 0) and np.allclose(variance, 1):
    print("The features have zero mean and unit variance.")
else:
    print("The features do not have zero mean and unit variance.")

# Carry the scaled data forward. Previously X_scaled was computed and then
# dropped, so the following steps ran on unscaled X_train and X_test was never
# scaled at all.
X_train = X_scaled
X_test = scaler.transform(X_test)

In [None]:
# Reduce dimensionality with PCA. A float n_components between 0 and 1 is a
# variance target, not a component count: PCA keeps as many components as are
# needed for the explained variance to exceed that fraction (here 95%).
pca = PCA(n_components=0.95)

# Fit on the training data only; project the test data onto the same components
X_train = pca.fit_transform(X_train)
X_test = pca.transform(X_test)

# Per-component share of explained variance, and the component vectors themselves
explained_variance_ratio = pca.explained_variance_ratio_
principal_components = pca.components_

print("Explained variance ratio:", explained_variance_ratio)
print("Principal components:", principal_components)
print("Transformed data shape:", X_train.shape)

Model Training

In [None]:
from tensorflow.keras.utils import to_categorical
import tensorflow as tf
from tensorflow.keras.layers import Dense, Activation, Flatten, Convolution1D, Dropout,MaxPooling1D,GlobalAveragePooling1D
from tensorflow.keras import Model, layers,Sequential,regularizers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import LearningRateScheduler
import mlflow
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import seaborn as sns
from sklearn.metrics import confusion_matrix

In [None]:
# One-hot encode the labels into 5 columns, matching the 5-unit softmax output
# and the categorical cross-entropy loss used by the models below.
# NOTE: this overwrites y_train / y_test, so run this cell only once per kernel.
y_train, y_test = (
    to_categorical(y_train, num_classes=5),
    to_categorical(y_test, num_classes=5),
)

In [None]:
# The PCA step keeps a data-dependent number of components (n_components=.95),
# so the sequence length is read from the data instead of being hard-coded.
n_features = X_train.shape[1]

# With 'valid' padding the stack below shrinks the sequence: two kernel-5 convs
# (-8), a pool of 3 (//3) and two kernel-3 convs (-4). At least one time step
# must remain, which requires n_features >= 23.
MIN_FEATURES = 23
if n_features < MIN_FEATURES:
    raise ValueError(
        f"The 1D CNN needs at least {MIN_FEATURES} input features, "
        f"but X_train has {n_features}."
    )

# 1D CNN: each sample is treated as a length-n_features sequence with one channel
model = Sequential([
    Convolution1D(32, 5, activation='relu', input_shape=(n_features, 1)),
    Convolution1D(64, 5, activation='relu'),
    MaxPooling1D(3),
    Convolution1D(128, 3, activation='relu'),
    Convolution1D(256, 3, activation='relu'),
    GlobalAveragePooling1D(),
    Dropout(0.3),
    Dense(1024, activation='relu'),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(5, activation='softmax'),
])

# Compile the model
learning_rate = 0.001  # Adam step size

optimizer = Adam(learning_rate=learning_rate)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# Stop when the validation loss has not improved for 3 consecutive epochs
early_stopping = EarlyStopping(patience=3, monitor='val_loss')

# Hold out a shuffled 20% for validation. Keras' validation_split takes the
# LAST rows of the arrays before any shuffling, which here would be the
# unshuffled tail of the SMOTE output (where the synthetic rows are appended),
# so an explicit seeded random split is used instead.
X_cnn_train, X_cnn_val, y_cnn_train, y_cnn_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# `trained_model` holds the History object returned by fit (name kept for
# compatibility with any later cells).
trained_model = model.fit(
    X_cnn_train,
    y_cnn_train,
    epochs=5,
    batch_size=64,
    validation_data=(X_cnn_val, y_cnn_val),
    callbacks=[early_stopping],
)


In [None]:
# Evaluate the trained model on the held-out test set
loss, accuracy = model.evaluate(X_test, y_test)

# Model outputs for the test set (these are predictions on X_test, not on the
# training data).
test_predictions = model.predict(X_test)

# Backward-compatible alias: the original notebook stored the test-set
# predictions under this misleading name.
train_predictions = test_predictions

In [None]:
# Carve a validation set out of the training data for hyperparameter tuning.
# NOTE: this overwrites X_train / y_train with the smaller training portion,
# so re-running the cell shrinks the training set again.
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Hyperparameter grid explored by the tuning loop
learning_rates = [0.001, 0.01, 0.1]
batch_sizes = [16, 32, 64]
num_epochs = [5, 10]

# Best-so-far bookkeeping, updated by train_model through `global`
best_accuracy, best_model_path, best_run_id = 0.0, None, None

def train_model(learning_rate, batch_size, num_epochs):
    """Train one dense classifier for a hyperparameter combination and log it to MLflow.

    Reads the notebook-level arrays ``X_train``, ``y_train``, ``X_val`` and
    ``y_val``. Logs the hyperparameters and the final validation loss/accuracy
    to MLflow; the tuning loop wraps each call in ``mlflow.start_run``. When the
    validation accuracy beats the notebook-level ``best_accuracy``, the model is
    saved to an ``.h5`` file and ``best_accuracy``, ``best_model_path`` and
    ``best_run_id`` are updated.

    Args:
        learning_rate: Step size passed to the Adam optimizer.
        batch_size: Mini-batch size used for training.
        num_epochs: Maximum number of epochs (early stopping may end sooner).

    Returns:
        The trained Keras model.
    """
    global best_accuracy
    global best_model_path
    global best_run_id

    mlflow.log_param("learning_rate", learning_rate)
    mlflow.log_param("batch_size", batch_size)
    mlflow.log_param("num_epochs", num_epochs)

    # Small fully connected network on the tabular features, 5-way softmax output
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=X_train.shape[1]))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(5, activation='softmax'))

    # Use the requested learning rate. The original compiled with
    # optimizer='adam', which ignores `learning_rate`, so every point on the
    # learning-rate grid trained with the same default step size.
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    # Train, stopping once val_loss has not improved for 3 consecutive epochs
    early_stopping = EarlyStopping(patience=3, monitor='val_loss')
    model.fit(
        X_train,
        y_train,
        epochs=num_epochs,
        batch_size=batch_size,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],
        verbose=1,
    )

    # Evaluate the model on the validation set
    val_loss, val_accuracy = model.evaluate(X_val, y_val, verbose=0)

    # Log the validation loss and accuracy
    mlflow.log_metric("val_loss", val_loss)
    mlflow.log_metric("val_accuracy", val_accuracy)

    # Keep this model only if it is strictly better than the best seen so far
    if val_accuracy > best_accuracy:
        best_accuracy = val_accuracy

        # Save the model under a name that encodes its hyperparameters
        best_model_path = f"best_model_lr_{learning_rate}_bs_{batch_size}_epochs_{num_epochs}.h5"
        model.save(best_model_path)

        # Remember which MLflow run produced it
        best_run_id = mlflow.active_run().info.run_id

    return model

# Grid search: one MLflow run per (learning rate, batch size, epochs) combination
for lr in learning_rates:
    for bs in batch_sizes:
        for epochs in num_epochs:
            with mlflow.start_run(run_name=f"lr_{lr}_bs_{bs}_epochs_{epochs}"):
                # train_model logs the hyperparameters and validation metrics
                # to the active run and tracks the best model seen so far.
                # NOTE: this rebinds the notebook-level `model`, replacing the
                # CNN trained earlier.
                model = train_model(lr, bs, epochs)

# best_model_path stays None when no run improved on the initial best_accuracy;
# fail with a clear message instead of passing None to load_model.
if best_model_path is None:
    raise RuntimeError(
        "No model was saved during the hyperparameter search: "
        "no run improved on the initial best_accuracy."
    )

# Load the best model
best_model = tf.keras.models.load_model(best_model_path)

# Print the run ID for the best model
print(f"Best Run ID: {best_run_id}")

In [None]:
# Look up the MLflow run that produced the best model and read back the
# hyperparameters that were logged for it.
run_info = mlflow.get_run(best_run_id)
hyperparams = run_info.data.params

# (logged parameter key, display label) pairs, in the order they are printed;
# a parameter that was not logged is skipped.
for param_key, label in (
    ("learning_rate", "Learning Rate"),
    ("batch_size", "Batch Size"),
    ("num_epochs", "Number of Epochs"),
):
    if param_key in hyperparams:
        print(f"{label}: {hyperparams[param_key]}")