In [None]:
# from google.colab import drive
# drive.mount('/content/drive')
# PROJECT_ROOT_ON_DRIVE = '/content/drive/MyDrive/your_colab_projects/dynamic_dnn_trainer'

In [None]:
# Project setup: uncomment exactly ONE of the options below so that the
# working directory ends up at the project root (the folder containing 'src').
# The install hints use %pip rather than !pip so the packages are installed
# into the environment of the running kernel.

# --- Option A: If cloning from GitHub/GitLab ---
# !git clone <your_repository_url> dynamic_dnn_trainer_colab
# %cd dynamic_dnn_trainer_colab
# %pip install -q -r requirements.txt  # requirements.txt must be present in your repo

# --- Option B: If you uploaded a zip of your project ---
# !unzip dynamic_dnn_trainer.zip -d project_files
# %cd project_files/dynamic_dnn_trainer
# %pip install -q -r requirements.txt

# --- Option C: If the project is already in the Colab environment (e.g., from a Drive mount) ---
# Just ensure you are in the project's root directory.
# For example, if PROJECT_ROOT_ON_DRIVE was set in the Drive-mount cell:
# %cd $PROJECT_ROOT_ON_DRIVE
# %pip install -q -r requirements.txt  # still worth running so the packages are present

# Verify the current directory: the listing printed below should include 'src',
# because the import cell builds its paths from the working directory.
!pwd
!ls

In [None]:
import sys
import os
from pathlib import Path
import numpy as np
import pandas as pd
import tensorflow as tf

# Make the project importable.
# Assumes the notebook is run from the project root directory where 'src' is a subfolder.
#
# Two entries are ensured on sys.path:
#   * the project root -- required by the `from src import ...` statements below,
#     which only resolve when the directory CONTAINING 'src' is importable
#     (after a `%cd` the kernel's start directory may no longer be the project root);
#   * the 'src' folder itself -- kept from the original setup in case modules
#     inside it import their siblings by bare name (not verifiable from this notebook).
project_root_path = str(Path(os.getcwd()))
src_path = str(Path(project_root_path) / "src")
for path_entry in (project_root_path, src_path):
    if path_entry not in sys.path:
        sys.path.append(path_entry)
        print(f"Added '{path_entry}' to sys.path")
    else:
        print(f"'{path_entry}' is already on sys.path")

# Import from our custom modules
try:
    from src import config
    from src.data_ingestion import loader
    from src.eda import exploratory_analysis as eda
    from src.preprocessing import transformers
    from src.modeling import dnn_builder, trainer
    from src.evaluation import plots
    from src.utils import helpers
    print("Successfully imported all custom modules.")
except ImportError as e:
    print(f"Error importing modules: {e}")
    print("Ensure you are in the project root and 'src' is in sys.path.")
    print(f"Current sys.path: {sys.path}")
    # Re-raise after printing the diagnostics: every later cell depends on these
    # modules, so continuing would only replace this error with an unrelated
    # NameError (e.g. on `config`) further down the notebook.
    raise

In [None]:
# Report the TensorFlow version and which GPUs, if any, TensorFlow can see.
print(f"--- TensorFlow Version: {tf.__version__} ---")
gpu_devices = tf.config.list_physical_devices('GPU')
if not gpu_devices:
    print("--- No GPU available (or not a GPU runtime). TensorFlow will use CPU. ---")
else:
    print(f"--- GPU(s) available: {len(gpu_devices)} ---")
    for gpu_index, gpu in enumerate(gpu_devices):
        print(f"  GPU {gpu_index}: Name={gpu.name}, Type={gpu.device_type}")
    # Smoke test: place a trivial op on the first GPU.
    # Colab usually assigns GPU:0 if a GPU runtime is selected.
    try:
        with tf.device('/GPU:0'):  # Standard name for Colab GPU
            tf.constant([[1.0]])  # Simple op
    except RuntimeError as gpu_error:
        print(f"--- GPU test op failed: {gpu_error}. May fall back to CPU. ---")
    else:
        print("--- GPU test op successful. GPU is likely active. ---")

In [None]:
# Notebook-specific output locations, all derived from directories defined in
# the project config so notebook artifacts get their own file/folder names.
NOTEBOOK_EDA_PLOTS_DIR = config.PLOTS_OUTPUT_DIR / "notebook_eda_output"
NOTEBOOK_MODEL_SAVE_PATH = (
    config.MODEL_OUTPUT_DIR / "notebook_dnn_model_multiclass.keras"
)
NOTEBOOK_HISTORY_SAVE_PATH = (
    config.PROCESSED_DATA_DIR / "notebook_dnn_training_history_multiclass.pkl"
)
NOTEBOOK_HISTORY_PLOT_PATH = (
    config.PLOTS_OUTPUT_DIR / "notebook_dnn_training_history_multiclass.png"
)
NOTEBOOK_TENSORBOARD_LOG_DIR = (
    config.PROJECT_ROOT / "outputs" / "logs" / "notebook_dnn_multiclass_run"
)
NOTEBOOK_PREPROCESSOR_SAVE_PATH = (
    config.PROCESSED_DATA_DIR / "notebook_feature_preprocessor.pkl"
)

# Ensure directories exist using our helper.
# Directory-valued constants are passed as-is; for file-valued constants the
# parent folder is what has to exist. Order matches the definitions above.
for required_directory in (
    NOTEBOOK_EDA_PLOTS_DIR,
    NOTEBOOK_MODEL_SAVE_PATH.parent,
    NOTEBOOK_HISTORY_SAVE_PATH.parent,
    NOTEBOOK_HISTORY_PLOT_PATH.parent,
    NOTEBOOK_TENSORBOARD_LOG_DIR,
    NOTEBOOK_PREPROCESSOR_SAVE_PATH.parent,
):
    helpers.ensure_directory_exists(required_directory)

print("Notebook output directories ensured.")

In [None]:
print(">>> PART 1: Data Ingestion and Preprocessing <<<")

# 1.1 Load Raw Data
# A None result from the loader is treated as a load failure; halt here so the
# later steps do not fail with a less obvious error.
print("\n--- 1.1 Loading Raw Data ---")
raw_df = loader.load_data_from_csv(csv_file_path=config.RAW_DATA_FILE_PATH)
if raw_df is None:
    raise ValueError("Failed to load raw data. Halting notebook execution.")
print(f"Raw data loaded. Shape: {raw_df.shape}")
display(raw_df.head()) # Use display for better rendering in notebooks

# 1.2 Perform Exploratory Data Analysis (EDA)
# Set run_eda = True or False as desired for the notebook run
run_eda_notebook = True
if run_eda_notebook:
    print("\n--- 1.2 Performing Exploratory Data Analysis (EDA) ---")
    # Each EDA helper receives its own copy of the frame so raw_df stays
    # untouched whatever the helper does with its argument.
    _ = eda.generate_descriptive_statistics(raw_df.copy(), df_name="Raw Data (Notebook)")
    _ = eda.count_missing_values(raw_df.copy(), df_name="Raw Data (Notebook)")

    # Only plot configured numerical features that are actually present in the data.
    existing_numerical_for_plot = [col for col in config.NUMERICAL_FEATURES if col in raw_df.columns]
    if existing_numerical_for_plot:
        eda.plot_histograms(
            df=raw_df.copy(),
            columns_to_plot=existing_numerical_for_plot,
            plot_save_directory=NOTEBOOK_EDA_PLOTS_DIR # Save to notebook specific dir
        )
        eda.plot_correlation_heatmap(
            df=raw_df.copy(),
            columns_for_corr=existing_numerical_for_plot,
            plot_save_path=NOTEBOOK_EDA_PLOTS_DIR / "correlation_matrix_notebook.png"
        )
    else:
        print("No numerical features from config found for EDA plots.")
    print(f"EDA plots (if any) saved to: {NOTEBOOK_EDA_PLOTS_DIR}")
else:
    print("\n--- Skipping EDA for this notebook run ---")

# 1.3 Split Data
# Split ratio and seed come from the project config; stratification by the
# target column is requested explicitly.
print("\n--- 1.3 Splitting Data ---")
X_train_raw_df, X_test_raw_df, y_train_raw_series, y_test_raw_series = \
    transformers.split_dataframe_into_train_test(
        dataframe=raw_df,
        target_column_name=config.TARGET_COLUMN,
        test_set_ratio=config.TEST_SET_SIZE,
        random_seed=config.RANDOM_SEED,
        stratify_by_target=True
    )

# 1.4 Create Feature Preprocessor
print("\n--- 1.4 Creating Feature Preprocessor ---")
feature_preprocessor_obj = transformers.create_feature_preprocessor(
    numerical_cols=config.NUMERICAL_FEATURES,
    categorical_cols=config.CATEGORICAL_FEATURES
)

# 1.5 Apply Feature Preprocessing (Output to NumPy)
# fit_preprocessor_on_train=True asks the helper to fit on the training split.
print("\n--- 1.5 Applying Feature Preprocessing ---")
X_train_np, X_test_np = transformers.apply_feature_preprocessing_to_numpy(
    X_train_df=X_train_raw_df.copy(),
    X_test_df=X_test_raw_df.copy(),
    preprocessor_object=feature_preprocessor_obj,
    fit_preprocessor_on_train=True
)
# The helper's return value is used as the success indicator. Report the
# failure case too, so a missing preprocessor file is not discovered later.
if helpers.save_object_as_pickle(feature_preprocessor_obj, NOTEBOOK_PREPROCESSOR_SAVE_PATH):
    print(f"Fitted feature preprocessor saved to: {NOTEBOOK_PREPROCESSOR_SAVE_PATH}")
else:
    print(f"Warning: feature preprocessor was NOT saved to: {NOTEBOOK_PREPROCESSOR_SAVE_PATH}")

# 1.6 Prepare Target Variable (Output to NumPy)
print("\n--- 1.6 Preparing Target Variable ---")
y_train_np = transformers.prepare_target_to_numpy(y_train_raw_series.copy())
y_test_np = transformers.prepare_target_to_numpy(y_test_raw_series.copy())

# Sanity check: features and targets must stay row-aligned after preprocessing;
# a mismatch would otherwise only surface inside model training.
if X_train_np.shape[0] != y_train_np.shape[0] or X_test_np.shape[0] != y_test_np.shape[0]:
    raise ValueError(
        "Row count mismatch between features and targets after preprocessing: "
        f"train {X_train_np.shape[0]} vs {y_train_np.shape[0]}, "
        f"test {X_test_np.shape[0]} vs {y_test_np.shape[0]}."
    )

print(f"Data prepared. X_train_np shape: {X_train_np.shape}, y_train_np shape: {y_train_np.shape}")

In [None]:
print("\n>>> PART 2: DNN Modeling, Training & Basic Evaluation <<<")

# 2.0 Seed the random number generators
# Weight initialisation and training are stochastic; seeding before the model
# is built makes a re-run of this cell repeatable. The same seed that drives
# the train/test split is reused.
if config.RANDOM_SEED is not None:
    tf.keras.utils.set_random_seed(config.RANDOM_SEED)

# 2.1 Build DNN Model
# The model input is one flat feature vector per sample, so its width is the
# number of columns produced by preprocessing.
print("\n--- 2.1 Building DNN Model ---")
input_shape_for_model = (X_train_np.shape[1],)
dnn_model = dnn_builder.build_dynamic_dnn_model(
    input_features_shape=input_shape_for_model,
    model_arch_params=config.DNN_MODEL_PARAMS["architecture"],
    compilation_params=config.DNN_MODEL_PARAMS["compilation"]
)

# 2.2 Train DNN Model
print("\n--- 2.2 Training DNN Model ---")
# Use test set as validation data for callbacks in this notebook example
# NOTE(review): early stopping and best-checkpoint selection below both monitor
# val_accuracy on this data, so the test set influences which weights are kept.
# Metrics later reported on the same test set will be optimistic; consider
# carving a separate validation split out of the training data.
validation_data_for_training_nb = (X_test_np, y_test_np)

notebook_callbacks = trainer.create_standard_callbacks(
    early_stopping_params={"monitor": "val_accuracy", "patience": 10, "restore_best_weights": True},
    model_checkpoint_filepath=NOTEBOOK_MODEL_SAVE_PATH,
    model_checkpoint_params={"monitor": "val_accuracy", "save_best_only": True},
    tensorboard_logdir=NOTEBOOK_TENSORBOARD_LOG_DIR
)

# Use training params from config, can override epochs for notebook if desired.
# The copy keeps notebook-level overrides out of the shared config dict.
notebook_training_params = config.DNN_MODEL_PARAMS["training"].copy()
# notebook_training_params["epochs"] = 5 # Example: Shorter run for notebook

trained_model_nb, training_history_nb = trainer.train_keras_model(
    model_to_train=dnn_model,
    X_train_data=X_train_np,
    y_train_data=y_train_np,
    training_params=notebook_training_params,
    validation_data_tuple=validation_data_for_training_nb,
    callbacks_to_use=notebook_callbacks,
    history_log_path=NOTEBOOK_HISTORY_SAVE_PATH
)
# Only the existence of the checkpoint file is checked here; report the
# missing-file case as well instead of staying silent.
if NOTEBOOK_MODEL_SAVE_PATH.exists():
    print(f"Model training complete. Best model saved to: {NOTEBOOK_MODEL_SAVE_PATH}")
else:
    print(f"Warning: no model checkpoint file found at: {NOTEBOOK_MODEL_SAVE_PATH}")

# 2.3 Plot Training History
print("\n--- 2.3 Plotting Training History ---")
if training_history_nb and training_history_nb.history:
    plots.plot_training_history(
        history_data=training_history_nb.history,
        plot_title="DNN Model Training History (Notebook)",
        save_plot_path=NOTEBOOK_HISTORY_PLOT_PATH # Saves the plot
    )
    # To display inline in Colab after saving (optional):
    # from IPython.display import Image
    # display(Image(filename=str(NOTEBOOK_HISTORY_PLOT_PATH)))
else:
    print("Skipping plotting: no training history data.")

print("\n--- Notebook Pipeline for Parts 1 & 2 COMPLETED ---")