<a href="https://colab.research.google.com/github/subhajitphy/Bhattacharya_distance/blob/main/ML_peak_freq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
class EfficientNN(nn.Module):
    def __init__(self):
        super(EfficientNN, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(2, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 40),
            nn.BatchNorm1d(40),
            nn.ReLU(),
            nn.Linear(40, 1)
        )

    def forward(self, x):
        return self.model(x)

In [3]:
def train_and_predict(input_data=None, verbose=True):
    """
    Trains a neural network with validation and provides a prediction function.

    Parameters:
    input_data (np.array): Optional 2D array of shape (n_samples, 2) for prediction
    verbose (bool): Whether to print training progress

    Returns:
    tuple: (predict_function,
            predictions if input_data provided else None,
            dict of validation metrics)
    """
    # Load and prepare data
    inp1, inp2, outp = np.load('all_file_nef.pkl', allow_pickle=True)
    X = np.vstack([inp1, inp2]).T
    y = outp.reshape(-1, 1)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Normalize data
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()

    X_train = scaler_X.fit_transform(X_train)
    X_test = scaler_X.transform(X_test)
    y_train = scaler_y.fit_transform(y_train.reshape(-1, 1)).flatten()
    y_test = scaler_y.transform(y_test.reshape(-1, 1)).flatten()

    # Convert to tensors
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)

    # Create DataLoader
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=1024, shuffle=True)

    # Initialize model
    model = EfficientNN().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-6)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

    # Training with validation
    num_epochs = 20
    train_losses = []
    val_losses = []
    best_val_loss = float('inf')
    best_model_state = None

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        epoch_loss = 0
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            predictions = model(batch_X)
            loss = criterion(predictions, batch_y)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        train_loss = epoch_loss / len(train_loader)
        train_losses.append(train_loss)

        # Validation phase
        model.eval()
        with torch.no_grad():
            val_predictions = model(X_test_tensor)
            val_loss = criterion(val_predictions, y_test_tensor).item()
            val_losses.append(val_loss)

            # Save best model
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model_state = model.state_dict()

        if verbose:
            print(f"Epoch {epoch+1}/{num_epochs}, "
                  f"Train Loss: {train_loss:.6f}, "
                  f"Val Loss: {val_loss:.6f}")

        scheduler.step()

    # Load best model
    model.load_state_dict(best_model_state)

    # Final validation metrics
    model.eval()
    with torch.no_grad():
        final_predictions = model(X_test_tensor)
        mse = criterion(final_predictions, y_test_tensor).item()

        # Convert back to original scale for additional metrics
        y_pred = scaler_y.inverse_transform(final_predictions.cpu().numpy())
        y_true = scaler_y.inverse_transform(y_test_tensor.cpu().numpy())

        mae = np.mean(np.abs(y_true - y_pred))
        r2 = 1 - (np.sum((y_true - y_pred) ** 2) /
                 np.sum((y_true - np.mean(y_true)) ** 2))

    validation_metrics = {
        'mse': mse,
        'mae': mae,
        'r2_score': r2,
        'train_losses': train_losses,
        'val_losses': val_losses
    }

    # Define prediction function
    def predict(new_inputs):
        """
        Predict outputs for new input data

        Parameters:
        new_inputs (np.array): 2D array of shape (n_samples, 2)

        Returns:
        np.array: Predicted outputs in original scale
        """
        model.eval()
        if len(new_inputs.shape) == 1:
            new_inputs = new_inputs.reshape(1, -1)

        if new_inputs.shape[1] != 2:
            raise ValueError("Input must have 2 features")

        inputs_scaled = scaler_X.transform(new_inputs)
        inputs_tensor = torch.tensor(inputs_scaled, dtype=torch.float32).to(device)

        with torch.no_grad():
            pred_scaled = model(inputs_tensor)

        pred = scaler_y.inverse_transform(pred_scaled.cpu().numpy())
        return pred.flatten()

    # Return predictions if input_data provided
    if input_data is not None:
        predictions = predict(input_data)
        return predict, predictions, validation_metrics
    return predict, None, validation_metrics

In [4]:
# Example usage:
if __name__ == "__main__":
    # Train the model and get prediction function with validation
    predict_fn, _, metrics = train_and_predict(verbose=True)

    print("\nValidation Metrics:")
    print(f"MSE: {metrics['mse']:.6f}")
    print(f"MAE: {metrics['mae']:.6f}")
    print(f"R2 Score: {metrics['r2_score']:.6f}")

Epoch 1/20, Train Loss: 0.009543, Val Loss: 0.000573
Epoch 2/20, Train Loss: 0.002317, Val Loss: 0.000930
Epoch 3/20, Train Loss: 0.002036, Val Loss: 0.001004
Epoch 4/20, Train Loss: 0.001938, Val Loss: 0.000105
Epoch 5/20, Train Loss: 0.002074, Val Loss: 0.000438
Epoch 6/20, Train Loss: 0.001576, Val Loss: 0.000153
Epoch 7/20, Train Loss: 0.001547, Val Loss: 0.000204
Epoch 8/20, Train Loss: 0.001531, Val Loss: 0.000114
Epoch 9/20, Train Loss: 0.001511, Val Loss: 0.000311
Epoch 10/20, Train Loss: 0.001407, Val Loss: 0.000298
Epoch 11/20, Train Loss: 0.001222, Val Loss: 0.000350
Epoch 12/20, Train Loss: 0.001167, Val Loss: 0.000075
Epoch 13/20, Train Loss: 0.001178, Val Loss: 0.000323
Epoch 14/20, Train Loss: 0.001068, Val Loss: 0.000131
Epoch 15/20, Train Loss: 0.001046, Val Loss: 0.000119
Epoch 16/20, Train Loss: 0.000914, Val Loss: 0.000230
Epoch 17/20, Train Loss: 0.000946, Val Loss: 0.000082
Epoch 18/20, Train Loss: 0.000921, Val Loss: 0.000181
Epoch 19/20, Train Loss: 0.000871, Va

In [5]:
predict([-8.5,1.2])

NameError: name 'predict' is not defined