<a href="https://colab.research.google.com/github/jsuj1th/Colab/blob/main/DL/CSCE_636_600_Spring_2025_Project_3_%2B_Sujith_Julakanti_%2B_335007274.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tamu_csce_636_project1

Collecting tamu_csce_636_project1
  Downloading tamu_csce_636_project1-0.0.7-py3-none-any.whl.metadata (1.5 kB)
Downloading tamu_csce_636_project1-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: tamu_csce_636_project1
Successfully installed tamu_csce_636_project1-0.0.7


In [None]:
# Dataset_generation:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-


from mpi4py import MPI
import numpy as np
import pandas as pd
from scipy.optimize import linprog
from itertools import combinations, product
import os
from tqdm import tqdm

# MPI setup: each process learns its own rank and the total number of ranks.
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
size = comm.Get_size()

# Seed NumPy's global RNG differently on every rank so that ranks do not draw
# from identical random streams. Rank 0 keeps the historical seed (3).
BASE_SEED = 3
np.random.seed(BASE_SEED + rank)

# Directory that receives the per-rank CSV shards.
output_dir = "/scratch/user/haikookhandor/DL/datasets_mpi/"
os.makedirs(output_dir, exist_ok=True)

# Parameters
max_valid_samples = 10000 # total valid samples per rank for debugging
save_threshold_bytes = 5 * 1024 * 1024  # flush the buffer once its DataFrame reaches 5 MiB

# (n, k, [m, ...]) combinations; expanded below into individual (n, k, m) triplets.
allowed_combinations = [
    (9, 4, [2, 3, 4, 5]),
    (9, 5, [2, 3, 4]),
    (9, 6, [2, 3]),
    (10, 4, [2, 3, 4, 5, 6]),
    (10, 5, [2, 3, 4, 5]),
    (10, 6, [2, 3, 4]),
]
triplets = [(n, k, m) for n, k, ms in allowed_combinations for m in ms]

# Divide combinations across ranks. np.array_split converts the triplets to a
# 2-D integer array and may hand out empty chunks when size > len(triplets).
combo_chunks = np.array_split(triplets, size)
my_combos = combo_chunks[rank]

def generate_all_tuples(n, m):
    """Lazily enumerate every (a, b, X, psi) configuration.

    a and b are distinct indices in range(n); X is an (m - 1)-element
    combination of the remaining indices (as a tuple, in increasing order);
    psi is a length-m tuple of signs drawn from {-1, 1}.

    Tuples are produced in lexicographic order of (a, b), then X, then psi.
    """
    for a, b in product(range(n), repeat=2):
        if a == b:
            continue
        others = [idx for idx in range(n) if idx != a and idx != b]
        for X in combinations(others, m - 1):
            yield from ((a, b, X, psi) for psi in product((-1, 1), repeat=m))

def solve_lp(G, k, n, m, a, b, X, psi):
    """Solve the linear program for one (a, b, X, psi) configuration.

    With c_j = G[:, j] . u denoting the j-th coordinate of the vector u @ G,
    the program maximizes psi[0] * c_a over a *free* real vector u subject to:

      * c_b == 1,
      * for every j in X:  1 <= s_j * c_j <= psi[0] * c_a, where s_j is the
        sign in psi at j's position in the ordering (a, sorted X, b, sorted Y),
      * for every remaining column j (the set Y):  -1 <= c_j <= 1.

    Args:
        G: 2-D array whose columns are indexed by 0..n-1.
        k: Unused; kept for signature compatibility.
        n: Number of columns of G that are considered.
        m: Unused here (len(psi) carries the same information).
        a, b: Distinct column indices.
        X: Iterable of column indices disjoint from {a, b}.
        psi: Sequence of signs (+1 / -1), one for a and one per element of X.

    Returns:
        The optimal objective value when the LP is solved to optimality,
        ``np.inf`` when the solver reports it unbounded (status 3), and 0 for
        any other solver status (e.g. infeasible).
    """
    # Build the excluded set once instead of once per candidate index.
    excluded = set(X) | {a, b}
    Y = [j for j in range(n) if j not in excluded]

    # Position of each column in the ordering (a, X..., b, Y...).
    ordering = [a] + sorted(X) + [b] + sorted(Y)
    tau_inv = {col: pos for pos, col in enumerate(ordering)}

    # linprog minimizes, so negate the quantity we want to maximize.
    c = -psi[0] * G[:, a]
    A_ub, b_ub = [], []

    for j in X:
        signed_col = psi[tau_inv[j]] * G[:, j]
        # s_j * c_j <= psi[0] * c_a
        A_ub.append(signed_col - psi[0] * G[:, a])
        b_ub.append(0)
        # s_j * c_j >= 1
        A_ub.append(-signed_col)
        b_ub.append(-1)

    for j in Y:
        # |c_j| <= 1
        A_ub.append(G[:, j])
        b_ub.append(1)
        A_ub.append(-G[:, j])
        b_ub.append(1)

    A_eq = [G[:, b]]
    b_eq = [1]

    res = linprog(
        c,
        # An empty constraint list must be passed as None, not as an empty array.
        A_ub=np.array(A_ub) if A_ub else None,
        b_ub=np.array(b_ub) if b_ub else None,
        A_eq=np.array(A_eq),
        b_eq=np.array(b_eq),
        # linprog defaults to 0 <= x < inf for every variable; u must be
        # unrestricted in sign, otherwise most of the search space is cut off.
        bounds=(None, None),
        method='highs',
    )

    if res.status == 0:
        return -res.fun
    elif res.status == 3:
        return np.inf
    else:
        return 0

def compute_m_height(G, k, n, m):
    """Return the largest LP value over all (a, b, X, psi) configurations.

    Every configuration produced by generate_all_tuples(n, m) is handed to
    solve_lp. The running maximum starts at 1, so the result is never below 1.
    As soon as one LP is reported unbounded, np.inf is returned immediately.
    """
    best = 1
    for a, b, X, psi in generate_all_tuples(n, m):
        value = solve_lp(G, k, n, m, a, b, X, psi)
        if value == np.inf:
            # One unbounded configuration settles the answer; stop early.
            return np.inf
        best = max(best, value)
    return best

# Per-rank sample buffer and bookkeeping counters.
samples = []
file_counter = 0
total_valid_samples = 0

# Generate random systematic matrices G = [I | P] until this rank has collected
# max_valid_samples finite results. The extra len(my_combos) guard prevents an
# endless busy loop on ranks that received no (n, k, m) triplets (this happens
# when there are more MPI ranks than triplets).
with tqdm(total=max_valid_samples, disable=(rank != 0)) as pbar:
    while len(my_combos) > 0 and total_valid_samples < max_valid_samples:
        for n, k, m in my_combos:
            if total_valid_samples >= max_valid_samples:
                break

            P = np.random.uniform(-100, 100, size=(k, n - k))
            G = np.hstack([np.eye(k), P])
            h_m = compute_m_height(G, k, n, m)

            # Keep only finite, positive results.
            if h_m != np.inf and h_m > 0:
                samples.append({
                    "n": n,
                    "k": k,
                    "m": m,
                    "h_m": h_m,
                    "P": P.flatten().tolist()
                })
                total_valid_samples += 1
                pbar.update(1)

                # Flush the buffer to a CSV shard once its DataFrame
                # representation reaches the configured size threshold.
                if len(samples) >= 10:
                    df = pd.DataFrame(samples)
                    mem_size = df.memory_usage(deep=True).sum()
                    if mem_size >= save_threshold_bytes:
                        filename = f"{output_dir}/rank{rank}_part{file_counter}.csv"
                        df.to_csv(filename, index=False)
                        print(f"[Rank {rank}] Saved {filename} ({mem_size / (1024**2):.2f} MB)")
                        file_counter += 1
                        samples = []

# Save any remaining samples
if samples:
    df = pd.DataFrame(samples)
    filename = f"{output_dir}/rank{rank}_final.csv"
    df.to_csv(filename, index=False)
    print(f"[Rank {rank}] Saved {filename} (final)")

ModuleNotFoundError: No module named 'mpi4py'

In [None]:
import os
import ast
import csv
import time
import joblib
import numpy as np
import pandas as pd
from numpy.linalg import svd, norm, cond
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from torch.utils.data import DataLoader, TensorDataset
import torch
import torch.nn as nn
from typing import Tuple

# ---------------------------
# Configuration
# ---------------------------
# Input dataset (pickled DataFrame) and output artefact locations.
INPUT_PATH = '/Users/sujithjulakanti/Desktop/Colab/DL/DL_Project/results_dataframe.pkl'
MODEL_SAVE_DIR = 'model_dir/saved_models_14'
LOG_FILE = 'log_dir/training_progress_14.log'
CSV_PATH = 'DL_APPROACH/pred_dir/test_predictions_14.csv'
SCALER_PATH = 'DL_APPROACH/scaler_dir/scaler_14.pkl'

# Create the parent directory of every output path up front.
os.makedirs(os.path.dirname(MODEL_SAVE_DIR), exist_ok=True)
os.makedirs(os.path.dirname(LOG_FILE), exist_ok=True)
os.makedirs(os.path.dirname(CSV_PATH), exist_ok=True)
os.makedirs(os.path.dirname(SCALER_PATH), exist_ok=True)

# The (n, k, m) configuration this model is trained on.
N_VAL, K_VAL, M_VAL = 10, 4, 5
# P is reshaped to (k, n - k) during preprocessing, so its flattened length is
# fully determined by the configuration (24 for n=10, k=4) rather than being a
# free constant.
EXPECTED_P_LEN = K_VAL * (N_VAL - K_VAL)

# Training hyper-parameters.
SAVE_EVERY_N_EPOCHS = 10
BATCH_SIZE = 512
EPOCHS = 100
PATIENCE = 35
LEARNING_RATE = 1e-3

# ---------------------------
# Utility Functions
# ---------------------------
def load_and_filter_data(path: str, n_val: int, k_val: int, m_val: int) -> pd.DataFrame:
    """Load the pickled dataset and keep only rows of one (n, k, m) configuration.

    Args:
        path: File to load with ``joblib.load``.
        n_val, k_val, m_val: Values the 'n', 'k' and 'm' columns must equal.

    Returns:
        An independent copy of the matching rows. If the 'P' column holds
        string literals they are parsed with ``ast.literal_eval``.

    Raises:
        ValueError: If no row matches the requested configuration.
    """
    # NOTE: joblib.load unpickles arbitrary objects -- only use trusted files.
    df = joblib.load(path)
    print(f"Loaded data: {df.shape}")

    mask = (df['n'] == n_val) & (df['k'] == k_val) & (df['m'] == m_val)
    # Copy so the column assignment below modifies our own frame instead of a
    # view of the loaded one (avoids pandas' chained-assignment pitfalls).
    df = df[mask].copy()
    if df.empty:
        raise ValueError(
            f"No rows with n={n_val}, k={k_val}, m={m_val} found in {path}"
        )

    if isinstance(df['P'].iloc[0], str):
        df['P'] = df['P'].apply(ast.literal_eval)
    return df

def preprocess_data(df: pd.DataFrame) -> Tuple[np.ndarray, np.ndarray]:
    """Turn each dataset row into a flat feature vector.

    For every row, P is reshaped to (k, n - k) and the feature vector is the
    concatenation of, in this order:

      [n, k, m], flattened P, row means, row stds, column means, column stds,
      row L2 norms, [Frobenius norm, condition number], top-5 singular values
      (zero-padded when P has fewer than five).

    A non-finite condition number (singular P) is replaced by 1e6 so that the
    feature matrix never contains inf/nan.

    Args:
        df: Frame with columns 'n', 'k', 'm', 'P' (flat sequence of
            k * (n - k) numbers) and 'result'.

    Returns:
        (X, y): float32 feature matrix with one row per sample and the float32
        target vector taken from the 'result' column.
    """
    y = df['result'].values.astype(np.float32)
    X = []

    for _, row in df.iterrows():
        n, k, m = int(row['n']), int(row['k']), int(row['m'])
        P = np.array(row['P'], dtype=np.float32).reshape(k, n - k)

        # Decompose once and reuse; keep at most five singular values and
        # zero-pad on the right when there are fewer.
        singular_values = svd(P, full_matrices=False)[1][:5]
        top_singulars = np.zeros(5, dtype=np.float32)
        top_singulars[:len(singular_values)] = singular_values

        # cond() yields inf for singular matrices; cap it with a large finite
        # sentinel so downstream scaling does not break.
        cond_number = cond(P)
        if not np.isfinite(cond_number):
            cond_number = 1e6

        features = np.concatenate([
            np.array([n, k, m], dtype=np.float32),
            P.flatten(),
            np.mean(P, axis=1), np.std(P, axis=1),
            np.mean(P, axis=0), np.std(P, axis=0),
            np.linalg.norm(P, axis=1),
            [norm(P, ord='fro'), cond_number],
            top_singulars,
        ])
        X.append(features)
    return np.array(X, dtype=np.float32), y

def save_scaler(X: np.ndarray, path: str) -> np.ndarray:
    """Fit a StandardScaler on X, persist it to *path*, and return scaled X.

    The fitted scaler is written with joblib.dump so that the same
    standardisation can be re-applied later.
    """
    fitted = StandardScaler().fit(X)
    joblib.dump(fitted, path)
    return fitted.transform(X)

def prepare_dataloaders(X: np.ndarray, y: np.ndarray) -> Tuple[DataLoader, DataLoader, DataLoader]:
    """Split (X, y) 70/15/15 and wrap each part in a DataLoader.

    Both splits use random_state=42. Targets are reshaped to a column
    (N, 1). Only the training loader shuffles; all loaders use BATCH_SIZE.

    Returns:
        (train_loader, val_loader, test_loader)
    """
    def _to_dataset(features, targets):
        # Targets become an (N, 1) column tensor.
        return TensorDataset(torch.tensor(features), torch.tensor(targets).unsqueeze(1))

    # First carve off 30 % as hold-out, then halve it into validation / test.
    X_train, X_holdout, y_train, y_holdout = train_test_split(
        X, y, test_size=0.3, random_state=42
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_holdout, y_holdout, test_size=0.5, random_state=42
    )

    train_loader = DataLoader(_to_dataset(X_train, y_train), batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(_to_dataset(X_val, y_val), batch_size=BATCH_SIZE)
    test_loader = DataLoader(_to_dataset(X_test, y_test), batch_size=BATCH_SIZE)
    return train_loader, val_loader, test_loader

class MHeightRegressor(nn.Module):
    """Fully connected regressor whose output is always at least 1.

    Seven hidden stages (Linear -> BatchNorm1d -> ReLU) of widths
    512, 256, 200, 128, 64, 32 and 16 are followed by a single-unit Linear
    layer and a Softplus; the forward pass adds 1 to that result.
    """

    def __init__(self, input_dim: int):
        super().__init__()
        widths = [input_dim, 512, 256, 200, 128, 64, 32, 16]
        layers = []
        # Layers are created in the same order as a hand-written Sequential,
        # so state_dict keys (net.0, net.1, ...) are unchanged.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.BatchNorm1d(fan_out))
            layers.append(nn.ReLU())
        layers.append(nn.Linear(widths[-1], 1))
        layers.append(nn.Softplus())
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Softplus is positive, so shifting by one keeps the output above 1.
        shifted = self.net(x) + 1
        return shifted

def evaluate(model: nn.Module, loader: DataLoader, criterion: nn.Module, device: torch.device) -> float:
    """Return the dataset-averaged loss between log2(prediction) and log2(target).

    The model is switched to eval mode and gradients are disabled. Each
    batch's loss is weighted by its batch size and the sum is divided by
    len(loader.dataset).
    """
    model.eval()
    weighted_sum = 0
    with torch.no_grad():
        for features, targets in loader:
            features = features.to(device)
            targets = targets.to(device)
            log_pred = torch.log2(model(features))
            log_true = torch.log2(targets)
            batch_loss = criterion(log_pred, log_true).item()
            weighted_sum += batch_loss * features.size(0)
    return weighted_sum / len(loader.dataset)

def save_predictions(preds: list, targets: list, inputs: list, csv_path: str, p_len=None):
    """Write test predictions to two CSV files.

    ``csv_path`` receives one row per sample: the full input vector followed
    by the true and predicted value. A second file, named like ``csv_path``
    with ``.csv`` replaced by ``_simple.csv``, holds only the two value columns.

    The header is built from the actual width of the input vectors so that it
    always lines up with the data rows: the first three columns are n, k, m,
    the next ``p_len`` are P0, P1, ..., and any further columns are named
    feature_0, feature_1, ...

    Args:
        preds: Predicted values (floats, NumPy scalars or 0-d tensors).
        targets: True values, same length and kinds as ``preds``.
        inputs: Input vectors, one per sample.
        csv_path: Destination of the detailed CSV.
        p_len: Number of columns labelled P<i>; defaults to the module-level
            EXPECTED_P_LEN when omitted.
    """
    if p_len is None:
        p_len = EXPECTED_P_LEN

    n_features = len(inputs[0]) if len(inputs) else 3 + p_len
    n_p = max(0, min(p_len, n_features - 3))
    n_extra = max(0, n_features - 3 - n_p)
    header = (
        ['n', 'k', 'm'][:n_features]
        + [f'P{i}' for i in range(n_p)]
        + [f'feature_{i}' for i in range(n_extra)]
        + ['true_result', 'predicted_result']
    )

    # float() turns 0-d tensors / NumPy scalars into plain numbers; otherwise
    # the csv module would write their repr (e.g. "tensor(1.5)").
    targets = [float(t) for t in targets]
    preds = [float(p) for p in preds]

    with open(csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        for x_vec, y_true, y_pred in zip(inputs, targets, preds):
            writer.writerow(list(x_vec) + [y_true, y_pred])

    simple_csv_path = csv_path.replace('.csv', '_simple.csv')
    with open(simple_csv_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['true_result', 'predicted_result'])
        writer.writerows(zip(targets, preds))
    print(f"Predictions saved to {csv_path} and {simple_csv_path}")

# ---------------------------
# Main Execution
# ---------------------------
os.makedirs(MODEL_SAVE_DIR, exist_ok=True)
# Single source of truth for the best-checkpoint file, used both when saving
# during training and when reloading for the final evaluation (the two places
# previously used different file names).
BEST_MODEL_PATH = os.path.join(MODEL_SAVE_DIR, 'best_model_14.pt')
device = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")

print("Using device:", device)

# Data pipeline: load -> featurise -> standardise -> split into loaders.
df = load_and_filter_data(INPUT_PATH, N_VAL, K_VAL, M_VAL)
X, y = preprocess_data(df)
X_scaled = save_scaler(X, SCALER_PATH)
train_loader, val_loader, test_loader = prepare_dataloaders(X_scaled, y)

input_dim = X_scaled.shape[1]
model = MHeightRegressor(input_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
criterion = nn.MSELoss()

best_val_loss = float('inf')
best_epoch = 0

with open(LOG_FILE, 'w') as f:
    f.write(f"Training started at {time.ctime()}\n")

for epoch in range(1, EPOCHS + 1):
    model.train()
    for xb, yb in train_loader:
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        # The loss is computed in log2 space for both prediction and target.
        loss = criterion(torch.log2(model(xb)), torch.log2(yb))
        loss.backward()
        optimizer.step()

    val_loss = evaluate(model, val_loader, criterion, device)
    print(f"Epoch {epoch}: Validation Loss = {val_loss:.6f}")
    with open(LOG_FILE, 'a') as f:
        f.write(f"Epoch {epoch}: Validation Loss = {val_loss:.6f}\n")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_epoch = epoch
        torch.save(model.state_dict(), BEST_MODEL_PATH)
    elif epoch - best_epoch >= PATIENCE:
        # No improvement for PATIENCE epochs: stop early.
        print(f"Early stopping at epoch {epoch}. Best epoch was {best_epoch}.")
        break

    if epoch % SAVE_EVERY_N_EPOCHS == 0:
        torch.save(model.state_dict(), os.path.join(MODEL_SAVE_DIR, f'model_epoch{epoch}.pt'))

# ---------------------------
# Final Evaluation
# ---------------------------
model.load_state_dict(torch.load(BEST_MODEL_PATH, map_location=device))
model.eval()

all_preds, all_targets, all_inputs = [], [], []

with torch.no_grad():
    for xb, yb in test_loader:
        xb = xb.to(device)
        preds = torch.log2(model(xb)).cpu()
        ylog = torch.log2(yb)
        # .tolist() yields plain Python floats; extending with the tensor
        # itself would collect 0-d tensors, which pollute the CSV output.
        all_preds.extend(preds.flatten().tolist())
        all_targets.extend(ylog.flatten().tolist())
        # These are the standardised feature vectors, not the raw inputs.
        all_inputs.extend(xb.cpu().numpy())

# All metrics below are in log2 space, like the training loss.
r2 = r2_score(all_targets, all_preds)
mae = mean_absolute_error(all_targets, all_preds)
mse = mean_squared_error(all_targets, all_preds)
test_loss = evaluate(model, test_loader, criterion, device)

with open(LOG_FILE, 'a') as f:
    f.write(f"Training ended at {time.ctime()}\n")
    f.write(f"Best Epoch: {best_epoch} | Best Val Loss: {best_val_loss:.6f}\n")
    f.write(f"Test Loss: {test_loss:.6f} | R² Score: {r2:.4f} | MAE: {mae:.4f} | MSE: {mse:.4f}\n")

save_predictions(all_preds, all_targets, all_inputs, CSV_PATH)

print(f"Test Results -- Loss: {test_loss:.6f}, R2: {r2:.4f}, MAE: {mae:.4f}, MSE: {mse:.4f}")


In [None]:
from tamu_csce_636_project1 import Evaluator
import torch
import torch.nn as nn
import numpy as np
import joblib
import torch
import torch.nn as nn
import numpy as np
import joblib
from numpy.linalg import svd, norm, cond
from typing import List, Tuple
# Initialize the evaluator
# Evaluator comes from the tamu_csce_636_project1 package; the keyword
# arguments below identify the submitter.
# NOTE(review): the notebook's file name spells the surname "Julakanti" --
# confirm that "Julakantu" is intended here.
evaluator = Evaluator(
    first_name="Sujith",
    last_name="Julakantu",
    email="s02@tamu.edu",
    print=False,
)
# === Paths ===
# Trained weights and the fitted feature scaler, expected under /content.
MODEL_WEIGHTS_PATH = '/content/best_model_14.pt'
SCALER_PATH = '/content/scaler_14.pkl'

# === Define the model architecture (must match training exactly) ===
class MHeightRegressor(nn.Module):
    """MLP regressor: seven Linear/BatchNorm1d/ReLU stages, then Linear + Softplus.

    Hidden widths are 512, 256, 200, 128, 64, 32 and 16. The forward pass adds
    1 to the Softplus output. Layer order (and therefore the state_dict key
    names net.0 ... net.21) must stay fixed so saved weights keep loading.
    """

    HIDDEN_WIDTHS = (512, 256, 200, 128, 64, 32, 16)

    def __init__(self, input_dim: int):
        super().__init__()
        modules = []
        in_width = input_dim
        for out_width in self.HIDDEN_WIDTHS:
            modules.extend([
                nn.Linear(in_width, out_width),
                nn.BatchNorm1d(out_width),
                nn.ReLU(),
            ])
            in_width = out_width
        modules.extend([nn.Linear(in_width, 1), nn.Softplus()])
        self.net = nn.Sequential(*modules)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        raw = self.net(x)
        return raw + 1

# === Preprocessing function for new test samples ===
def preprocess_new_data(samples: List[Tuple[int, int, int, np.ndarray]]) -> np.ndarray:
    """Build fixed-width feature vectors for unseen (n, k, m, P) samples.

    For each sample, P is reshaped to (k, n - k) and the following are
    concatenated in order: [n, k, m], flattened P, row means, row stds,
    column means, column stds, row L2 norms, [Frobenius norm, condition
    number], and the top-5 singular values (zero-padded if fewer exist).
    The vector is then zero-padded on the right, or truncated, to exactly
    TARGET_FEATURE_SIZE (58) entries.

    A non-finite or uncomputable condition number is replaced by 1e6 so the
    output never contains inf/nan from that feature.

    Args:
        samples: Sequence of (n, k, m, P) tuples; P must hold k * (n - k) values.

    Returns:
        float32 array of shape (len(samples), 58).
    """
    TARGET_FEATURE_SIZE = 58  # Always create 58 features
    features = []

    for n, k, m, P_matrix in samples:
        P = np.array(P_matrix, dtype=np.float32).reshape(k, n - k)

        # cond() returns inf (it does not raise) for singular matrices, so a
        # finiteness check is needed in addition to the exception guard.
        try:
            cond_number = cond(P)
        except np.linalg.LinAlgError:
            cond_number = 1e6
        if not np.isfinite(cond_number):
            cond_number = 1e6

        try:
            s_vals = svd(P, full_matrices=False)[1]
        except np.linalg.LinAlgError:
            s_vals = np.zeros(min(k, n - k))

        # Keep at most five singular values; pad with zeros when fewer exist.
        # (Slicing first keeps the pad width non-negative for large matrices.)
        top_singulars = np.zeros(5, dtype=np.float32)
        kept = s_vals[:5]
        top_singulars[:len(kept)] = kept

        feature_vec = np.concatenate([
            np.array([n, k, m], dtype=np.float32),
            P.flatten(),
            np.mean(P, axis=1), np.std(P, axis=1),
            np.mean(P, axis=0), np.std(P, axis=0),
            np.linalg.norm(P, axis=1),
            [norm(P, ord='fro'), cond_number],
            top_singulars,
        ])

        # === Important Padding Step ===
        # Force the width the scaler/model expect, whatever the shape of P.
        if feature_vec.shape[0] < TARGET_FEATURE_SIZE:
            pad_width = TARGET_FEATURE_SIZE - feature_vec.shape[0]
            feature_vec = np.pad(feature_vec, (0, pad_width), constant_values=0)
        elif feature_vec.shape[0] > TARGET_FEATURE_SIZE:
            feature_vec = feature_vec[:TARGET_FEATURE_SIZE]  # Trim if too long

        features.append(feature_vec)

    # reshape keeps the result 2-D even when no samples were given.
    return np.array(features, dtype=np.float32).reshape(-1, TARGET_FEATURE_SIZE)

# === Load the scaler and model ===
scaler = joblib.load(SCALER_PATH)

# Prefer CUDA, then Apple MPS, and fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# Run one throw-away sample through the preprocessing to learn the feature
# width the network has to accept.
dummy_sample = (10, 4, 5, np.random.rand(4, 6))
dummy_features = preprocess_new_data([dummy_sample])
input_dim = dummy_features.shape[1]

# Rebuild the architecture, restore the trained weights, move to the device
# and switch to inference mode.
model = MHeightRegressor(input_dim)
state_dict = torch.load(MODEL_WEIGHTS_PATH, map_location=device)
model.load_state_dict(state_dict)
model = model.to(device)
model.eval()

# === Prediction function ===
def predict_m_heights(n: int, k: int, m: int, list_of_P_matrices: List[np.ndarray]) -> List[float]:
    """Predict one value per P matrix for a fixed (n, k, m).

    Each matrix is featurised with preprocess_new_data, standardised with the
    loaded scaler and passed through the model in a single batch.

    Args:
        n, k, m: Parameters shared by all matrices in the batch.
        list_of_P_matrices: Matrices to evaluate; may be empty.

    Returns:
        A plain Python list of floats, one per input matrix (an empty list for
        empty input).
    """
    # Nothing to predict: avoid feeding an empty batch to the scaler/model.
    if len(list_of_P_matrices) == 0:
        return []

    samples = [(n, k, m, P_matrix) for P_matrix in list_of_P_matrices]

    X_new = preprocess_new_data(samples)
    X_scaled = scaler.transform(X_new)
    X_tensor = torch.tensor(X_scaled, dtype=torch.float32).to(device)

    with torch.no_grad():
        preds = model(X_tensor).squeeze(1)

    return preds.cpu().numpy().tolist()  # Evaluator expects pure Python (not torch.Tensor)

# Run the evaluator on a single hand-written sample.
# NOTE(review): keys appear to be "[n,k,m]" strings, with `inputs` holding the
# P matrices and `outputs` the reference values for the same key -- confirm
# against the Evaluator documentation.
σ = evaluator.eval(
    inputs={
        '[5,2,2]': [
            np.array([
                [0.4759809, 0.9938236, 0.819425],
                [-0.8960798, -0.7442706, 0.3345122],
            ]),
        ],
    },
    outputs={
        '[5,2,2]': [1.9242387],
    },
    # Called by the evaluator to obtain predictions; it has the signature
    # (n, k, m, list_of_P_matrices).
    func=predict_m_heights,
)

# Print evaluation result
print("Evaluation result:", dict(σ))

Evaluation result: {(5, 2, 2): 83.5453074185373}
