In [2]:
import pandas as pd
import torch 
import matplotlib.pyplot as plt
import numpy as np
import os
from functools import lru_cache
import gc
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import pdist, squareform

In [3]:
p1 = os.path.expanduser('~/Downloads/transmissions/size_10/lead_size_10_conc_7_config_659.csv')
data = np.loadtxt(p1, delimiter=',', skiprows=1)[:,1]

In [4]:
size = 10

In [5]:
@lru_cache(maxsize=1024)
def configs(conc):
    df = {}
    for x in range(5000):
        path = os.path.expanduser(f'~/Downloads/transmissions/size_10/lead_size_10_conc_{conc}_config_{x}.csv')
        new_data = np.loadtxt(path, delimiter=',',skiprows=1)[:,1]
        df[x] = new_data
    df = pd.DataFrame(df)
    df.index = np.arange(0, 4, 0.01)
    return df.clip(upper=size)


DEVICE_COMBS = {}

@lru_cache(maxsize=1024)
def chosen_for_config(n, size, config):
    width = int(size); n = int(n); cfg = int(config)
    if width not in DEVICE_COMBS:
        DEVICE_COMBS[width] = np.array([(i, j) for i in range(100) for j in range(width)], dtype=int)
    device_combs = DEVICE_COMBS[width]
    rng = np.random.RandomState(cfg)
    chosen_indices = rng.choice(len(device_combs), size=n, replace=False)
    return device_combs[chosen_indices]


def possible_combs(n, width):
    def combs_for_seed(x):
        return chosen_for_config(n, width, x)
    return combs_for_seed

@lru_cache(maxsize=1024)
def distance_matrix(conc,config):
    imps  = possible_combs(conc, size)
    item = imps(config)
    #print(np.diag(item[:,1]))
    x = np.stack((size - item[:,1],item[:,1]), axis=1)
    return squareform(pdist(item, metric='euclidean')) + np.diag(x.min(axis=1))

array([[ 3.        , 27.65863337,  8.06225775, 30.        , 30.14962686,
        33.73425559, 33.13608305],
       [27.65863337,  1.        , 35.35533906, 57.31491952, 57.07889277,
        60.00833275,  6.70820393],
       [ 8.06225775, 35.35533906,  4.        , 22.02271555, 22.09072203,
        25.70992026, 41.0487515 ],
       [30.        , 57.31491952, 22.02271555,  3.        ,  3.        ,
         7.61577311, 63.07138812],
       [30.14962686, 57.07889277, 22.09072203,  3.        ,  4.        ,
         5.        , 63.        ],
       [33.73425559, 60.00833275, 25.70992026,  7.61577311,  5.        ,
         0.        , 66.12110102],
       [33.13608305,  6.70820393, 41.0487515 , 63.07138812, 63.        ,
        66.12110102,  4.        ]])

In [6]:
def create_dataset():
    x = []
    y = []
    for conc in np.arange(7,50,2):
        print(f"Processing concentration: {conc}")
        for config in range(5000):
            dist_mat = 0 *np.eye(50)
            dist_mat[:conc,:conc] +=  distance_matrix(conc, config)
            
            arr = configs(conc).index,configs(conc)[config]
            arr = np.array(arr).T
            y.append(dist_mat)
            x.append(arr)


    x = np.array(x)
    y = np.array(y)
    print(f"Shape of x: {x.shape}")
    print(f"Shape of y: {y.shape}")

    return x,y

x,y = create_dataset()

Processing concentration: 7
Processing concentration: 9
Processing concentration: 11
Processing concentration: 13
Processing concentration: 15
Processing concentration: 17
Processing concentration: 19
Processing concentration: 21
Processing concentration: 23
Processing concentration: 25
Processing concentration: 27
Processing concentration: 29
Processing concentration: 31
Processing concentration: 33
Processing concentration: 35
Processing concentration: 37
Processing concentration: 39
Processing concentration: 41
Processing concentration: 43
Processing concentration: 45
Processing concentration: 47
Processing concentration: 49
Shape of x: (110000, 400, 2)
Shape of y: (110000, 50, 50)


In [15]:
x = np.transpose(x, (0, 2, 1))
print(x.shape, y.shape)

(110000, 2, 400) (110000, 50, 50)


In [7]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset, random_split
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [8]:
full_dataset = MatrixDataset(x, y)
loader = DataLoader(full_dataset, batch_size=128, shuffle=True)

NameError: name 'MatrixDataset' is not defined

In [9]:
# Step 1: Define Custom Dataset
class SensorDistanceDataset(Dataset):
    """Custom dataset for sensor data -> distance matrix prediction"""
    
    def __init__(self, sensor_data, distance_matrices):
        """
        Args:
            sensor_data: numpy array of shape (n_samples, n_sensors, n_features)
            distance_matrices: numpy array of shape (n_samples, n_points, n_points)
        """
        self.sensor_data = torch.FloatTensor(sensor_data)
        self.distance_matrices = torch.FloatTensor(distance_matrices)
        
    def __len__(self):
        return len(self.sensor_data)
    
    def __getitem__(self, idx):
        return self.sensor_data[idx], self.distance_matrices[idx]


In [10]:
# Step 2: Define the Neural Network Model
class SensorToDistanceMatrixModel(nn.Module):
    def __init__(self, n_sensors, n_features, matrix_size, hidden_dims=[512, 256, 128]):
        """
        Args:
            n_sensors: number of sensors
            n_features: number of features per sensor
            matrix_size: size of the square distance matrix (n_points)
            hidden_dims: list of hidden layer dimensions
        """
        super(SensorToDistanceMatrixModel, self).__init__()
        
        self.n_sensors = n_sensors
        self.n_features = n_features
        self.matrix_size = matrix_size
        self.output_size = matrix_size * matrix_size
        
        # Input size calculation
        input_size = n_sensors * n_features
        
        # Build the network layers
        layers = []
        prev_dim = input_size
        
        # Hidden layers
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            prev_dim = hidden_dim
        
        # Output layer
        layers.append(nn.Linear(prev_dim, self.output_size))
        
        self.network = nn.Sequential(*layers)
        
    def forward(self, x):
        # Flatten sensor data: (batch_size, n_sensors, n_features) -> (batch_size, n_sensors * n_features)
        x = x.view(x.size(0), -1)
        
        # Pass through network
        output = self.network(x)
        
        # Reshape to distance matrix: (batch_size, matrix_size, matrix_size)
        distance_matrix = output.view(-1, self.matrix_size, self.matrix_size)
        
        # Ensure symmetry (distance matrices should be symmetric)
        distance_matrix = (distance_matrix + distance_matrix.transpose(-2, -1)) / 2
        
        # Ensure diagonal is zero (distance from point to itself)
        mask = torch.eye(self.matrix_size, device=distance_matrix.device).bool()
        distance_matrix = distance_matrix.masked_fill(mask.unsqueeze(0), 0)
        
        return distance_matrix



In [11]:
def train_model(model, train_loader, val_loader, num_epochs=1000, learning_rate=0.001):
    """Train the model"""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    
    # Loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10)
    
    train_losses = []
    val_losses = []
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        
        for batch_idx, (sensor_data, target_matrices) in enumerate(train_loader):
            sensor_data, target_matrices = sensor_data.to(device), target_matrices.to(device)
            
            optimizer.zero_grad()
            outputs = model(sensor_data)
            loss = criterion(outputs, target_matrices)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
        
        avg_train_loss = train_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            for sensor_data, target_matrices in val_loader:
                sensor_data, target_matrices = sensor_data.to(device), target_matrices.to(device)
                outputs = model(sensor_data)
                loss = criterion(outputs, target_matrices)
                val_loss += loss.item()
        
        avg_val_loss = val_loss / len(val_loader)
        val_losses.append(avg_val_loss)
        
        # Learning rate scheduling
        scheduler.step(avg_val_loss)
        
        if epoch % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')
    
    return train_losses, val_losses


In [12]:

# Step 4: Usage Example
def main():
    # Example parameters (adjust based on your data)
    n_samples = 110000
    n_sensors = 2
    n_features = 400
    matrix_size = 50  # 50x50 distance matrix

    # Generate dummy data (replace with your actual data)
    sensor_data = x
    distance_matrices = y
    
    # Make distance matrices symmetric and zero diagonal
    for i in range(n_samples):
        distance_matrices[i] = (distance_matrices[i] + distance_matrices[i].T) / 2
        np.fill_diagonal(distance_matrices[i], 0)
    
    # Split data
    split_idx = int(0.8 * n_samples)
    train_sensor = sensor_data[:split_idx]
    train_matrices = distance_matrices[:split_idx]
    val_sensor = sensor_data[split_idx:]
    val_matrices = distance_matrices[split_idx:]
    
    # Create datasets and dataloaders
    train_dataset = SensorDistanceDataset(train_sensor, train_matrices)
    val_dataset = SensorDistanceDataset(val_sensor, val_matrices)
    
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
    
    # Create model with your data dimensions
    model = SensorToDistanceMatrixModel(
        n_sensors=n_sensors,        # 2 sensors
        n_features=n_features,      # 400 features each
        matrix_size=matrix_size,    # 50x50 output matrix
        hidden_dims=[1024, 512, 256, 128]  # Larger network for your data size
    )
    
    print(f"Model has {sum(p.numel() for p in model.parameters())} parameters")
    
    # Train model
    train_losses, val_losses = train_model(model, train_loader, val_loader, num_epochs=50)
    
    # Save model
    torch.save(model.state_dict(), 'distance_matrix_model.pth')
    print("Model saved!")
    
    return model, train_losses, val_losses

# Step 5: Inference Function
def predict_distance_matrix(model, sensor_input):
    """Make prediction on new sensor data"""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        if isinstance(sensor_input, np.ndarray):
            sensor_input = torch.FloatTensor(sensor_input)
        
        sensor_input = sensor_input.to(device)
        if len(sensor_input.shape) == 2:  # Add batch dimension if needed
            sensor_input = sensor_input.unsqueeze(0)
        
        predicted_matrix = model(sensor_input)
        return predicted_matrix.cpu().numpy()

if __name__ == "__main__":
    model, train_losses, val_losses = main()

Model has 1831748 parameters
Epoch [1/50], Train Loss: 247.7451, Val Loss: 893.8703
Epoch [11/50], Train Loss: 233.6884, Val Loss: 899.9047
Epoch [21/50], Train Loss: 229.9118, Val Loss: 902.9967
Epoch [31/50], Train Loss: 229.0682, Val Loss: 904.6174
Epoch [41/50], Train Loss: 229.1462, Val Loss: 904.0324
Model saved!
