In [3]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt

# Load the static tract-level summary table and attach the `is_downtown`
# indicator derived from the downtown tract list.
file_path = "./static_data_summary.csv"
data = pd.read_csv(file_path)

downtown_file_path = "./Downtown_Tracts.csv"
downtown_data = pd.read_csv(downtown_file_path)
# NOTE(review): tracts listed in Downtown_Tracts.csv receive 0 here and every
# other tract receives 1 below, which reads as inverted relative to the column
# name -- confirm the intended encoding before relying on it.
downtown_data['is_downtown'] = 0

# Left merge keeps all rows of `data`; rows without a matching TractID get NaN
# in `is_downtown`.
data = pd.merge(data, downtown_data[['TractID', 'is_downtown']], left_on='geoid10', right_on='TractID', how='left')
# Assign the filled column back instead of calling fillna(inplace=True) on the
# column selection: the in-place form operates on an intermediate object, which
# newer pandas versions warn about and which does not update `data` once
# Copy-on-Write is enabled.
data['is_downtown'] = data['is_downtown'].fillna(1).astype(int)

# Load the ridesourcing count table and compute one total demand per tract.
ridesourcing_file_path = "./Ridesourcing_CensusCount_ALL_0_Filled.csv"
ridesourcing_data = pd.read_csv(ridesourcing_file_path)
ridesourcing_data['tract_id'] = ridesourcing_data['index']
ridesourcing_data = ridesourcing_data.drop(columns=['index'])
# Sum every column except the identifier. Summing the whole frame would add the
# tract_id value itself into the total.
# (presumably all remaining columns are trip counts -- confirm against the CSV)
count_columns = ridesourcing_data.columns.drop('tract_id')
ridesourcing_data['total_travel_demand'] = ridesourcing_data[count_columns].sum(axis=1)

# Left merge keeps all rows of `data`; tracts with no ridesourcing row end up
# with NaN in total_travel_demand.
data = pd.merge(data, ridesourcing_data[['tract_id', 'total_travel_demand']], left_on='geoid10', right_on='tract_id', how='left')

# Regression target: log(1 + total demand), stored as a new column.
data = data.assign(log_total_travel_demand=np.log1p(data['total_travel_demand']))

# Order rows by the downtown flag first, then by tract id.
data = data.sort_values(by=['is_downtown', 'geoid10'])

# Identifier columns and both forms of the target are excluded from the
# model inputs; everything else is treated as a feature.
non_feature_columns = [
    'geoid10',
    'total_travel_demand',
    'tract_id',
    'TractID',
    'log_total_travel_demand',
]
features = data.drop(columns=non_feature_columns)
labels = data['log_total_travel_demand']

# Split the data before scaling so that the held-out rows never influence the
# scaler's fitted min/max (fitting on the full dataset leaks test statistics
# into training). The split itself is unchanged: same test_size and seed.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Normalize features using Min-Max Scaler, fitted on the training split only.
# Test values may therefore fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Keep `features` bound to a scaled array, as before, for any later code that
# reads it; it now uses the training-split scaling.
features = scaler.transform(features)

# Convert to PyTorch tensors; targets are reshaped to column vectors (N, 1).
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

# Create dataloaders: training batches are shuffled, test batches are not.
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Feedforward neural network with 3 hidden layers and ReLU activations.
class MonotonicNN(nn.Module):
    """Fully connected regressor with three hidden ReLU layers and one output.

    The architecture does not constrain its weights, so it is not monotonic
    by construction; any monotonic behavior has to come from the training
    loss.

    Args:
        input_dim: Number of input features per sample. Defaults to 1.
        hidden_dim: Width of each of the three hidden layers. Defaults to 64.
    """

    def __init__(self, input_dim=1, hidden_dim=64):
        super().__init__()
        # Attribute names and construction order are kept stable so that
        # state_dict keys and seeded weight initialization match the
        # default (1 -> 64 -> 64 -> 64 -> 1) configuration.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of shape (..., input_dim) to predictions of shape (..., 1)."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        # No activation on the output layer: this is a regression head.
        x = self.fc4(x)
        return x

# Loss function with monotonicity penalty
def monotonicity_penalty(preds, monotonic_indices, weight=1.0):
    """Penalize decreases between consecutive predictions.

    For every index ``i`` in ``monotonic_indices`` the term
    ``max(preds[i - 1] - preds[i], 0) ** 2`` is added, so only positions where
    the prediction drops relative to the previous row contribute.

    Args:
        preds: Prediction tensor whose first dimension indexes samples,
            e.g. shape (N,) or (N, 1).
        monotonic_indices: Iterable of integer row indices (or an integer
            tensor) to check against their predecessor. Index 0 is compared
            with the last row because ``i - 1`` wraps to ``-1``, so callers
            normally start at 1.
        weight: Multiplier applied to the summed penalty.

    Returns:
        A zero-dimensional tensor. A tensor is returned even when nothing is
        violated, so callers can use ``.item()`` and ``.backward()`` in all
        cases.
    """
    # View predictions as (N, -1) so that (N,) and (N, 1) inputs are handled
    # the same way.
    rows = preds.reshape(preds.shape[0], -1)
    if torch.is_tensor(monotonic_indices):
        idx = monotonic_indices.to(device=rows.device, dtype=torch.long)
    else:
        idx = torch.as_tensor(list(monotonic_indices), dtype=torch.long, device=rows.device)
    # relu keeps only the drops (previous > current); increases contribute 0.
    drops = torch.relu(rows[idx - 1] - rows[idx])
    return weight * (drops ** 2).sum()

# Combined objective: mean squared error plus the monotonicity penalty
def total_loss_function(preds, targets, monotonic_indices, penalty_weight=1.0):
    """Return the training objective together with its two components.

    Args:
        preds: Model predictions.
        targets: Ground-truth values compared against ``preds`` by MSE.
        monotonic_indices: Indices forwarded to ``monotonicity_penalty``.
        penalty_weight: Weight forwarded to ``monotonicity_penalty``.

    Returns:
        Tuple ``(total, mse, penalty)`` where ``total = mse + penalty``.
    """
    mse_term = nn.functional.mse_loss(preds, targets)
    penalty_term = monotonicity_penalty(preds, monotonic_indices, weight=penalty_weight)
    return mse_term + penalty_term, mse_term, penalty_term


# Generate some synthetic data: a noisy sine wave sampled at 100 evenly spaced
# points on [0, 1].
# NOTE: this rebinds the name `y_train`, replacing the real-data target tensor
# created earlier in this cell (the existing DataLoader objects still hold the
# original tensor).
torch.manual_seed(0)
x_train = torch.linspace(0, 1, 100).view(-1, 1)
y_train = torch.sin(2 * torch.pi * x_train) + 0.1 * torch.randn_like(x_train)

# Initialize model, loss function, and optimizer
model = MonotonicNN()
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-5)

# Training loop (full-batch: every step uses all synthetic points)
epochs = 100
penalty_weight = 10.0
# Each prediction from the second row onward is compared with its predecessor.
# The index range depends only on the dataset size, so it is built once here
# rather than on every epoch.
monotonic_indices = range(1, len(x_train))

for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    preds = model(x_train)

    loss, mse_loss, penalty = total_loss_function(preds, y_train, monotonic_indices, penalty_weight=penalty_weight)

    loss.backward()
    optimizer.step()

    # float() accepts both a one-element tensor and a plain Python number, so
    # logging cannot fail when the penalty comes back as a bare 0.0.
    print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}, MSE: {mse_loss.item():.4f}, Penalty: {float(penalty):.4f}')

Epoch [1/100], Loss: 0.5087, MSE: 0.5087, Penalty: 0.0000
Epoch [2/100], Loss: 0.4543, MSE: 0.4496, Penalty: 0.0047
Epoch [3/100], Loss: 0.4001, MSE: 0.3791, Penalty: 0.0210
Epoch [4/100], Loss: 0.3579, MSE: 0.2938, Penalty: 0.0641
Epoch [5/100], Loss: 0.3494, MSE: 0.2290, Penalty: 0.1203
Epoch [6/100], Loss: 0.3570, MSE: 0.2255, Penalty: 0.1315
Epoch [7/100], Loss: 0.3515, MSE: 0.2376, Penalty: 0.1139
Epoch [8/100], Loss: 0.3461, MSE: 0.2648, Penalty: 0.0813
Epoch [9/100], Loss: 0.3453, MSE: 0.2758, Penalty: 0.0695
Epoch [10/100], Loss: 0.3406, MSE: 0.2603, Penalty: 0.0803
Epoch [11/100], Loss: 0.3365, MSE: 0.2350, Penalty: 0.1015
Epoch [12/100], Loss: 0.3341, MSE: 0.2238, Penalty: 0.1104
Epoch [13/100], Loss: 0.3294, MSE: 0.2276, Penalty: 0.1019
Epoch [14/100], Loss: 0.3303, MSE: 0.2410, Penalty: 0.0893
Epoch [15/100], Loss: 0.3274, MSE: 0.2415, Penalty: 0.0859
Epoch [16/100], Loss: 0.3262, MSE: 0.2331, Penalty: 0.0931
Epoch [17/100], Loss: 0.3242, MSE: 0.2235, Penalty: 0.1007
Epoch 