# Original

In [16]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# 1. Load and preprocess ML-100k with normalization
def load_ml100k(base_path, split='u1'):
    """Load one MovieLens-100k train/test split as dense rating tensors.

    Reads ``{base_path}/{split}.base`` and ``{base_path}/{split}.test``
    (tab-separated ``user, item, rating, ts`` rows without a header) and
    columns 5-23 of ``{base_path}/u.item`` (pipe-separated, latin-1).

    Args:
        base_path: Directory that contains the split files and ``u.item``.
        split: File stem of the split to load, e.g. ``'u1'``.

    Returns:
        ``(R_train, M_train, R_test, M_test, P, nb_users, nb_items)`` where
        ``R_*`` are float32 ``(nb_users, nb_items)`` tensors holding
        ``rating / 5.0`` at observed positions and 0 elsewhere, ``M_*`` are
        the matching 0/1 observation masks, ``P`` is a float32 tensor of the
        selected ``u.item`` columns (presumably the genre flags -- confirm
        against the dataset README), and ``nb_users`` / ``nb_items`` are the
        largest user / item ids seen across both splits.
    """
    columns = ['user', 'item', 'rating', 'ts']
    train = pd.read_csv(f'{base_path}/{split}.base', sep='\t', header=None,
                        names=columns)
    test = pd.read_csv(f'{base_path}/{split}.test', sep='\t', header=None,
                       names=columns)
    items = pd.read_csv(f'{base_path}/u.item', sep='|', header=None,
                        encoding='latin-1', usecols=range(5, 24))
    # np.array(...) makes a fresh, writable float32 copy for torch to wrap.
    P = torch.from_numpy(np.array(items.values, dtype=np.float32))

    nb_users = int(max(train.user.max(), test.user.max()))
    nb_items = int(max(train.item.max(), test.item.max()))

    def to_matrix(df):
        """Scatter the ratings of *df* into a dense matrix and its mask."""
        # Ids in the files are 1-based; shift them to 0-based indices.
        rows = df['user'].to_numpy() - 1
        cols = df['item'].to_numpy() - 1
        mat = np.zeros((nb_users, nb_items), dtype=np.float32)
        mask = np.zeros_like(mat, dtype=np.float32)
        # One vectorized assignment instead of a Python loop over every row.
        # Dividing in float64 and casting on store matches scalar `r / 5.0`.
        mat[rows, cols] = df['rating'].to_numpy(dtype=np.float64) / 5.0
        mask[rows, cols] = 1
        return mat, mask

    R_train, M_train = to_matrix(train)
    R_test, M_test = to_matrix(test)
    return (torch.from_numpy(R_train), torch.from_numpy(M_train),
            torch.from_numpy(R_test), torch.from_numpy(M_test),
            P, nb_users, nb_items)

# Dataset location and compute device
base_path = '/Users/sivamanipatnala/Desktop/Academics/Sem_6/RS/ml-100k'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the u1 split; U and I are the user and item counts returned by the loader.
R_train, M_train, R_test, M_test, P, U, I = load_ml100k(base_path, split='u1')

# Move every tensor onto the selected device.
R_train = R_train.to(device)
M_train = M_train.to(device)
R_test = R_test.to(device)
M_test = M_test.to(device)
P = P.to(device)

# 2. Model: Joint Dual Semi-AE
class JointDPDAE(nn.Module):
    """Two sigmoid autoencoders, one over users and one over items.

    The user encoder consumes a row of item ratings; the item encoder
    consumes a row of user ratings concatenated with item features. Both
    encoders end in a sigmoid and emit ``latent_dim`` values; both decoders
    end in a linear layer.

    Args:
        n_users: Width of an item's rating vector (and of the item decoder output).
        n_items: Width of a user's rating vector (and of the user decoder output).
        n_features: Number of side-feature columns appended to item ratings.
        latent_dim: Size of the latent code produced by both encoders.
    """

    def __init__(self, n_users, n_items, n_features, latent_dim):
        super().__init__()
        hidden = 200  # width of the single hidden layer in every sub-network

        def encoder(in_dim):
            return nn.Sequential(
                nn.Linear(in_dim, hidden),
                nn.Sigmoid(),
                nn.Linear(hidden, latent_dim),
                nn.Sigmoid(),
            )

        def decoder(out_dim):
            return nn.Sequential(
                nn.Linear(latent_dim, hidden),
                nn.Sigmoid(),
                nn.Linear(hidden, out_dim),
            )

        # Creation order is kept (u_enc, u_dec, i_enc, i_dec) so that seeded
        # initialisation yields the same weights as before.
        self.u_enc = encoder(n_items)
        self.u_dec = decoder(n_items)
        self.i_enc = encoder(n_users + n_features)
        self.i_dec = decoder(n_users)

    def forward(self, U_ratings, I_ratings, I_features):
        """Return ``(U_rec, U_lat, I_rec, I_lat)`` for the given inputs.

        ``I_ratings`` and ``I_features`` are concatenated along ``dim=1``
        before being passed to the item encoder.
        """
        user_code = self.u_enc(U_ratings)
        item_code = self.i_enc(torch.cat((I_ratings, I_features), dim=1))
        return self.u_dec(user_code), user_code, self.i_dec(item_code), item_code

# 3. Hyperparameters

# Optimisation
epochs = 80
grad_batch_size = 128          # users per mini-batch
lr = 0.005
reg_lambda = 1e-5              # weight of the L2 penalty over all parameters

# Model / loss
latent_dim = 64
lambda_mf = 0.3                # weight of the matrix-factorisation loss term

# Privacy
epsilon = 4
max_grad_norm = 0.3            # gradient-norm clipping threshold
sample_rate = grad_batch_size / U   # fraction of users per batch (about 0.136 if U = 943)

# Scale of the Laplace noise added to the clipped gradients: 0.3 / 4 = 0.075
noise_scale = max_grad_norm / epsilon

# 4. Model, optimizer and mini-batch loader
model = JointDPDAE(
    n_users=U,
    n_items=I,
    n_features=P.size(1),
    latent_dim=latent_dim,
).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

# Each dataset row pairs one user's rating vector with that user's mask.
train_dataset = TensorDataset(R_train, M_train)
train_loader = DataLoader(
    train_dataset,
    batch_size=grad_batch_size,
    shuffle=True,
)

# 5. Training loop with DP-SGD (Laplace noise)
#
# Per batch of users: reconstruct the users' rating rows, reconstruct the
# rating columns of every item rated in the batch, add a matrix-factorisation
# term on the latent codes, then clip the gradient norm and add Laplace noise
# before the optimizer step.
#
# NOTE(review): clipping is applied to the aggregated batch gradient rather
# than per example, and the epsilon bookkeeping is a heuristic that is added
# once per epoch (not once per optimizer step). The printed ε should not be
# read as a formal differential-privacy guarantee -- confirm with a proper
# privacy accountant before reporting it.

# Loop-invariant tensors, built once instead of on every batch.
R_items = R_train.t()                               # item-major view of the ratings
M_items = M_train.t()                               # item-major view of the mask
item_inputs_full = torch.cat([R_items, P], dim=1)   # i_enc input for every item

# One sampler for the whole run; drawing on `device` avoids a CPU->device
# copy for every parameter on every step.
grad_noise = torch.distributions.Laplace(
    torch.tensor(0.0, device=device),
    torch.tensor(float(noise_scale), device=device),
)

# The budget increment depends only on constants, so compute it once.
per_step_epsilon = min(sample_rate, 2 * sample_rate / noise_scale)  # Simplified bound

total_epsilon = 0.0
for epoch in range(1, epochs + 1):
    model.train()
    for batch_R, batch_M in train_loader:
        # Items that have at least one observed rating among the batch users.
        batch_items = torch.unique(torch.where(batch_M)[1])
        if batch_items.numel() == 0:
            # Nothing observed in this batch: the masked means below would
            # divide by zero and poison the parameters with NaN.
            continue

        optimizer.zero_grad()

        I_ratings = R_items[batch_items]
        I_features = P[batch_items]
        mask_i = M_items[batch_items]

        # Forward pass
        U_rec, U_lat, I_rec, I_lat = model(batch_R, I_ratings, I_features)

        # Masked mean-squared errors over observed entries only.
        mask_u = batch_M
        loss_u = ((U_rec - batch_R) ** 2 * mask_u).sum() / mask_u.sum()
        loss_i = ((I_rec - I_ratings) ** 2 * mask_i).sum() / mask_i.sum()
        # Item latents for all items are recomputed each batch so the MF term
        # back-propagates into the item encoder.
        I_lat_full = model.i_enc(item_inputs_full)
        R_pred = torch.matmul(U_lat, I_lat_full.t())
        loss_mf = ((R_pred - batch_R) ** 2 * mask_u).sum() / mask_u.sum()
        l2_reg = sum(p.pow(2).sum() for p in model.parameters())
        loss = loss_u + loss_i + lambda_mf * loss_mf + reg_lambda * l2_reg

        # Backward pass
        loss.backward()

        # Clip the total gradient norm, then add Laplace noise element-wise.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        for p in model.parameters():
            if p.grad is not None:
                p.grad.add_(grad_noise.sample(p.grad.shape))

        optimizer.step()

    # Update privacy budget (approximate amplification by sampling)
    total_epsilon += per_step_epsilon

    # Evaluation on the held-out ratings, rescaled back to the 1-5 range.
    if epoch % 10 == 0 or epoch == 1:
        model.eval()
        with torch.no_grad():
            U_rec, U_lat, I_rec, I_lat = model(R_train, R_items, P)
            R_pred = torch.matmul(U_lat, I_lat.t()) * 5.0
            se = ((R_pred - (R_test * 5.0)) ** 2 * M_test).sum()
            rmse = torch.sqrt(se / M_test.sum()).item()
        # `loss` is the loss of the last processed batch of this epoch.
        print(f"Epoch {epoch}/{epochs}: ε={total_epsilon:.2f}, Loss={loss.item():.4f}, RMSE={rmse:.4f}")

# 6. Final Test RMSE
# Predictions are the dot products of user and item latent codes, rescaled
# from the normalized range back to the 1-5 rating scale.
model.eval()
with torch.no_grad():
    U_rec, U_lat, I_rec, I_lat = model(R_train, R_train.t(), P)
    R_pred = 5.0 * (U_lat @ I_lat.t())
    test_targets = 5.0 * R_test
    se = torch.sum(M_test * (R_pred - test_targets) ** 2)
    n_test = M_test.sum()
    rmse = torch.sqrt(se / n_test).item()
print(f"Final Test RMSE: {rmse:.4f}, Total ε={total_epsilon:.2f}")

Epoch 1/80: ε=0.14, Loss=64.9244, RMSE=72.4590
Epoch 10/80: ε=1.36, Loss=13.6398, RMSE=31.4416
Epoch 20/80: ε=2.71, Loss=2.4505, RMSE=8.2346
Epoch 30/80: ε=4.07, Loss=1.4245, RMSE=1.9049
Epoch 40/80: ε=5.43, Loss=1.0874, RMSE=1.3161
Epoch 50/80: ε=6.79, Loss=0.8016, RMSE=1.2486
Epoch 60/80: ε=8.14, Loss=0.7182, RMSE=1.2373
Epoch 70/80: ε=9.50, Loss=0.6431, RMSE=1.2577
Epoch 80/80: ε=10.86, Loss=0.6499, RMSE=1.2133
Final Test RMSE: 1.2133, Total ε=10.86


In [None]:
# IMPROVED WITH LESS NOISE FOR MORE POPULAR ITEMS

In [29]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# 1. Data Loading (Unchanged)
def load_ml100k(base_path, split='u1'):
    """Load one MovieLens-100k train/test split as dense rating tensors.

    Reads ``{base_path}/{split}.base`` and ``{base_path}/{split}.test``
    (tab-separated ``user, item, rating, ts`` rows without a header) and
    columns 5-23 of ``{base_path}/u.item`` (pipe-separated, latin-1).

    Args:
        base_path: Directory that contains the split files and ``u.item``.
        split: File stem of the split to load, e.g. ``'u1'``.

    Returns:
        ``(R_train, M_train, R_test, M_test, P, nb_users, nb_items)`` where
        ``R_*`` are float32 ``(nb_users, nb_items)`` tensors holding
        ``rating / 5.0`` at observed positions and 0 elsewhere, ``M_*`` are
        the matching 0/1 observation masks, ``P`` is a float32 tensor of the
        selected ``u.item`` columns (presumably the genre flags -- confirm
        against the dataset README), and ``nb_users`` / ``nb_items`` are the
        largest user / item ids seen across both splits.
    """
    columns = ['user', 'item', 'rating', 'ts']
    train = pd.read_csv(f'{base_path}/{split}.base', sep='\t', header=None,
                        names=columns)
    test = pd.read_csv(f'{base_path}/{split}.test', sep='\t', header=None,
                       names=columns)
    items = pd.read_csv(f'{base_path}/u.item', sep='|', header=None,
                        encoding='latin-1', usecols=range(5, 24))
    # np.array(...) makes a fresh, writable float32 copy for torch to wrap.
    P = torch.from_numpy(np.array(items.values, dtype=np.float32))

    nb_users = int(max(train.user.max(), test.user.max()))
    nb_items = int(max(train.item.max(), test.item.max()))

    def to_matrix(df):
        """Scatter the ratings of *df* into a dense matrix and its mask."""
        # Ids in the files are 1-based; shift them to 0-based indices.
        rows = df['user'].to_numpy() - 1
        cols = df['item'].to_numpy() - 1
        mat = np.zeros((nb_users, nb_items), dtype=np.float32)
        mask = np.zeros_like(mat, dtype=np.float32)
        # One vectorized assignment instead of a Python loop over every row.
        # Dividing in float64 and casting on store matches scalar `r / 5.0`.
        mat[rows, cols] = df['rating'].to_numpy(dtype=np.float64) / 5.0
        mask[rows, cols] = 1
        return mat, mask

    R_train, M_train = to_matrix(train)
    R_test, M_test = to_matrix(test)
    return (torch.from_numpy(R_train), torch.from_numpy(M_train),
            torch.from_numpy(R_test), torch.from_numpy(M_test),
            P, nb_users, nb_items)

# 2. Hyperparameters (Optimized)

# Optimisation
epochs = 80
grad_batch_size = 128    # users per mini-batch
lr = 0.005
reg_lambda = 1e-5        # weight of the L2 penalty over all parameters

# Model / loss
latent_dim = 64
denoise_prob = 0.1       # dropout probability applied to the autoencoder inputs
# NOTE(review): the training loop in this cell hard-codes its loss weights
# (0.4 / 0.4 / 0.2) and never reads lambda_mf -- confirm whether it is still needed.
lambda_mf = 0.3

# Privacy
epsilon = 4
max_grad_norm = 0.3      # gradient-norm clipping threshold

# Dataset location (relative to the working directory) and compute device
base_path = 'ml-100k'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the u1 split, then move every tensor onto the selected device.
R_train, M_train, R_test, M_test, P, U, I = load_ml100k(base_path, split='u1')
R_train, M_train, R_test, M_test, P = map(
    lambda tensor: tensor.to(device),
    (R_train, M_train, R_test, M_test, P),
)

# 3. Model (dropout on the inputs only; no additive noise in the forward pass)
class JointDPDAE_Corrected(nn.Module):
    """User and item sigmoid autoencoders with dropout on their inputs.

    The user encoder consumes a row of item ratings; the item encoder
    consumes a row of user ratings concatenated with item features. Dropout
    is applied to both encoder inputs (active only in training mode).

    Args:
        n_users: Width of an item's rating vector (and of the item decoder output).
        n_items: Width of a user's rating vector (and of the user decoder output).
        n_features: Number of side-feature columns appended to item ratings.
        latent_dim: Size of the latent code produced by both encoders.
        dropout_p: Dropout probability for both inputs. ``None`` (the
            default) falls back to the module-level ``denoise_prob``, which
            keeps existing four-argument calls working unchanged.
    """

    def __init__(self, n_users, n_items, n_features, latent_dim=64, dropout_p=None):
        super().__init__()
        if dropout_p is None:
            # Backward-compatible default: read the notebook-level setting.
            dropout_p = denoise_prob
        self.u_dropout = nn.Dropout(dropout_p)
        self.i_dropout = nn.Dropout(dropout_p)

        # User network
        self.u_enc = nn.Sequential(
            nn.Linear(n_items, 200), nn.Sigmoid(),
            nn.Linear(200, latent_dim), nn.Sigmoid()
        )
        self.u_dec = nn.Sequential(
            nn.Linear(latent_dim, 200), nn.Sigmoid(),
            nn.Linear(200, n_items)
        )

        # Item network
        self.i_enc = nn.Sequential(
            nn.Linear(n_users + n_features, 200), nn.Sigmoid(),
            nn.Linear(200, latent_dim), nn.Sigmoid()
        )
        self.i_dec = nn.Sequential(
            nn.Linear(latent_dim, 200), nn.Sigmoid(),
            nn.Linear(200, n_users)
        )

    def forward(self, U_ratings, I_ratings, I_features):
        """Return ``(U_rec, U_lat, I_rec, I_lat)`` for the given inputs.

        ``I_ratings`` and ``I_features`` are concatenated along ``dim=1``;
        dropout is applied to the user input and to the concatenated item
        input before encoding.
        """
        U_noisy = self.u_dropout(U_ratings)
        x = torch.cat([I_ratings, I_features], dim=1)
        x_noisy = self.i_dropout(x)

        U_lat = self.u_enc(U_noisy)
        I_lat = self.i_enc(x_noisy)

        U_rec = self.u_dec(U_lat)
        I_rec = self.i_dec(I_lat)

        return U_rec, U_lat, I_rec, I_lat

# 4. Model, optimizer and mini-batch loader
model = JointDPDAE_Corrected(
    n_users=U,
    n_items=I,
    n_features=P.size(1),
    latent_dim=latent_dim,
).to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
# Each dataset row pairs one user's rating vector with that user's mask.
train_loader = DataLoader(
    TensorDataset(R_train, M_train),
    batch_size=grad_batch_size,
    shuffle=True,
)

# 5. Popularity-aware Noise Scaling
# Per-item Laplace scale: the most-rated item gets the base scale
# max_grad_norm / epsilon; less popular items get a larger scale, capped at
# 10x the base by the 0.1 floor on the normalized popularity.
pop_counts = M_train.sum(0)                  # number of training ratings per item
most_popular = pop_counts.max()
pop_norm = torch.clamp(pop_counts / most_popular, min=0.1)
noise_scales = max_grad_norm / (epsilon * pop_norm)

# 6. Training Loop (Corrected DP-SGD)
#
# Per batch of users: reconstruct the users' rating rows, reconstruct the
# rating columns of every item rated in the batch, add a matrix-factorisation
# term against item latents frozen at the start of the epoch, then clip the
# gradient norm and add Laplace noise. Item-encoder gradients receive noise
# whose scale is the mean popularity-aware scale of the batch items; all
# other gradients receive the base scale max_grad_norm / epsilon.
#
# NOTE(review): clipping is applied to the aggregated batch gradient rather
# than per example, and the epsilon bookkeeping is a simplified per-epoch
# increment that ignores the larger popularity-aware scales. The printed ε
# should not be read as a formal differential-privacy guarantee -- confirm
# with a proper privacy accountant before reporting it.

# Loop-invariant tensors, built once instead of on every epoch / batch.
R_items = R_train.t()                               # item-major view of the ratings
M_items = M_train.t()                               # item-major view of the mask
item_inputs_full = torch.cat([R_items, P], dim=1)   # i_enc input for every item

# Base-scale sampler, created once and drawing directly on `device`.
base_noise = torch.distributions.Laplace(
    torch.tensor(0.0, device=device),
    torch.tensor(max_grad_norm / epsilon, device=device),
)

# Privacy accounting (simplified): constant increment per epoch.
epsilon_per_epoch = (grad_batch_size / U) * (1 / epsilon)

total_epsilon = 0.0
best_rmse = float('inf')

for epoch in range(1, epochs + 1):
    model.train()
    # Item latents for the MF term are refreshed once per epoch and treated
    # as constants; no_grad avoids building a graph only to detach it.
    with torch.no_grad():
        I_lat_full = model.i_enc(item_inputs_full)

    for batch_R, batch_M in train_loader:
        # Items that have at least one observed rating among the batch users.
        batch_items = torch.unique(torch.where(batch_M)[1])
        if batch_items.numel() == 0:
            # Nothing observed in this batch: the masked means and the mean
            # noise scale below would be undefined.
            continue

        optimizer.zero_grad()

        I_ratings = R_items[batch_items]
        I_features = P[batch_items]
        batch_noise_scales = noise_scales[batch_items]

        # Forward pass
        U_rec, U_lat, I_rec, I_lat = model(batch_R, I_ratings, I_features)

        # Masked mean-squared errors over observed entries only.
        mask_u = batch_M
        loss_u = ((U_rec - batch_R)**2 * mask_u).sum() / mask_u.sum()
        mask_i = M_items[batch_items]
        loss_i = ((I_rec - I_ratings)**2 * mask_i).sum() / mask_i.sum()
        R_pred = U_lat @ I_lat_full.t()
        loss_mf = ((R_pred - batch_R)**2 * mask_u).sum() / mask_u.sum()
        l2_reg = sum(p.pow(2).sum() for p in model.parameters())

        loss = 0.4*loss_u + 0.4*loss_i + 0.2*loss_mf + reg_lambda*l2_reg

        # Backward pass
        loss.backward()

        # Popularity-aware gradient processing
        with torch.no_grad():
            # Global gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)

            # Mean noise scale of the batch items, computed once per batch
            # rather than once per parameter.
            avg_scale = batch_noise_scales.mean()
            item_noise = torch.distributions.Laplace(
                torch.zeros_like(avg_scale), avg_scale
            )

            for name, param in model.named_parameters():
                if param.grad is None:
                    continue
                # Item-encoder gradients get the popularity-aware scale,
                # every other gradient gets the base scale.
                sampler = item_noise if 'i_enc' in name else base_noise
                param.grad.add_(sampler.sample(param.grad.shape))

        optimizer.step()

    # Privacy accounting (simplified)
    total_epsilon += epsilon_per_epoch

    # Validation on the held-out ratings, rescaled back to the 1-5 range.
    if epoch % 10 == 0 or epoch == 1:
        model.eval()
        with torch.no_grad():
            _, U_lat, _, I_lat = model(R_train, R_items, P)
            R_pred = (U_lat @ I_lat.t()) * 5.0
            rmse = torch.sqrt(((R_pred - R_test*5.0)**2 * M_test).sum() / M_test.sum()).item()
            if rmse < best_rmse:
                best_rmse = rmse
            print(f"Epoch {epoch}/{epochs}: ε={total_epsilon:.2f}, RMSE={rmse:.4f}, Best={best_rmse:.4f}")

# Final Test
# Predictions are the dot products of user and item latent codes, rescaled
# from the normalized range back to the 1-5 rating scale.
model.eval()
with torch.no_grad():
    _, U_lat, _, I_lat = model(R_train, R_train.t(), P)
    R_pred = torch.matmul(U_lat, I_lat.t()) * 5.0
    squared_error = (R_pred - R_test * 5.0) ** 2 * M_test
    mean_squared_error = squared_error.sum() / M_test.sum()
    rmse = torch.sqrt(mean_squared_error).item()
print(f"\nFinal RMSE: {rmse:.4f} (Best: {best_rmse:.4f}) at ε={total_epsilon:.2f}")

Epoch 1/80: ε=0.03, RMSE=78.7577, Best=78.7577
Epoch 10/80: ε=0.34, RMSE=44.9710, Best=44.9710
Epoch 20/80: ε=0.68, RMSE=16.1233, Best=16.1233
Epoch 30/80: ε=1.02, RMSE=5.4124, Best=5.4124
Epoch 40/80: ε=1.36, RMSE=1.6129, Best=1.6129
Epoch 50/80: ε=1.70, RMSE=1.7161, Best=1.6129
Epoch 60/80: ε=2.04, RMSE=1.4999, Best=1.4999
Epoch 70/80: ε=2.38, RMSE=1.4861, Best=1.4861
Epoch 80/80: ε=2.71, RMSE=1.3449, Best=1.3449

Final RMSE: 1.3449 (Best: 1.3449) at ε=2.71
