In [13]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# ------------------------------
# Load JSON files
# ------------------------------
with open('/kaggle/input/encryption/MMU (1).json', 'r') as f:
    original_data = json.load(f)

with open('/kaggle/input/encryption/train_pairs.json', 'r') as f:
    encrypted_data = json.load(f)

# ------------------------------
# Flatten original data
# ------------------------------
# One row per feature vector; each row is labelled with the key (user id)
# under which it was stored.
X_original = []
y = []

for user_id in original_data:
    features_list = original_data[user_id]['features']
    num_vectors = len(features_list)
    X_original.extend(features_list)
    y.extend([user_id]*num_vectors)

X_original = np.array(X_original)
y = np.array(y)

# ------------------------------
# Flatten encrypted data
# ------------------------------
# Since encrypted vectors are stored as strings, we'll convert them to dummy numeric arrays
# (FHE ciphertexts cannot be used by a normal DL model, so this simulates that DL fails)
#
# A dedicated, seeded generator is used instead of the global np.random state
# so that the dummy vectors are identical on every "Restart & Run All".
dummy_rng = np.random.default_rng(42)
X_encrypted = []

for entry in encrypted_data:
    # In reality, encrypted_vector is a ciphertext string; we cannot train on it
    # We'll simulate it by random vectors of the same shape as original features
    orig_vec_len = len(entry['original_vector'])
    X_encrypted.append(dummy_rng.random(orig_vec_len))  # dummy vector to represent encryption

X_encrypted = np.array(X_encrypted)

# Labels are attached to the dummy vectors purely by position: the first
# len(X_encrypted) labels of the flattened original data are used.
# NOTE(review): this assumes train_pairs.json is ordered like the flattened
# MMU features -- confirm against how the pairs file was generated.
if len(X_encrypted) > len(y):
    # Slicing y would silently yield fewer labels than samples and only fail
    # later inside train_test_split; fail here with a readable message.
    raise ValueError(
        f"{len(X_encrypted)} encrypted entries but only {len(y)} labels available"
    )

# Encode labels
le = LabelEncoder()
y_encoded = le.fit_transform(y[:len(X_encrypted)])  # match length

# Split encrypted data
X_train, X_test, y_train, y_test = train_test_split(
    X_encrypted, y_encoded, test_size=0.2, random_state=42
)

# Convert to torch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test  = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test  = torch.tensor(y_test, dtype=torch.long)

# ------------------------------
# Simple MLP
# ------------------------------
class SimpleMLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear.

    The forward pass returns the output of the last linear layer directly
    (no softmax is applied inside the module).

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: width of the hidden layer.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order as they are applied.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        logits = self.net(x)
        return logits

# Model dimensions come from the data: one input unit per feature and one
# output unit per distinct encoded label.
input_dim = X_train.size(1)
num_classes = np.unique(y_encoded).size
model = SimpleMLP(input_dim=input_dim, hidden_dim=32, output_dim=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# ------------------------------
# Training loop
# ------------------------------
# Full-batch training: every epoch is a single optimizer step on all of X_train.
epochs = 50
model.train()
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

# ------------------------------
# Evaluate accuracy
# ------------------------------
model.eval()
with torch.no_grad():
    outputs = model(X_test)
    predicted = outputs.argmax(dim=1)
    num_correct = (predicted == y_test).sum().item()
    accuracy = num_correct / len(y_test)

print(f"Accuracy on encrypted vectors: {accuracy*100:.2f}%")


Accuracy on encrypted vectors: 3.49%


In [14]:
import json
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# ------------------------------
# Load JSON files
# ------------------------------
with open('/kaggle/input/encryption/MMU (1).json', 'r') as f:
    original_data = json.load(f)

with open('/kaggle/input/encryption/train_pairs.json', 'r') as f:
    encrypted_data = json.load(f)

# ------------------------------
# Flatten original data
# ------------------------------
# One row per feature vector, in the iteration order of original_data ...
X_original = np.array([
    vec
    for _, record in original_data.items()
    for vec in record['features']
])
# ... and one label per row: the key the vector was stored under.
y = np.array([
    uid
    for uid, record in original_data.items()
    for _ in record['features']
])

# ------------------------------
# Generate "consistent random vectors" to simulate encrypted vectors
# ------------------------------
# Each original vector seeds its own generator (seed derived from the sum of
# its elements), so the same original vector always maps to the same
# pseudo-random vector of equal length.
X_encrypted = np.array([
    np.random.default_rng(int(np.sum(vec)*1000) % (2**32)).random(len(vec))
    for vec in X_original
])

# ------------------------------
# Encode labels
# ------------------------------
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# ------------------------------
# Train/test split function
# ------------------------------
def prepare_tensors(X, y, test_size=0.2, random_state=42):
    """Split ``X``/``y`` into train and test parts and convert them to tensors.

    Args:
        X: feature rows, in a form accepted by ``train_test_split`` and
            ``torch.tensor``.
        y: integer labels, one per row of ``X``.
        test_size: forwarded to ``train_test_split``; defaults to the 0.2
            that was previously hard-coded.
        random_state: forwarded to ``train_test_split``; defaults to the 42
            that was previously hard-coded, so existing calls split as before.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` where the feature tensors are
        ``torch.float32`` and the label tensors are ``torch.long``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # CrossEntropyLoss expects float inputs and int64 class indices.
    return (
        torch.tensor(X_train, dtype=torch.float32),
        torch.tensor(X_test, dtype=torch.float32),
        torch.tensor(y_train, dtype=torch.long),
        torch.tensor(y_test, dtype=torch.long),
    )

# Both feature sets are split against the same encoded labels; the original
# features are split first, then the simulated-encrypted ones.
original_split = prepare_tensors(X_original, y_encoded)
encrypted_split = prepare_tensors(X_encrypted, y_encoded)
Xo_train, Xo_test, yo_train, yo_test = original_split
Xe_train, Xe_test, ye_train, ye_test = encrypted_split

# ------------------------------
# Define simple MLP
# ------------------------------
class SimpleMLP(nn.Module):
    """Single-hidden-layer perceptron: Linear -> ReLU -> Linear.

    Same architecture as the ``SimpleMLP`` declared in the previous cell;
    it is declared again in this cell. The forward pass returns the output of
    the last linear layer directly (no softmax is applied inside the module).

    Args:
        input_dim: size of each input feature vector.
        hidden_dim: width of the hidden layer.
        output_dim: number of output units.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Layers are created in the same order as they are applied.
        stack = (
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        )
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.net(x)

def train_and_evaluate(X_train, X_test, y_train, y_test,
                       hidden_dim=32, epochs=50, lr=0.01):
    """Train a fresh ``SimpleMLP`` on the training split and return test accuracy.

    Training is full-batch: each epoch performs exactly one Adam step on the
    cross-entropy loss computed over all of ``X_train``.

    Args:
        X_train: float tensor of training features, indexed as
            ``(n_samples, n_features)``.
        X_test: float tensor of test features with the same feature width.
        y_train: integer class-index tensor, one entry per training row.
        y_test: integer class-index tensor, one entry per test row.
        hidden_dim: hidden layer width (default 32, the former constant).
        epochs: number of optimizer steps (default 50, the former constant).
        lr: Adam learning rate (default 0.01, the former constant).

    Returns:
        Fraction of test rows whose arg-max prediction equals the label,
        as a Python float in ``[0, 1]``.

    Raises:
        ValueError: if either label tensor is empty.
    """
    if len(y_train) == 0 or len(y_test) == 0:
        # An empty test split previously surfaced as a bare ZeroDivisionError.
        raise ValueError("train_and_evaluate needs non-empty train and test splits")

    input_dim = X_train.shape[1]
    # Size the output layer from the largest class index, not from the number
    # of distinct training labels: if any class id is absent from the training
    # split, the distinct count is smaller than max+1 and CrossEntropyLoss
    # rejects the larger targets as out of bounds. When every id 0..K-1 is
    # present, both formulas give K.
    num_classes = int(y_train.max().item()) + 1
    model = SimpleMLP(input_dim=input_dim, hidden_dim=hidden_dim, output_dim=num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # Training loop
    model.train()
    for _ in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)
        loss.backward()
        optimizer.step()

    # Evaluate
    model.eval()
    with torch.no_grad():
        outputs = model(X_test)
        predicted = torch.argmax(outputs, dim=1)
        # Test labels the model has no output unit for simply never match.
        accuracy = (predicted == y_test).sum().item() / len(y_test)
    return accuracy

# ------------------------------
# Train and evaluate original vs encrypted
# ------------------------------
# Train on the original features first, then on the simulated-encrypted ones;
# each call builds and trains its own model.
splits = (
    (Xo_train, Xo_test, yo_train, yo_test),
    (Xe_train, Xe_test, ye_train, ye_test),
)
acc_original, acc_encrypted = [train_and_evaluate(*split) for split in splits]

print(f"Accuracy on original vectors: {acc_original*100:.2f}%")
print(f"Accuracy on encrypted vectors: {acc_encrypted*100:.2f}%")


Accuracy on original vectors: 8.14%
Accuracy on encrypted vectors: 1.16%


In [15]:
# ------------------------------
# Show sample predictions for encrypted vectors
# ------------------------------
# Size the output layer from the label encoder rather than from the number of
# distinct labels in the training split: if any class is missing from
# ye_train, the distinct count is smaller than the largest class index + 1 and
# CrossEntropyLoss rejects those targets as out of bounds.
num_classes = len(le.classes_)
model_encrypted = SimpleMLP(input_dim=Xe_train.shape[1], hidden_dim=32, output_dim=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_encrypted.parameters(), lr=0.01)

# Train a new model on the encrypted vectors with the same procedure as
# train_and_evaluate (50 full-batch Adam steps). This is a freshly initialised
# model, not the one evaluated in the previous cell, so its accuracy is not
# guaranteed to match the number printed there.
model_encrypted.train()
for epoch in range(50):
    optimizer.zero_grad()
    outputs = model_encrypted(Xe_train)
    loss = criterion(outputs, ye_train)
    loss.backward()
    optimizer.step()

# Evaluate and show sample predictions
model_encrypted.eval()
with torch.no_grad():
    outputs = model_encrypted(Xe_test)
    predicted = torch.argmax(outputs, dim=1)
    accuracy = (predicted == ye_test).sum().item() / len(ye_test)
    print(f"Accuracy on encrypted vectors: {accuracy*100:.2f}%\n")

    # Show first 10 predictions vs true labels, mapped back to the original
    # label values through the fitted encoder.
    print("Sample predictions vs true labels (encrypted vectors):")
    for i in range(min(10, len(predicted))):
        print(f"Predicted: {le.inverse_transform([predicted[i].item()])[0]}, True: {le.inverse_transform([ye_test[i].item()])[0]}")


Accuracy on encrypted vectors: 1.16%

Sample predictions vs true labels (encrypted vectors):
Predicted: 8, True: 44
Predicted: 34, True: 9
Predicted: 21, True: 19
Predicted: 35, True: 5
Predicted: 11, True: 38
Predicted: 33, True: 40
Predicted: 36, True: 17
Predicted: 19, True: 17
Predicted: 37, True: 18
Predicted: 13, True: 19
