In [23]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KernelDensity
import time
from scipy.special import lambertw
import matplotlib as mpl

from noise_distribution import sample_laplace_radius, sample_staircase_radius


In [24]:
def planar_laplace_mechanism(lat, lon, epsilon, earth_radius=6_371_000):
    """
    Perturb a geographic point (lat, lon) with planar Laplace noise.
    
    Args:
      lat, lon       : original coordinates in decimal degrees
      epsilon        : privacy budget (>0)
      earth_radius   : in meters (default mean Earth radius)
    
    Returns:
      lat_p, lon_p   : perturbed coordinates in decimal degrees
    """
    # 1) sample radius & angle
    r     = sample_laplace_radius(epsilon)
    theta = np.random.uniform(0, 2*np.pi)
    
    # 2) convert original to local Cartesian (meters)
    x = earth_radius * np.radians(lon)
    y = earth_radius * np.radians(lat)
    
    # 3) add noise
    x_p = x + r * np.cos(theta)
    y_p = y + r * np.sin(theta)
    
    # 4) back to geo coords
    lon_p = np.degrees(x_p / earth_radius)
    lat_p = np.degrees(y_p / earth_radius)
    
    return lat_p, lon_p

In [25]:
if __name__ == "__main__":
    np.random.seed(42)
    phi, lam = 40.7128, -74.0060   # e.g. New York City
    eps = 0.01

    lat_noisy, lon_noisy = planar_laplace_mechanism(phi, lam, eps)
    print(f"Original: ({phi:.6f}, {lam:.6f})")
    print(f"Noisy:    ({lat_noisy:.6f}, {lon_noisy:.6f})")


Original: (40.712800, -74.006000)
Noisy:    (40.712443, -74.004883)


In [28]:
def sample_staircase_radius(epsilon, delta=1.0, L=None, size=1):
    """
    Sample the radius r from the infinite (or truncated) planar staircase PDF:
      f_i ∝ e^{-(i-1)ε}, on interval [(i-1)Δ, iΔ].
    If L is None, treat as infinite. Otherwise truncate at L and renormalize.
    """
    if L is None:
        p = 1 - np.exp(-epsilon)                  # success prob
        I = np.random.geometric(p, size=size) - 1 # 0-based bin index
    else:
        intervals = int(np.ceil(L / delta))
        k = np.arange(intervals)
        weights = np.exp(-epsilon * k)
        weights /= weights.sum()
        cdf = np.cumsum(weights)
        u = np.random.rand(size)
        I = np.searchsorted(cdf, u)
    
    r = (I + np.random.rand(size)) * delta
    return r.item() if size == 1 else r    

def planar_staircase_mechanism(lat, lon, epsilon, earth_radius=6_371_000):
    """
    Perturb a geographic point (lat, lon) with planar Laplace noise.
    
    Args:
      lat, lon       : original coordinates in decimal degrees
      epsilon        : privacy budget (>0)
      earth_radius   : in meters (default mean Earth radius)
    
    Returns:
      lat_p, lon_p   : perturbed coordinates in decimal degrees
    """
    # 1) sample radius & angle
    r     = sample_staircase_radius(epsilon, delta=1.0, L=None, size=1)
    theta = np.random.uniform(0, 2*np.pi)
    
    # 2) convert original to local Cartesian (meters)
    x = earth_radius * np.radians(lon)
    y = earth_radius * np.radians(lat)
    
    # 3) add noise
    x_p = x + r * np.cos(theta)
    y_p = y + r * np.sin(theta)
    
    # 4) back to geo coords
    lon_p = np.degrees(x_p / earth_radius)
    lat_p = np.degrees(y_p / earth_radius)
    
    return lat_p, lon_p

In [29]:
def haversine_distance(lat1, lon1, lat2, lon2, earth_radius=6_371_000):
    """Return distance in meters between (lat1, lon1) and (lat2, lon2)."""
    φ1, φ2 = np.radians(lat1), np.radians(lat2)
    Δφ = φ2 - φ1
    Δλ = np.radians(lon2 - lon1)
    a = np.sin(Δφ/2)**2 + np.cos(φ1)*np.cos(φ2)*np.sin(Δλ/2)**2
    return earth_radius * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

# Unified batch function
def batch_perturb_csv(input_folder, output_folder, mechanism="psm", epsilon=0.5, delta=5.0):
    """
    Perturb all CSVs in `input_folder` and save results in `output_folder`.
    
    mechanism: "psm", "plm", or "trpsm"
    epsilon: privacy budget
    delta: base threshold for TR-PSM
    """
    os.makedirs(output_folder, exist_ok=True)
    
    # Select function for PSM/PLM
    if mechanism == "psm":
        mech_fn = planar_staircase_mechanism
    elif mechanism == "plm":
        mech_fn = planar_laplace_mechanism
    
    for filepath in glob.glob(os.path.join(input_folder, "*.csv")):
        df = pd.read_csv(filepath)
        # Accept lowercase column names
        if not {"latitude","longitude"}.issubset(df.columns):
            print(f"Skipping {filepath}: missing latitude/longitude")
            continue
        
        if mechanism in ("psm", "plm"):
            # Apply PSM or PLM row-wise
            perturb = df.apply(
                lambda row: mech_fn(row["latitude"], row["longitude"], epsilon),
                axis=1, result_type="expand"
            )
            perturb.columns = ["perturbed_latitude", "perturbed_longitude"]
            df = pd.concat([df, perturb], axis=1)
            # distance
            df["distance"] = haversine_distance(
                df["latitude"], df["longitude"],
                df["perturbed_latitude"], df["perturbed_longitude"]
            )
        elif mechanism == "trpsm":
            # Apply TR-PSM to the entire trajectory
            pert_lat, pert_lon, distances = apply_trpsm_to_dataframe(
                df, epsilon, delta
            )
            df["perturbed_latitude"]  = pert_lat
            df["perturbed_longitude"] = pert_lon
            df["distance"]            = distances
        
        # Save
        out_path = os.path.join(output_folder, os.path.basename(filepath))
        df.to_csv(out_path, index=False)


In [34]:
import os
import glob

# List of epsilon values to iterate over
epsilons = [0.1]
input_folder = r"C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\original\geotrace\utility"
# output_folder = r"C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\distorted"

base_dirs = {
    "psm": r"C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\distorted\geotrace\utility\psm",
    "plm": r"C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\distorted\geotrace\utility\plm",
}

for mechanism, base_out in base_dirs.items():
    for eps in epsilons:
        suffix = f"eps{int(eps*10):02d}" if eps < 1 else f"eps{int(eps)}"
        out_folder = os.path.join(base_out, suffix)
        print(f"> {mechanism.upper()}  ε={eps} → {out_folder}")
        batch_perturb_csv(
            input_folder=input_folder,
            output_folder=out_folder,
            mechanism=mechanism,
            epsilon=eps,
            delta=5.0
        )

> PSM  ε=0.1 → C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\distorted\geotrace\utility\psm\eps01
> PLM  ε=0.1 → C:\Users\ss6365\Desktop\PrivAR_PSM_PSM_I\data\distorted\geotrace\utility\plm\eps01
