In [6]:
import pandas as pd
import hashlib
import random
from datetime import datetime, timedelta
import numpy as np

def add_laplace_noise(value, scale):
    """Return ``value`` perturbed by zero-centred Laplace noise.

    Args:
        value: A scalar, or an array-like (NumPy array, pandas Series, ...)
            of numbers.
        scale: Scale parameter of the Laplace distribution, passed
            straight to ``np.random.laplace``.

    Returns:
        ``value + noise``. For a scalar a single sample is drawn; for an
        array-like one independent sample is drawn per element.
    """
    # A bare np.random.laplace() call yields ONE sample; adding that to an
    # array would shift every element by the same amount. Request a sample
    # per element whenever the input has a shape.
    size = None if np.ndim(value) == 0 else np.shape(value)
    noise = np.random.laplace(loc=0.0, scale=scale, size=size)
    return value + noise

def anonymize_data(input_file, output_file, gps_scale=0.1):
    """
    Anonymize a tab-separated GPS dataset and write the result to disk.

    The input is read as four columns named ID, Date, Latitude and
    Longitude. Because the column names are supplied explicitly, the first
    line of the file is read as data, not as a header.

    Each record is transformed as follows:

    * ID        -> first 8 hex characters of the MD5 digest of
                   "<ID>.<ISO year>-<ISO week>", so one ID keeps the same
                   pseudonym within an ISO week and gets a new one in
                   another week.
    * Date      -> moved to a randomly chosen day (Monday..Sunday) of the
                   same week; the time of day is kept.
    * Latitude, Longitude -> independent zero-centred Laplace noise with
                   scale ``gps_scale`` is added to every value.

    Args:
        input_file (str): Path to the input CSV file.
        output_file (str): Path to the output anonymized CSV file.
        gps_scale (float): Scale parameter for Laplace noise for GPS anonymization.

    Returns:
        None. Saves the anonymized dataset (tab-separated, with a header
        row, without the index) to the output file and prints a
        confirmation message.
    """
    # Load the data with custom delimiter (tab-separated)
    df = pd.read_csv(input_file, delimiter="\t", names=["ID", "Date", "Latitude", "Longitude"])

    # Convert the Date column to datetime
    dates = pd.to_datetime(df["Date"])
    n_rows = len(df)

    # ISO year/week are computed once and used only to build the pseudonym.
    iso = dates.dt.isocalendar()

    # Generate anonymized IDs by walking the columns in lockstep rather than
    # materialising a Series per row with DataFrame.apply(axis=1).
    # NOTE(review): an unsalted MD5 truncated to 32 bits can be reversed by
    # hashing candidate IDs and may collide on large datasets -- consider a
    # keyed hash (hmac) if the raw IDs are guessable.
    anon_ids = [
        hashlib.md5(f"{raw_id}.{year}-{week}".encode()).hexdigest()[:8]
        for raw_id, year, week in zip(df["ID"], iso["year"], iso["week"])
    ]

    # Modify dates to stay within the same week: rewind to Monday
    # (weekday 0), then step forward 0..6 days. The offsets still come from
    # random.randint, one draw per row in row order.
    day_offsets = [random.randint(0, 6) for _ in range(n_rows)]
    week_start = dates - pd.to_timedelta(dates.dt.weekday, unit="D")
    anon_dates = week_start + pd.to_timedelta(day_offsets, unit="D").to_numpy()

    # Add Laplace noise to GPS coordinates, one independent sample per row
    # (all latitude samples are drawn before the longitude samples).
    anon_lat = df["Latitude"] + np.random.laplace(loc=0.0, scale=gps_scale, size=n_rows)
    anon_lon = df["Longitude"] + np.random.laplace(loc=0.0, scale=gps_scale, size=n_rows)

    # Save the anonymized dataset under the original column names
    anonymized_df = pd.DataFrame(
        {
            "ID": anon_ids,
            "Date": anon_dates,
            "Latitude": anon_lat,
            "Longitude": anon_lon,
        }
    )
    anonymized_df.to_csv(output_file, index=False, sep="\t")
    print(f"Anonymized data saved to {output_file}")

# Example usage. Guarded so the files are only read/written when this code is
# executed directly (script or notebook cell), not when it is imported.
if __name__ == "__main__":
    anonymize_data(
        input_file="../file_origin/geo_data_1_format.csv",  # Replace with your actual input file name
        output_file="../file_ano/anonymized_geo_data_1.csv",  # Replace with your desired output file name
        gps_scale=0.1,  # Adjust scale if needed
    )

Anonymized data saved to ../file_ano/anonymized_geo_data_1.csv
