In [None]:
import os

import matplotlib
import matplotlib.font_manager
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import filtering
import mechanism as mc

# Render Matplotlib figures inline in the notebook output.
%matplotlib inline
# Print NumPy floating-point values with 4 digits of precision.
np.set_printoptions(precision=4)

In [None]:
# Global Matplotlib styling shared by every figure in this notebook.
plt.rcParams['font.family'] = 'Times New Roman'
plt.rcParams["mathtext.fontset"] = "stix"
plt.rcParams["font.size"] = 12
plt.rcParams['axes.linewidth'] = 1.0
plt.rcParams['axes.grid'] = True

# Drop the 'roman' entry from the font-weight table.
# NOTE(review): presumably the usual workaround that keeps Times New Roman from
# being rendered in bold - confirm it is still needed with the installed version.
# pop() with a default (instead of `del`) keeps this cell safe to re-run: the
# key is already gone on the second execution and `del` would raise KeyError.
matplotlib.font_manager.weight_dict.pop('roman', None)

# `_rebuild` is a private helper that is not available in every Matplotlib
# release; call it only when it exists so the cell does not fail with
# AttributeError on versions that no longer ship it.
_rebuild_fonts = getattr(matplotlib.font_manager, '_rebuild', None)
if _rebuild_fonts is not None:
    _rebuild_fonts()

plt.rcParams['xtick.direction'] = 'in'
plt.rcParams['ytick.direction'] = 'in'
plt.rcParams['image.cmap'] = 'viridis'

plt.rcParams["errorbar.capsize"] = 2.0 # error bar

In [None]:
def MAE(x, y):
    """Return the mean absolute error between ``x`` and ``y``.

    The element-wise difference ``x - y`` is taken first, so the two
    arguments must support subtraction with each other (NumPy broadcasting
    applies for arrays). The result is the mean of the absolute differences
    over all elements.
    """
    abs_err = np.absolute(x - y)
    return np.mean(abs_err)

# Foursquare

In [None]:
# Load the raw Foursquare check-in table (tab-separated, no header row).
txt_dir = 'data/Foursquare'
checkin_df = pd.read_csv(os.path.join(txt_dir, 'dataset_TIST2015_Checkins.txt'), sep="\t", header=None)
checkin_df.columns = ['user_id', 'venue_id', 'timestamp', 'timezone_offset']

# Keep the check-ins of a single venue. The explicit .copy() makes this an
# independent frame, so the column assignments below do not write into a
# slice of `checkin_df` (which triggers pandas' SettingWithCopyWarning).
checkin_df_venue = checkin_df[checkin_df['venue_id'] == '4b49cb0ff964a520b67326e3'].copy()
checkin_df_venue['timestamp'] = pd.to_datetime(checkin_df_venue['timestamp'])
# Shift each timestamp by its own offset; the offset column is interpreted as minutes (unit='m').
checkin_df_venue['modified_timestamp'] = checkin_df_venue['timestamp'] + pd.to_timedelta(checkin_df_venue['timezone_offset'], unit='m')
# Number of check-ins per 6-hour bin of the shifted timestamp, as a NumPy array.
data = checkin_df_venue.groupby(pd.Grouper(key='modified_timestamp', freq='6H')).size().values

In [None]:
# Series length (the whole series is used), plus the constants k and std
# that are passed to the mechanisms / filter below.
T, k, std = len(data), 10, 10
# I is 10% of the series length, truncated to an integer.
I = int(T*0.1)
x = data[:T]

# Filter `h` of kind 'gaussian' built for length T, and the quantities derived
# from it. NOTE(review): the exact meaning of A, L and sr is defined by the
# `filtering` / `mechanism` modules - confirm there.
h = filtering.get_h('gaussian', T, std=std)
A = filtering.get_circular(h)
L = sum(h**2)
sr = mc.srank_circular(h)

In [None]:
# Number of repetitions and the (eps, delta) arguments forwarded to every
# mechanism. NOTE(review): presumably the differential-privacy budget -
# confirm against the `mechanism` module.
trial = 1000
eps, delta = 0.5, 10**-4
l2_sens = np.sqrt(I)

# One list of per-trial MAE values for each mechanism.
res_z, res_zss, res_zfss, res_zdft = [], [], [], []
for _ in range(trial):
    # Every mechanism output is clipped to be non-negative before scoring.
    z = mc.gaussian(x, eps, delta, l2_sens).clip(min=0)
    zss = mc.ss_gaussian(x, eps, delta, I, k, interpolate_kind='linear').clip(min=0)
    zfss = mc.ssf_gaussian(x, A, eps, delta, l2_sens, k, sr=sr, L=L, interpolate_kind='linear').clip(min=0)
    zdft = mc.dft_gaussian(x, eps, delta, l2_sens, k=30).clip(min=0)
    for bucket, estimate in ((res_z, z), (res_zss, zss), (res_zfss, zfss), (res_zdft, zdft)):
        bucket.append(MAE(x, estimate))

# Report mean and standard deviation of the MAE over all trials.
labelled_results = {
    'Gaussian': res_z,
    'DFT': res_zdft,
    'Ours w/o filter': res_zss,
    'Ours w/ filter': res_zfss,
}
for label, res in labelled_results.items():
    print(label, ':', np.mean(res), np.std(res))

# Gowalla

In [None]:
# Load the raw Gowalla check-in table (tab-separated, no header row).
txt_dir = 'data/Gowalla'
checkin_df = pd.read_csv(os.path.join(txt_dir, 'loc-gowalla_totalCheckins.txt'), sep="\t", header=None)
checkin_df.columns = ['user_id', 'timestamp', 'latitude', 'longitude', 'location_id']
# Keep the check-ins of a single location. The explicit .copy() makes this an
# independent frame, so the column assignment below does not write into a
# slice of `checkin_df` (which triggers pandas' SettingWithCopyWarning).
checkin_df_venue = checkin_df[checkin_df['location_id'] == 55033].copy()
checkin_df_venue['timestamp'] = pd.to_datetime(checkin_df_venue['timestamp'])
# Number of check-ins per 12-hour bin, as a NumPy array.
data = checkin_df_venue.groupby(pd.Grouper(key='timestamp', freq='12H')).size().values

In [None]:
# Series length (the whole series is used), plus the constants k and std
# that are passed to the mechanisms / filter below.
T, k, std = len(data), 10, 10
# I is 10% of the series length, truncated to an integer.
I = int(T*0.1)
x = data[:T]

# Filter `h` of kind 'gaussian' built for length T, and the quantities derived
# from it. NOTE(review): the exact meaning of A, L and sr is defined by the
# `filtering` / `mechanism` modules - confirm there.
h = filtering.get_h('gaussian', T, std=std)
A = filtering.get_circular(h)
L = sum(h**2)
sr = mc.srank_circular(h)

In [None]:
# Number of repetitions and the (eps, delta) arguments forwarded to every
# mechanism. NOTE(review): presumably the differential-privacy budget -
# confirm against the `mechanism` module.
trial = 1000
eps, delta = 0.5, 10**-4

l2_sens = np.sqrt(I)

# One list of per-trial MAE values for each mechanism.
res_z, res_zss, res_zfss, res_zdft = [], [], [], []
for _ in range(trial):
    # Every mechanism output is clipped to be non-negative before scoring.
    z = mc.gaussian(x, eps, delta, l2_sens).clip(min=0)
    zss = mc.ss_gaussian(x, eps, delta, I, k, interpolate_kind='linear').clip(min=0)
    zfss = mc.ssf_gaussian(x, A, eps, delta, l2_sens, k, sr=sr, L=L, interpolate_kind='linear').clip(min=0)
    zdft = mc.dft_gaussian(x, eps, delta, l2_sens, k=30).clip(min=0)
    for bucket, estimate in ((res_z, z), (res_zss, zss), (res_zfss, zfss), (res_zdft, zdft)):
        bucket.append(MAE(x, estimate))

# Report mean and standard deviation of the MAE over all trials.
labelled_results = {
    'Gaussian': res_z,
    'DFT': res_zdft,
    'Ours w/o filter': res_zss,
    'Ours w/ filter': res_zfss,
}
for label, res in labelled_results.items():
    print(label, ':', np.mean(res), np.std(res))

# PeMS

In [None]:
# Read the PeMS flow table and keep its first column as a 1-D series.
# NOTE(review): presumably one column per sensor - confirm against the dataset.
df = pd.read_hdf('data/PEMS/pems-bay-flow.h5')
data = df.values.T[0]

In [None]:
# Only the first T = 1800 samples are used, plus the constants k and std
# that are passed to the mechanisms / filter below.
T, k, std = 1800, 10, 10
# I is 10% of T, truncated to an integer.
I = int(T*0.1)
x = data[:T]

# Filter `h` of kind 'gaussian' built for length T, and the quantities derived
# from it. NOTE(review): the exact meaning of A, L and sr is defined by the
# `filtering` / `mechanism` modules - confirm there.
h = filtering.get_h('gaussian', T, std=std)
A = filtering.get_circular(h)
L = sum(h**2)
sr = mc.srank_circular(h)

In [None]:
# Number of repetitions and the (eps, delta) arguments forwarded to every
# mechanism. NOTE(review): presumably the differential-privacy budget -
# confirm against the `mechanism` module.
trial = 1000
eps, delta = 0.5, 10**-4
l2_sens = np.sqrt(I)

# One list of per-trial MAE values for each mechanism.
res_z, res_zss, res_zfss, res_zdft = [], [], [], []
for _ in range(trial):
    # Mechanism outputs are scored exactly as returned (no clipping in this cell).
    z = mc.gaussian(x, eps, delta, l2_sens)
    zss = mc.ss_gaussian(x, eps, delta, I, k, interpolate_kind='linear')
    zfss = mc.ssf_gaussian(x, A, eps, delta, l2_sens, k, sr=sr, L=L, interpolate_kind='linear')
    zdft = mc.dft_gaussian(x, eps, delta, l2_sens, k=30)
    for bucket, estimate in ((res_z, z), (res_zss, zss), (res_zfss, zfss), (res_zdft, zdft)):
        bucket.append(MAE(x, estimate))

# Report mean and standard deviation of the MAE over all trials.
labelled_results = {
    'Gaussian': res_z,
    'DFT': res_zdft,
    'Ours w/o filter': res_zss,
    'Ours w/ filter': res_zfss,
}
for label, res in labelled_results.items():
    print(label, ':', np.mean(res), np.std(res))