In [1]:
import scipy
import numpy as np
import math
def delta_Gaussian(eps, mu):

   """Compute delta of Gaussian mechanism with shift mu or equivalently noise scale 1/mu"""

   if mu==0:

       return 0

   return scipy.stats.norm.cdf(-eps / mu + mu / 2) - np.exp(eps) * scipy.stats.norm.cdf(-eps / mu - mu / 2)

 

def eps_Gaussian(delta, mu):

   """Compute eps of Gaussian mechanism with shift mu or equivalently noise scale 1/mu"""

   def f(x):

       return delta_Gaussian(x, mu) - delta

   return scipy.optimize.root_scalar(f, bracket=[0, 500], method='brentq').root

 

def compute_epsilon(noise_multiplier, num_steps, delta):

   return eps_Gaussian(delta, np.sqrt(num_steps) / noise_multiplier)


In [2]:

N=1939290 # yelp
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [1,2,4]:
    for noise in np.arange(20,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

threshold eps 1 break_noise 15.399999999999281 eps 1.000309
threshold eps 2 break_noise 8.029999999998129 eps 2.002377
threshold eps 4 break_noise 4.239999999997536 eps 4.005258


In [3]:
import math

epoch=10
for noise in [15.34, 8.03, 4.24]:
    for N in [1939290]:
        delta= 1/(N*math.log(N))
        print( f"noise {noise} N {N}, delta {delta:10f},  eps {compute_epsilon(noise, epoch, delta):4f}" )
    print("********")

noise 15.34 N 1939290, delta   0.000000,  eps 1.004455
********
noise 8.03 N 1939290, delta   0.000000,  eps 2.002377
********
noise 4.24 N 1939290, delta   0.000000,  eps 4.005258
********


In [4]:

N=8396 # iclr
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [1,2,4]:
    for noise in np.arange(20,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

threshold eps 1 break_noise 11.589999999998685 eps 1.000936
threshold eps 2 break_noise 6.209999999997844 eps 2.000256
threshold eps 4 break_noise 3.3699999999974004 eps 4.005924


In [5]:
import math

epoch=10
for noise in [11.60, 6.22, 3.38]:
    for N in [8396]:
        delta= 1/(N*math.log(N))
        print( f"noise {noise} N {N}, delta {delta:5f},  eps {compute_epsilon(noise, epoch, delta):4f}" )
    print("********")

noise 11.6 N 8396, delta 0.000013,  eps 0.999986
********
noise 6.22 N 8396, delta 0.000013,  eps 1.996654
********
noise 3.38 N 8396, delta 0.000013,  eps 3.992231
********


In [6]:

N=75316 # pubmed
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [1,2,4]:
    for noise in np.arange(20,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

threshold eps 1 break_noise 13.249999999998945 eps 1.000046
threshold eps 2 break_noise 6.999999999997968 eps 2.000318
threshold eps 4 break_noise 3.7399999999974582 eps 4.011225


In [7]:
import math

epoch=10
for noise in [13.26, 7.01, 3.75]:
    for N in [75316]:
        delta= 1/(N*math.log(N))
        print( f"noise {noise} N {N}, delta {delta:5f},  eps {compute_epsilon(noise, epoch, delta):4f}" )
    print("********")

noise 13.26 N 75316, delta 0.000001,  eps 0.999232
********
noise 7.01 N 75316, delta 0.000001,  eps 1.997191
********
noise 3.75 N 75316, delta 0.000001,  eps 3.999169
********


In [None]:
N=10301 # HoC
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [0.5, 1,2,4]:
    for noise in np.arange(40,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

In [None]:
import math

epoch=10
for noise in [13.26, 7.01, 3.75]:
    for N in [10301]:
        delta= 1/(N*math.log(N))
        print( f"noise {noise} N {N}, delta {delta:5f},  eps {compute_epsilon(noise, epoch, delta):4f}" )
    print("********")

In [2]:
N=5102.0 # psytar
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [0.5, 1,2,4]:
    for noise in np.arange(40,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

threshold eps 0.5 break_noise 20.980000000003784 eps 0.500092
threshold eps 1 break_noise 11.190000000005732 eps 1.000600
threshold eps 2 break_noise 6.010000000006762 eps 2.003046
threshold eps 4 break_noise 3.2800000000073055 eps 4.004656


In [35]:
N=30168.0 # psytar
delta= 1/(N*math.log(N))
epoch=10

break_noise=0
for eps in [0.5, 1,2,4]:
    for noise in np.arange(40,1, -0.01):
        compute_epsilon(noise, epoch, delta)
        if compute_epsilon(noise, epoch, delta)>eps:
            break_noise=noise
            break
    print("threshold eps", eps, "break_noise", break_noise, f"eps {compute_epsilon(noise, epoch, delta):4f}")

threshold eps 0.5 break_noise 23.87000000000321 eps 0.500107
threshold eps 1 break_noise 12.580000000005455 eps 1.000450
threshold eps 2 break_noise 6.680000000006629 eps 2.000630
threshold eps 4 break_noise 3.590000000007244 eps 4.009665


In [16]:
from transformers import AutoModelForCausalLM, AutoTokenizer
t = AutoTokenizer.from_pretrained('meta-llama/Llama-3.2-1B-Instruct')
m = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-3.2-1B-Instruct')

In [32]:
token_ids = t.apply_chat_template(
    conversation=[{"role": "system", "content": "Bla bla test"},
    {"role": "user", "content": "Can you rephrase the word 'cake'?"},
    {"role": "assistant", "content": "Here is a rephrased version of the word 'cake':"}],
    tokenize=True,
    return_tensors='pt'
)

res = m.generate(token_ids[:, :-1], max_new_tokens=64)

res2 = m.generate(token_ids[:, :], max_new_tokens=64)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


In [34]:
print(t.decode(res[0]))
print('----'*40)
print(t.decode(res2[0]))

<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Bla bla test<|eot_id|><|start_header_id|>user<|end_header_id|>

Can you rephrase the word 'cake'?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Here is a rephrased version of the word 'cake': 

- Birthday treat
- Dessert
- Sweet
- Pastry<|eot_id|>
----------------------------------------------------------------------------------------------------------------------------------------------------------------
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 Jul 2024

Bla bla test<|eot_id|><|start_header_id|>user<|end_header_id|>

Can you rephrase the word 'cake'?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Here is a rephrased version of the word 'cake':<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"Bread dessert sweet treat"<|eot_id|>
