In [1]:
pip install dash plotly pandas numpy dash-core-components dash-html-components

Note: you may need to restart the kernel to use updated packages.


In [None]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from dash import Dash, dcc, html
from dash.dependencies import Input, Output


# Load the voice-command metadata table and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='voice_command_dataset.csv')
df.head(n=5)

Unnamed: 0,path,speaker,action,object,fluency_level,current_language,gender,age_range
0,dataset/audio/speakers/2BqVo8kVB2Skwgyb/0a3129...,2BqVo8kVB2Skwgyb,change language,none,advanced,English (United States),female,22-40
1,dataset/audio/speakers/2BqVo8kVB2Skwgyb/0ee42a...,2BqVo8kVB2Skwgyb,activate,music,advanced,English (United States),female,22-40
2,dataset/audio/speakers/2BqVo8kVB2Skwgyb/1d9f39...,2BqVo8kVB2Skwgyb,deactivate,lights,advanced,English (United States),female,22-40
3,dataset/audio/speakers/2BqVo8kVB2Skwgyb/269fc2...,2BqVo8kVB2Skwgyb,increase,volume,advanced,English (United States),female,22-40
4,dataset/audio/speakers/2BqVo8kVB2Skwgyb/5bbda3...,2BqVo8kVB2Skwgyb,increase,volume,advanced,English (United States),female,22-40


In [4]:
def apply_gaussian_mechanism(data, epsilon, delta=1e-5):
    """Return ``data`` with zero-mean Gaussian noise added to every element.

    The base standard deviation follows the Gaussian-mechanism calibration
    ``sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon`` with a fixed
    sensitivity of 1.0, and is then multiplied by ``std(data) / 10``.

    NOTE(review): the ``std(data) / 10`` rescaling is data dependent, so the
    noise actually added is not the textbook calibration for the nominal
    (epsilon, delta) -- confirm this is intended.

    Args:
        data: Numeric array-like of any shape.
        epsilon: Privacy budget; must be positive.
        delta: Failure probability; must lie strictly between 0 and 1.

    Returns:
        ``data + noise``, where ``noise`` has the same shape as ``data``.

    Raises:
        ValueError: If ``epsilon`` or ``delta`` is outside its valid range.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")
    if not 0 < delta < 1:
        raise ValueError("delta must be strictly between 0 and 1")

    sensitivity = 1.0
    sigma = np.sqrt(2 * np.log(1.25 / delta)) * (sensitivity / epsilon)

    # Shrink the calibrated sigma relative to the spread of the input.
    scale_factor = np.std(data) / 10

    # Draw one sample per element (np.shape instead of len) so that inputs
    # with more than one dimension receive correctly shaped noise.
    noise = np.random.normal(0, sigma * scale_factor, size=np.shape(data))
    return data + noise

def calculate_utility(original_data, noisy_data):
    """Score how closely ``noisy_data`` tracks ``original_data``.

    Both inputs are divided by the maximum of ``original_data``, the mean
    absolute difference between the scaled values is taken, and that error is
    mapped through ``exp(-error)``. Identical inputs therefore score 1.0 and
    the score decreases as the error grows.

    Args:
        original_data: Numeric array-like holding the reference values.
        noisy_data: Numeric array-like of the same shape as ``original_data``.

    Returns:
        The utility score as a float.
    """
    original = np.asarray(original_data, dtype=float)
    noisy = np.asarray(noisy_data, dtype=float)

    # A zero maximum (e.g. an all-zero input) would make the normalisation
    # divide by zero and turn the score into NaN; fall back to unscaled error.
    peak = np.max(original)
    scale = peak if peak != 0 else 1.0

    # Mean absolute error of the scaled values
    relative_error = np.mean(np.abs(original / scale - noisy / scale))

    # Convert the error into a score
    return np.exp(-relative_error)


app = Dash(__name__)

# Page structure: heading, privacy-level slider, parameter read-out, graph.
# Style dictionaries use camelCase keys (marginBottom, textAlign), which is the
# form Dash expects for inline CSS properties.
app.layout = html.Div([
    html.H1("Voice Command Data: Utility vs Data Size (Gaussian Mechanism)",
            style={'textAlign': 'center', 'marginBottom': '20px'}),

    # Privacy-level selector
    html.Div([
        html.Label('Privacy Level (higher value = stronger privacy):'),
        dcc.Slider(
            id='privacy-slider',
            min=1,
            max=10,
            step=1,
            value=5,
            marks={i: str(i) for i in range(1, 11)}
        )
    ], style={'width': '80%', 'margin': '20px auto'}),

    # Parameter read-out (filled in by the callback)
    html.Div(id='parameter-display', style={'textAlign': 'center'}),

    # Graph (filled in by the callback)
    dcc.Graph(id='utility-graph'),
])

@app.callback(
    [Output('utility-graph', 'figure'),
     Output('parameter-display', 'children')],
    [Input('privacy-slider', 'value')]
)
def update_graph(privacy_level):
    """Rebuild the utility curve and the parameter read-out for a slider value.

    The 'action' column of ``df`` is encoded as integer ids, Gaussian noise is
    applied to growing prefixes of that encoding (100, 200, ... rows, stopping
    before ``len(df)``), and the utility of every prefix is plotted in order.

    Args:
        privacy_level: Value of the 'privacy-slider'; epsilon is its reciprocal.

    Returns:
        A ``(figure, children)`` pair for the 'utility-graph' figure and the
        'parameter-display' children.
    """
    epsilon = 1 / privacy_level
    delta = 1e-5

    # Encode each distinct action as an integer id (order of first appearance).
    unique_actions = df['action'].unique()
    original_data = df['action'].map(
        dict(zip(unique_actions, range(len(unique_actions))))
    ).values

    data_points = range(100, len(df), 100)
    utilities = []

    for n in data_points:
        data_subset = original_data[:n]
        # Perturb the prefix with the Gaussian mechanism
        noisy_data = apply_gaussian_mechanism(data_subset, epsilon, delta)
        # Score the perturbed prefix against the original
        utilities.append(calculate_utility(data_subset, noisy_data))

    fig = go.Figure()

    # No x values are supplied, so points are plotted against their position
    # in the sequence of prefix sizes; the x tick labels are hidden below.
    fig.add_trace(go.Scatter(
        y=utilities,
        mode='lines+markers',
        line=dict(color='blue', width=2)
    ))

    fig.update_layout(
        title=f'Data Utility vs Size (Privacy Level {privacy_level})',
        xaxis_title='',
        yaxis_title='Level of Personalization',
        width=850,
        height=534,
        showlegend=False,
        yaxis=dict(
            range=[0, 1],
            showticklabels=True
        ),
        xaxis=dict(
            showticklabels=False
        )
    )
    # Reference line at 0.6
    fig.add_hline(y=0.6, line_dash="dash", line_color="red", annotation_text="Balanced Privacy", annotation_position="top right")

    # Grid styling for both axes
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')

    params_text = [
        f'Privacy Level: {privacy_level}',
        f'ε = {epsilon:.3f}',
        f'δ = {delta}'
    ]

    # Give every entry its own Div: bare strings placed side by side in one
    # container are rendered as a single run of text with nothing between them.
    return fig, html.Div([html.Div(line) for line in params_text])
    

if __name__ == '__main__':
    # Dash.run is the supported entry point; run_server is the legacy alias
    # and is not available in current Dash releases.
    app.run(debug=True, port=8054)

In [5]:
def apply_gaussian_noise(data, epsilon, delta=1e-5):
    """Add zero-mean Gaussian noise to every element of ``data``.

    The noise standard deviation is
    ``sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon`` (sensitivity fixed
    at 1.0) multiplied by ``std(data) / 10``.

    NOTE(review): the data-dependent ``std(data) / 10`` factor means the noise
    is not the textbook calibration for the nominal (epsilon, delta) --
    confirm this is intended.

    Args:
        data: Numeric array-like of any shape.
        epsilon: Privacy budget; must be positive.
        delta: Failure probability; must lie strictly between 0 and 1.

    Returns:
        ``data + noise`` with ``noise`` shaped like ``data``.

    Raises:
        ValueError: If ``epsilon`` or ``delta`` is outside its valid range.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")
    if not 0 < delta < 1:
        raise ValueError("delta must be strictly between 0 and 1")

    sensitivity = 1.0
    sigma = np.sqrt(2 * np.log(1.25 / delta)) * (sensitivity / epsilon)
    scale_factor = np.std(data) / 10
    # np.shape (rather than len) keeps the noise aligned with inputs that have
    # more than one dimension.
    noise = np.random.normal(0, sigma * scale_factor, size=np.shape(data))
    return data + noise

# Distinct action labels found in the dataset
actions = df['action'].unique()

# Lookup tables in both directions: id -> label and label -> id
id_to_action = dict(enumerate(actions))
action_to_id = {label: code for code, label in id_to_action.items()}

# Numeric encoding of every row's action
action_values = df['action'].map(action_to_id).to_numpy()

# Perturb the encoded actions at a fixed privacy budget
epsilon = 0.2
noisy_values = apply_gaussian_noise(action_values, epsilon=epsilon)

# Convert a noisy value back to the closest category
def get_closest_action(noisy_value):
    """Return the action label whose id is nearest to ``noisy_value``.

    The value is rounded to an integer and clamped to the range
    ``[0, len(actions) - 1]`` before it is looked up in ``id_to_action``.
    Both ``actions`` and ``id_to_action`` are read from the enclosing scope.
    """
    highest_id = len(actions) - 1
    # Cap at the largest id first, then floor at zero.
    nearest_id = min(int(round(noisy_value)), highest_id)
    if nearest_id < 0:
        nearest_id = 0
    return id_to_action[nearest_id]
# Print a side-by-side table of the original action, its noisy numeric value,
# and the action that the noisy value maps back to.
print("Original vs Noisy Data:")
print("-" * 70)
print("Original Action\t\tNoisy Value\tMapped Action")
print("-" * 70)
# Only the first 20 rows are shown; indexing assumes the dataset has at least 20 rows.
for i in range(20): 
    original_action = df['action'].iloc[i]
    noisy_value = noisy_values[i]
    # Round/clamp the noisy value to a valid id and look up its label
    mapped_action = get_closest_action(noisy_value)
    print(f"{original_action:<20} {noisy_value:>.2f}\t{mapped_action}")

Original vs Noisy Data:
----------------------------------------------------------------------
Original Action		Noisy Value	Mapped Action
----------------------------------------------------------------------
change language      5.81	decrease
activate             -0.03	change language
deactivate           0.18	change language
increase             2.46	deactivate
increase             2.56	increase
decrease             9.07	decrease
increase             6.46	decrease
increase             -2.67	change language
decrease             0.20	change language
decrease             2.83	increase
decrease             3.27	increase
decrease             0.90	activate
activate             0.17	change language
activate             3.82	decrease
activate             2.12	deactivate
deactivate           0.58	activate
increase             1.90	deactivate
increase             4.30	decrease
increase             3.20	increase
increase             3.49	increase


In [9]:
def apply_gaussian_noise(data, epsilon, delta=1e-5):
    """Perturb ``data`` element-wise with zero-mean Gaussian noise.

    Standard deviation of the noise:
    ``sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon * std(data) / 10``
    with sensitivity fixed at 1.0.

    NOTE(review): because of the ``std(data) / 10`` factor the noise level
    depends on the data and is not the textbook calibration for the nominal
    (epsilon, delta) -- confirm this is intended.

    Args:
        data: Numeric array-like of any shape.
        epsilon: Privacy budget; must be positive.
        delta: Failure probability; must lie strictly between 0 and 1.

    Returns:
        ``data + noise`` where ``noise`` has the shape of ``data``.

    Raises:
        ValueError: If ``epsilon`` or ``delta`` is outside its valid range.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")
    if not 0 < delta < 1:
        raise ValueError("delta must be strictly between 0 and 1")

    sensitivity = 1.0
    sigma = np.sqrt(2 * np.log(1.25 / delta)) * (sensitivity / epsilon)
    scale_factor = np.std(data) / 10
    # One draw per element, whatever the dimensionality of the input.
    noise = np.random.normal(0, sigma * scale_factor, size=np.shape(data))
    return data + noise


# Distinct labels of the two columns; the 'object' column is referred to as
# "category" from here on.
actions = df['action'].unique()
categories = df['object'].unique()

# id -> label tables, and their inverses (label -> id)
id_to_action = dict(enumerate(actions))
id_to_category = dict(enumerate(categories))
action_to_id = {label: code for code, label in id_to_action.items()}
category_to_id = {label: code for code, label in id_to_category.items()}

# Numeric encodings of both columns
action_values = df['action'].map(action_to_id).to_numpy()
category_values = df['object'].map(category_to_id).to_numpy()

def get_closest_category(noisy_value, id_to_mapping):
    """Look up the label whose integer id is nearest to ``noisy_value``.

    Args:
        noisy_value: Numeric value to round to an id.
        id_to_mapping: Mapping from ids ``0 .. len - 1`` to labels.

    Returns:
        The label stored under the rounded id, after clamping that id to
        ``[0, len(id_to_mapping) - 1]``.
    """
    last_id = len(id_to_mapping) - 1
    candidate = int(round(noisy_value))
    # Cap at the largest id first, then floor at zero.
    if candidate > last_id:
        candidate = last_id
    if candidate < 0:
        candidate = 0
    return id_to_mapping[candidate]


# Privacy levels to compare; epsilon is taken as the reciprocal of the level.
privacy_levels = [1, 3, 4, 7]

# Results for each privacy level
for level in privacy_levels:
    epsilon = 1 / level
    
    print(f"\nPrivacy Level {level} (ε = {epsilon:.3f}):")
    print("=" * 80)
    
    
    # Fresh noise is drawn for both encoded columns at this level.
    noisy_actions = apply_gaussian_noise(action_values, epsilon)
    noisy_categories = apply_gaussian_noise(category_values, epsilon)
    
    print("\nACTION COMMANDS:")
    print("-" * 60)
    print("Original Action\t\tNoisy Action")
    print("-" * 60)
    # First 30 rows only; indexing assumes the dataset has at least 30 rows.
    for i in range(30):  
        original_action = df['action'].iloc[i]
        noisy_action = get_closest_category(noisy_actions[i], id_to_action)
        print(f"{original_action:<20} {noisy_action}")
    
    print("\nCATEGORIES:")
    print("-" * 60)
    print("Original Category\t\tNoisy Category")
    print("-" * 60)
    # Same 30-row preview for the 'object' column.
    for i in range(30):  
        original_category = df['object'].iloc[i]  
        noisy_category = get_closest_category(noisy_categories[i], id_to_category)
        print(f"{original_category:<20} {noisy_category}")
    
    print("\n" + "=" * 80)


Privacy Level 1 (ε = 1.000):

ACTION COMMANDS:
------------------------------------------------------------
Original Action		Noisy Action
------------------------------------------------------------
change language      change language
activate             change language
deactivate           activate
increase             increase
increase             deactivate
decrease             increase
increase             increase
increase             deactivate
decrease             decrease
decrease             increase
decrease             increase
decrease             decrease
activate             change language
activate             deactivate
activate             change language
deactivate           deactivate
increase             increase
increase             increase
increase             deactivate
increase             increase
increase             increase
increase             increase
increase             increase
increase             deactivate
increase             decrease
increase  

In [11]:
def calculate_change_probability(original_values, noisy_values):
    """Return the percentage of positions at which the two sequences differ.

    Args:
        original_values: Sized iterable of reference values.
        noisy_values: Iterable of values compared position by position.

    Returns:
        ``100 * changed / len(original_values)`` as a float, or ``0.0`` when
        ``original_values`` is empty.
    """
    total = len(original_values)
    if total == 0:
        # Nothing to compare; avoid dividing by zero.
        return 0.0
    changes = sum(1 for orig, noisy in zip(original_values, noisy_values) if orig != noisy)
    return (changes / total) * 100
# Privacy levels to test
# This cell uses apply_gaussian_noise, get_closest_category, action_values,
# category_values, id_to_action and id_to_category, none of which are defined
# here; they must already exist in the kernel.
privacy_levels = [1, 3, 4, 7]

print("Change Probabilities for Different Privacy Levels:")
print("=" * 80)
print("Privacy Level\tCategory Changes")
print("-" * 80)

for level in privacy_levels:
    epsilon = 1 / level
    
    # Add noise
    noisy_actions = apply_gaussian_noise(action_values, epsilon)
    noisy_categories = apply_gaussian_noise(category_values, epsilon)
    
    # Map the noisy values back to text labels
    noisy_action_texts = [get_closest_category(val, id_to_action) for val in noisy_actions]
    noisy_category_texts = [get_closest_category(val, id_to_category) for val in noisy_categories]
    
    # Compute the percentage of labels that changed
    # NOTE(review): action_change_prob is computed but never printed below; only
    # the category percentage is reported -- confirm whether that is intended.
    action_change_prob = calculate_change_probability(df['action'], noisy_action_texts)
    category_change_prob = calculate_change_probability(df['object'], noisy_category_texts)
    
    print(f"Level {level}\t\t{category_change_prob:.1f}%")


# Row-by-row examples at a single level; new noise is drawn here, separate from
# the draws made inside the loop above.
print("\nDetailed Examples (Level 4):")
print("=" * 80)
level = 4
epsilon = 1 / level
noisy_actions = apply_gaussian_noise(action_values, epsilon)
noisy_categories = apply_gaussian_noise(category_values, epsilon)

print("\nACTION COMMANDS Examples:")
print("-" * 60)
print("Original Action\t\tNoisy Action\t\tChanged?")
print("-" * 60)
# First 10 rows only; indexing assumes the dataset has at least 10 rows.
for i in range(10):
    original_action = df['action'].iloc[i]
    noisy_action = get_closest_category(noisy_actions[i], id_to_action)
    # Check mark when the label changed, cross when it stayed the same
    changed = "✓" if original_action != noisy_action else "✗"
    print(f"{original_action:<20} {noisy_action:<20} {changed}")

print("\nCATEGORIES Examples:")
print("-" * 60)
print("Original Category\t\tNoisy Category\t\tChanged?")
print("-" * 60)
# Same 10-row preview for the 'object' column.
for i in range(10):
    original_category = df['object'].iloc[i]
    noisy_category = get_closest_category(noisy_categories[i], id_to_category)
    changed = "✓" if original_category != noisy_category else "✗"
    print(f"{original_category:<20} {noisy_category:<20} {changed}")

Change Probabilities for Different Privacy Levels:
Privacy Level	Category Changes
--------------------------------------------------------------------------------
Level 1		33.8%
Level 3		64.1%
Level 4		69.0%
Level 7		74.2%

Detailed Examples (Level 4):

ACTION COMMANDS Examples:
------------------------------------------------------------
Original Action		Noisy Action		Changed?
------------------------------------------------------------
change language      change language      ✗
activate             deactivate           ✓
deactivate           activate             ✓
increase             decrease             ✓
increase             deactivate           ✓
decrease             decrease             ✗
increase             decrease             ✓
increase             activate             ✓
decrease             decrease             ✗
decrease             decrease             ✗

CATEGORIES Examples:
------------------------------------------------------------
Original Category		Noisy Category		

In [5]:
def apply_gaussian_mechanism(data, epsilon, delta=1e-5):
    """Improved Gaussian mechanism: add zero-mean Gaussian noise to ``data``.

    The noise standard deviation is
    ``sqrt(2 * ln(1.25 / delta)) * sensitivity / epsilon`` (sensitivity fixed
    at 1.0), further multiplied by ``std(data) / 10``.

    NOTE(review): the ``std(data) / 10`` factor makes the noise level depend on
    the data, so it is not the textbook calibration for the nominal
    (epsilon, delta) -- confirm this is intended.

    Args:
        data: Numeric array-like of any shape.
        epsilon: Privacy budget; must be positive.
        delta: Failure probability; must lie strictly between 0 and 1.

    Returns:
        ``data + noise`` where ``noise`` has the shape of ``data``.

    Raises:
        ValueError: If ``epsilon`` or ``delta`` is outside its valid range.
    """
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")
    if not 0 < delta < 1:
        raise ValueError("delta must be strictly between 0 and 1")

    sensitivity = 1.0
    sigma = np.sqrt(2 * np.log(1.25 / delta)) * (sensitivity / epsilon)
    scale_factor = np.std(data) / 10
    # np.shape (rather than len) so multi-dimensional inputs get matching noise.
    noise = np.random.normal(0, sigma * scale_factor, size=np.shape(data))
    return data + noise

def calculate_utility(original_data, noisy_data):
    """Improved utility calculation.

    Both inputs are divided by the maximum of ``original_data``; the mean
    absolute difference of the scaled values is then mapped through
    ``exp(-error)``, so identical inputs score 1.0 and larger errors score
    lower.

    Args:
        original_data: Numeric array-like holding the reference values.
        noisy_data: Numeric array-like of the same shape as ``original_data``.

    Returns:
        The utility score as a float.
    """
    original = np.asarray(original_data, dtype=float)
    noisy = np.asarray(noisy_data, dtype=float)

    # Guard the normalisation: a zero maximum would divide by zero and produce
    # NaN, so the error is left unscaled in that case.
    peak = np.max(original)
    scale = peak if peak != 0 else 1.0

    relative_error = np.mean(np.abs(original / scale - noisy / scale))
    return np.exp(-relative_error)

# Utility values for the standard reference line
def calculate_standard_line(data, privacy_level=0.7):
    """Compute the utility curve drawn as the reference line.

    The 'action' column of ``data`` is encoded as integer ids, Gaussian noise
    is applied to growing prefixes of that encoding (100, 200, ... rows,
    stopping before ``len(data)``), and the utility of each prefix is recorded.

    Args:
        data: DataFrame with an 'action' column.
        privacy_level: Level whose reciprocal is used as epsilon (default 0.7).

    Returns:
        A ``(sizes, utilities)`` pair of equally long lists.
    """
    epsilon = 1 / privacy_level

    # Use the frame that was passed in rather than a module-level one, so the
    # function honours its ``data`` argument.
    unique_actions = data['action'].unique()
    original_data = data['action'].map(
        dict(zip(unique_actions, range(len(unique_actions))))
    ).values

    data_points = range(100, len(data), 100)
    standard_utilities = []

    for n in data_points:
        data_subset = original_data[:n]
        noisy_data = apply_gaussian_mechanism(data_subset, epsilon)
        standard_utilities.append(calculate_utility(data_subset, noisy_data))

    return list(data_points), standard_utilities

# Initialise the Dash application
app = Dash(__name__)

# Page structure: heading, privacy-level slider, parameter read-out, graph.
# Style dictionaries use camelCase keys (marginBottom, textAlign), which is the
# form Dash expects for inline CSS properties.
app.layout = html.Div([
    html.H1("Voice Command Data: Utility vs Data Size (Gaussian Mechanism)",
            style={'textAlign': 'center', 'marginBottom': '20px'}),

    # Privacy-level control slider
    html.Div([
        html.Label('Privacy Level (higher value = stronger privacy):'),
        dcc.Slider(
            id='privacy-slider',
            min=1,
            max=10,
            step=1,
            value=5,
            marks={i: str(i) for i in range(1, 11)}
        )
    ], style={'width': '80%', 'margin': '20px auto'}),

    # Parameter read-out
    html.Div(id='parameter-display', style={'textAlign': 'center'}),

    # Chart
    dcc.Graph(id='utility-graph'),
])

@app.callback(
    [Output('utility-graph', 'figure'),
     Output('parameter-display', 'children')],
    [Input('privacy-slider', 'value')]
)
def update_graph(privacy_level):
    """Rebuild the utility figure and parameter read-out for a slider value.

    Two curves are drawn against the prefix sizes 100, 200, ... (stopping
    before ``len(df)``): the utility at the selected privacy level, and a
    reference curve computed by ``calculate_standard_line`` at level 0.7.

    Args:
        privacy_level: Value of the 'privacy-slider'; epsilon is its reciprocal.

    Returns:
        A ``(figure, children)`` pair for the 'utility-graph' figure and the
        'parameter-display' children.
    """
    epsilon = 1 / privacy_level
    delta = 1e-5

    # Prepare the data: encode each distinct action as an integer id.
    unique_actions = df['action'].unique()
    original_data = df['action'].map(
        dict(zip(unique_actions, range(len(unique_actions))))
    ).values

    data_points = range(100, len(df), 100)
    utilities = []

    for n in data_points:
        data_subset = original_data[:n]
        noisy_data = apply_gaussian_mechanism(data_subset, epsilon, delta)
        utilities.append(calculate_utility(data_subset, noisy_data))

    # Reference-line data (recomputed, with fresh noise, on every call)
    standard_x, standard_y = calculate_standard_line(df, privacy_level=0.7)

    # Build the figure
    fig = go.Figure()

    # Curve for the currently selected privacy level
    fig.add_trace(go.Scatter(
        x=list(data_points),
        y=utilities,
        mode='lines+markers',
        name=f'Current Privacy Level ({privacy_level})',
        line=dict(color='blue', width=2)
    ))

    # Standard reference curve
    fig.add_trace(go.Scatter(
        x=standard_x,
        y=standard_y,
        mode='lines',
        name='Standard (Privacy Level 0.7)',
        line=dict(color='red', width=2, dash='dash')
    ))

    fig.update_layout(
        title=f'Data Utility vs Size (Privacy Level {privacy_level})',
        xaxis_title='Number of Voice Commands',
        yaxis_title='Utility Score',
        yaxis=dict(range=[0, 1]),
        width=850,
        height=534,
        showlegend=True,
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99
        )
    )

    # Grid lines
    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')

    # Each parameter gets its own Div: bare strings placed side by side in one
    # container are rendered as a single run of text with nothing between them.
    # The Divs are block elements, so the note that follows starts on a new line.
    params_text = [
        html.Div(f'Privacy Level: {privacy_level}'),
        html.Div(f'ε = {epsilon:.3f}'),
        html.Div(f'δ = {delta}'),
        html.Span('Red dashed line shows standard at Privacy Level 0.7',
                 style={'color': 'red'})
    ]

    return fig, html.Div(params_text)

if __name__ == '__main__':
    # Dash.run is the supported entry point; run_server is the legacy alias
    # and is not available in current Dash releases.
    app.run(debug=True, port=8051)