In [4]:
%%HTML
<script>
  // Toggle the visibility of every notebook input cell ('div.input') and
  // update the label of the '#toggleButton' control to match.
  // Reads and flips the global flag `code_shown`, which the document-ready
  // handler in this script initialises to false.
  function code_toggle() {
    if (code_shown) {
      // Duration must be a number: jQuery does not recognise the string '500'
      // and would silently use its default animation speed instead.
      $('div.input').hide(500);
      $('#toggleButton').val('Show Code');
    } else {
      $('div.input').show(500);
      $('#toggleButton').val('Hide Code');
    }
    code_shown = !code_shown;
  }

  // Once the DOM is ready, record that code is not shown (global flag read by
  // code_toggle) and hide every input cell without animation.
  $(function () {
    code_shown = false;
    $('div.input').hide();
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>

## Differentially Private PCA
---

The following tutorial gives one example of how the `dp_pca_sn()` function is called. The data samples are randomly drawn i.i.d. from a multivariate Gaussian distribution with a pre-defined mean and covariance matrix. The quality of the output subspace (measured as the energy of the covariance matrix captured in the reduced-dimensional subspace) is shown for both the differentially private PCA and the non-differentially private PCA as a comparison.

The parameters that can be adjusted are:

- Epsilon
- Sample_size
- k (number of leading directions kept for the reduced subspace)

In [5]:
from ipywidgets import interact
from IPython.display import display

# This tutorial gives an example of one way to use the differentially private PCA function.
# A non-differentially private version of the PCA process is also run so that the quality of the two can be compared.


# This function will be used to randomly generate a data matrix from a multivariate Gaussian distribution
def gen_data(Sample_size, k, d=10):
    """
    Randomly generate a data matrix from a zero-mean multivariate Gaussian.

    The covariance matrix A is diagonal: entry i is (d - i) for i < k and 1
    for every remaining feature.

    Inputs:
            Sample_size: total number of samples (rows) to return in the data matrix
            k: number of leading features that receive the larger variance d - i
            d: number of features (columns); defaults to 10
    Outputs:
            data_: data matrix with samples in rows, [Sample_size x d]
            A: covariance matrix the samples were drawn from, [d x d]
    """

    import numpy as np

    # diagonal of the covariance matrix: d - i for the first k features, 1 elsewhere
    feature_idx = np.arange(d)
    variances = np.where(feature_idx < k, d - feature_idx, 1).astype(float)
    A = np.diag(variances)

    # zero mean for every feature
    mean = np.zeros(d)

    # draw Sample_size samples, one per row
    data_ = np.random.multivariate_normal(mean, A, Sample_size)    # [Sample_size x d]

    return data_, A

# This function will allow the PCA outputs to be interactive
def show_pca_qual(Sample_size, k = 5, Epsilon = 1.0):
    """
    Run non-private and differentially private PCA on freshly generated data
    and display the quality of each resulting k-dimensional subspace.

    Quality is trace(U_k^T A U_k), where A is the covariance matrix returned
    by gen_data and U_k holds the first k left singular vectors of the
    estimated second-moment matrix.

    Inputs:
            Sample_size: number of samples to generate
            k: number of singular vectors kept; also forwarded to gen_data
            Epsilon: privacy parameter forwarded to dps.dp_pca_sn
    Outputs:
            None; both quality values are shown with display()
    """
    import numpy as np
    import dp_stats as dps

    def subspace_quality(second_moment):
        # only the left singular vectors are needed; the other SVD outputs are dropped
        U = np.linalg.svd(second_moment)[0]
        # grab the first k columns
        U_reduc = U[:, :k]
        # float() so both results are plain Python floats and format the same way
        return float(np.trace(np.dot(np.dot(U_reduc.T, A), U_reduc)))

    # generate the data matrix
    # NOTE(review): no random seed is set here, so displayed values vary between runs
    data_, A = gen_data(Sample_size, k)    # data_: samples are in rows, A: covariance matrix

    # go through the non-differentially private PCA routine
    sigma_control = np.dot(data_.T, data_)     # [d x d] = [d x Sample_size] [Sample_size x d]
    control_quality = subspace_quality(sigma_control)

    # go through the differentially private PCA routine
    # dp_pca_sn ( data, epsilon=1.0 )  // samples must be in columns
    # NOTE(review): confirm against the dp_stats docs whether dp_pca_sn expects
    # norm-bounded samples; the data is passed through unscaled here
    sigma_dp = dps.dp_pca_sn(data_.T, epsilon = Epsilon)
    dp_quality = subspace_quality(sigma_dp)

    # output the results
    control_txt = "Non-private Quality: {}".format(round(control_quality, 4))
    display(control_txt)
    dp_txt = "Differentially Private Quality: {}".format(round(dp_quality, 4))
    display(dp_txt)

# Wire show_pca_qual to interactive controls; each (min, max, step) tuple
# defines the range offered for the matching keyword argument.
interact(
    show_pca_qual,
    Sample_size=(50, 1000, 100),
    k=(1, 10, 1),
    Epsilon=(0.01, 3.0, 0.01),
)

'Non-private Quality: 26.9546'

'Differentially Private Quality: 26.9547'

In [6]:
%%HTML
<script>
  // Once the DOM is ready, hide every element matching the selectors below,
  // in the listed order.
  $(function () {
    [
      'div.prompt',
      'div.back-to-top',
      'nav#menubar',
      '.breadcrumb',
      '.hidden-print'
    ].forEach(function (selector) {
      $(selector).hide();
    });
  });
</script>

<footer id="attribution" style="float:right; color:#999; background:#fff;">
Created with Jupyter, delivered by Fastly, rendered by Rackspace.
</footer>