# CS211: Data Privacy
## Final Project
### Nikhil and James

In [49]:
# Load the data and libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so prefer the new name when this
# matplotlib knows it and fall back to the legacy name on older installs.
whitegrid_style = (
    'seaborn-v0_8-whitegrid'
    if 'seaborn-v0_8-whitegrid' in plt.style.available
    else 'seaborn-whitegrid'
)
plt.style.use(whitegrid_style)

# Preliminary utility functions
def laplace_mech(v, sensitivity, epsilon):
    """Return `v` perturbed with zero-centred Laplace noise.

    The noise is drawn from `np.random.laplace` with location 0 and
    scale `sensitivity / epsilon`.

    Parameters:
        v: value to perturb.
        sensitivity: numerator of the noise scale.
        epsilon: denominator of the noise scale.

    Returns:
        `v` plus one Laplace noise sample.
    """
    noise_scale = sensitivity / epsilon
    noise = np.random.laplace(loc=0, scale=noise_scale)
    return v + noise

def laplace_mech_vec(qs, sensitivity, epsilon):
    """Apply `laplace_mech` to every element of `qs`.

    Each element gets its own independent noise draw, using the same
    `sensitivity` and `epsilon` for all of them.

    Parameters:
        qs: iterable of values to perturb.
        sensitivity: forwarded unchanged to `laplace_mech`.
        epsilon: forwarded unchanged to `laplace_mech`.

    Returns:
        A list of noisy values, in the same order as `qs`.
    """
    noisy_values = []
    for query_result in qs:
        noisy_values.append(laplace_mech(query_result, sensitivity, epsilon))
    return noisy_values

def gaussian_mech(v, sensitivity, epsilon, delta):
    """Return `v` perturbed with zero-mean Gaussian noise.

    The standard deviation of the noise is
    `sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon`.

    Parameters:
        v: value to perturb.
        sensitivity: multiplier in the noise standard deviation.
        epsilon: divisor in the noise standard deviation.
        delta: used inside the logarithm term `ln(1.25 / delta)`.

    Returns:
        `v` plus one Gaussian noise sample.
    """
    sigma = sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon
    noise = np.random.normal(loc=0, scale=sigma)
    return v + noise

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    """Add independent zero-mean Gaussian noise to every element of `vec`.

    Every element uses the same standard deviation,
    `sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon`, but gets its own
    noise draw.

    Parameters:
        vec: iterable of values to perturb.
        sensitivity: multiplier in the noise standard deviation.
        epsilon: divisor in the noise standard deviation.
        delta: used inside the logarithm term `ln(1.25 / delta)`.

    Returns:
        A list of noisy values, in the same order as `vec`.
    """
    # The scale does not depend on the element, so compute it once instead
    # of re-evaluating the sqrt/log for every entry.
    sigma = sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon
    return [v + np.random.normal(loc=0, scale=sigma) for v in vec]

def pct_error(orig, priv):
    """Return the absolute percent error of `priv` relative to `orig`.

    Computed as `|orig - priv| / |orig| * 100`. The denominator uses the
    magnitude of `orig` so the result is non-negative even when the true
    value is negative.

    Parameters:
        orig: the true (reference) value; scalar or numpy/pandas array-like.
        priv: the value being compared against `orig`.

    Returns:
        The percent error, element-wise for array inputs. An `orig` of zero
        is not special-cased; the division is left to numpy.
    """
    return np.abs(orig - priv) / np.abs(orig) * 100.0

# Credit card default data for clients in Taiwan during 2005
CC_DEFAULT_CSV_URL = 'https://raw.githubusercontent.com/nikhilchoppa/cs211-final-project/main/UCI_Credit_Card.csv'
cc_default_db = pd.read_csv(CC_DEFAULT_CSV_URL)

  plt.style.use('seaborn-whitegrid')


## Format database in order to calculate some statistics
### ...Merge all the columns that have `"_#"`
The `_#` suffix marks month-to-month statistics. They are easier to work with when each group is collected into a single column: one each for PAY, BILL_AMT, and PAY_AMT.

In [51]:
# Build the per-month column names for each group to merge.
# 'PAY' columns put an underscore before the month number (PAY_1 .. PAY_6);
# the other groups append the number directly (e.g. BILL_AMT1 .. BILL_AMT6).
merge_cols = []
for prefix in ('PAY', 'BILL_AMT', 'PAY_AMT'):
    separator = '_' if prefix == 'PAY' else ''
    merge_cols.append([prefix + separator + str(month) for month in range(1, 7)])

In [52]:
# For every group of columns in merge_cols, build one list per row holding
# that row's values from each column of the group, in column order.
# Each column is pulled out once with Series.tolist() and the columns are
# then transposed with zip, instead of a label lookup
# (cc_default_db[sub_col][row]) for every single cell. This is much faster
# and does not depend on the frame having a default 0..n-1 index.
fresh_cols = [
    [
        list(row_values)
        for row_values in zip(*(cc_default_db[name].tolist() for name in group))
    ]
    for group in merge_cols
]

In [53]:
# Remove the individual per-month columns in a single in-place drop ...
monthly_col_names = [name for group in merge_cols for name in group]
cc_default_db.drop(columns=monthly_col_names, inplace=True)
# ... and attach the merged list-valued columns, in the same order as merge_cols.
for merged_name, merged_values in zip(['PAY', 'BILL_AMT', 'PAY_AMT'], fresh_cols):
    cc_default_db[merged_name] = merged_values

In [55]:
# Preview the first rows to check the merged PAY / BILL_AMT / PAY_AMT columns
cc_default_db.head()

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,default.payment.next.month,PAY,BILL_AMT,PAY_AMT
0,1,20000.0,2,2,1,24,1,"[2, 2, -1, -1, -2, -2]","[3913.0, 3102.0, 689.0, 0.0, 0.0, 0.0]","[0.0, 689.0, 0.0, 0.0, 0.0, 0.0]"
1,2,120000.0,2,2,2,26,1,"[-1, 2, 0, 0, 0, 2]","[2682.0, 1725.0, 2682.0, 3272.0, 3455.0, 3261.0]","[0.0, 1000.0, 1000.0, 1000.0, 0.0, 2000.0]"
2,3,90000.0,2,2,2,34,0,"[0, 0, 0, 0, 0, 0]","[29239.0, 14027.0, 13559.0, 14331.0, 14948.0, ...","[1518.0, 1500.0, 1000.0, 1000.0, 1000.0, 5000.0]"
3,4,50000.0,2,2,1,37,0,"[0, 0, 0, 0, 0, 0]","[46990.0, 48233.0, 49291.0, 28314.0, 28959.0, ...","[2000.0, 2019.0, 1200.0, 1100.0, 1069.0, 1000.0]"
4,5,50000.0,1,2,1,57,0,"[-1, 0, -1, 0, 0, 0]","[8617.0, 5670.0, 35835.0, 20940.0, 19146.0, 19...","[2000.0, 36681.0, 10000.0, 9000.0, 689.0, 679.0]"
