# BVM library examples: single-dataset

In [1]:
import pandas
from bvmlib.bvm import BVM

## [Adult dataset](https://archive.ics.uci.edu/ml/datasets/Adult)

In [2]:
def load1():
    header = ['age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status', 'occupation',
              'relationship', 'race', 'sex', 'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
              'classification']
    attributes = ['age', 'sex', 'race', 'native-country', 'marital-status', 'relationship', 'workclass',
                  'occupation', 'education-num']
    source = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
    df = pandas.read_csv(source, names=header, usecols=attributes, low_memory=False)
    return df

In [3]:
df1 = load1()
display(df1)

Unnamed: 0,age,workclass,education-num,marital-status,occupation,relationship,race,sex,native-country
0,39,State-gov,13,Never-married,Adm-clerical,Not-in-family,White,Male,United-States
1,50,Self-emp-not-inc,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,United-States
2,38,Private,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,United-States
3,53,Private,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,United-States
4,28,Private,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,Cuba
...,...,...,...,...,...,...,...,...,...
32556,27,Private,12,Married-civ-spouse,Tech-support,Wife,White,Female,United-States
32557,40,Private,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,United-States
32558,58,Private,9,Widowed,Adm-clerical,Unmarried,White,Female,United-States
32559,22,Private,9,Never-married,Adm-clerical,Own-child,White,Male,United-States


In [4]:
T1 = BVM(df1)
T1.qids(['age', 'sex', 'race', 'native-country', 'marital-status', 'workclass', 'occupation'])
T1.sensitive(['relationship', 'education-num'])

In [5]:
T1_results = T1.assess()

In [6]:
with pandas.option_context('display.max_rows', None, 'display.max_columns', None):
    display(T1_results['re_id'])
    display(T1_results['att_inf'])

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,"['age', 'sex', 'race', 'native-country', 'mari...",0.262738,12649,3.1e-05,0.388471,"{'0': 0.0, '1': 0.009182764657105125, '2': 0.0..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,"['age', 'sex', 'race', 'native-country', 'mari...",relationship,0.648014,2.176685,0.405178,0.881945,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."
1,"['age', 'sex', 'race', 'native-country', 'mari...",education-num,0.302693,2.008571,0.322502,0.647769,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


## [US Census Data (1990) dataset](https://archive.ics.uci.edu/ml/datasets/US+Census+Data+%281990%29)

In [7]:
def load2():
    header = ['caseid', 'dAge', 'dAncstry1', 'dAncstry2', 'iAvail', 'iCitizen', 'iClass', 'dDepart', 'iDisabl1',
              'iDisabl2', 'iEnglish', 'iFeb55', 'iFertil', 'dHispanic', 'dHour89', 'dHours', 'iImmigr', 'dIncome1',
              'dIncome2', 'dIncome3', 'dIncome4', 'dIncome5', 'dIncome6', 'dIncome7', 'dIncome8', 'dIndustry',
              'iKorean', 'iLang1', 'iLooking', 'iMarital', 'iMay75880', 'iMeans', 'iMilitary', 'iMobility',
              'iMobillim', 'dOccup', 'iOthrserv', 'iPerscare', 'dPOB', 'dPoverty', 'dPwgt1', 'iRagechld',
              'dRearning', 'iRelat1', 'iRelat2', 'iRemplpar', 'iRiders', 'iRlabor', 'iRownchld', 'dRpincome',
              'iRPOB', 'iRrelchld', 'iRspouse', 'iRvetserv', 'iSchool', 'iSept80', 'iSex', 'iSubfam1', 'iSubfam2',
              'iTmpabsnt', 'dTravtime', 'iVietnam', 'dWeek89', 'iWork89', 'iWorklwk', 'iWWII', 'iYearsch',
              'iYearwrk', 'dYrsserv']
    attributes = ['dAge', 'dAncstry1', 'dAncstry2', 'iCitizen', 'iClass', 'iDisabl1', 'iDisabl2', 'iEnglish',
                  'iFertil', 'dHour89', 'iImmigr', 'dIncome1', 'dIncome2', 'dIncome3', 'dIncome4', 'dIncome5',
                  'dIncome6', 'dIncome7', 'dIncome8', 'dIndustry', 'iKorean', 'iLang1', 'iMarital', 'iMeans',
                  'dOccup', 'dPOB', 'dPoverty', 'iRagechld', 'dRearning', 'iSchool', 'iSex',
                  'iVietnam', 'iWWII', 'iYearsch']
    source = 'https://archive.ics.uci.edu/ml/machine-learning-databases/census1990-mld/USCensus1990.data.txt'
    df = pandas.read_csv(source, names=header, usecols=attributes, low_memory=False)
    return df

Description of selected attributes.
- 'dAge': Age
- 'dAncstry1': Ancestry 1
- 'dAncstry2': Ancestry 2
- 'iCitizen': Citizenship
- 'iClass': Class of Worker
- 'iDisabl1': Work Limitation Status
- 'iDisabl2': Work Prevented Status
- 'iEnglish': Ability to Speak English
- 'iFertil': Number of Children Ever Born
- 'dHour89': Usual Hours Worked Per Week In 1989
- 'iImmigr': Year of Entry
- 'dIncome1': Wages or Salary Income In 1989
- 'dIncome2': Nonfarm Self Employment Income In 1989
- 'dIncome3': Farm Self Employment Income In 1989
- 'dIncome4': Interests, Dividends, and Net Rental Income In 1989
- 'dIncome5': Social Security Income In 1989
- 'dIncome6': Public Assistance Income In 1989
- 'dIncome7': Retail Income In 1989
- 'dIncome8': All Other Income In 1989
- 'dIndustry': Industry
- 'iKorean': Served Korean Conflict
- 'iLang1': Language Other Than English At Home
- 'iMarital': Marital Status
- 'iMeans': Means of Transportation to Work
- 'dOccup': Occupation
- 'dPOB': Place of Birth
- 'dPoverty': Poverty Status
- 'iRagechld': Presence and Age of Own Chld
- 'dRearning': Total Personal Earnings
- 'iSchool': School Enrollment
- 'iSex': Sex
- 'iVietnam': Served Vietnam Conflict
- 'iWWII': Served World War II Conflict
- 'iYearsch': Education Attainment

In [8]:
df2 = load2()
display(df2)

Unnamed: 0,dAge,dAncstry1,dAncstry2,iCitizen,iClass,iDisabl1,iDisabl2,iEnglish,iFertil,dHour89,...,dOccup,dPOB,dPoverty,iRagechld,dRearning,iSchool,iSex,iVietnam,iWWII,iYearsch
0,dAge,dAncstry1,dAncstry2,iCitizen,iClass,iDisabl1,iDisabl2,iEnglish,iFertil,dHour89,...,dOccup,dPOB,dPoverty,iRagechld,dRearning,iSchool,iSex,iVietnam,iWWII,iYearsch
1,5,0,1,0,5,2,2,1,1,4,...,3,0,2,4,3,1,1,0,0,11
2,6,1,1,0,7,2,2,0,3,1,...,2,0,2,4,2,1,1,0,0,5
3,3,1,2,0,7,2,2,0,1,4,...,4,0,2,4,2,1,1,0,0,10
4,4,1,2,0,1,2,2,0,3,3,...,2,0,2,2,2,1,1,0,0,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2458281,7,1,2,0,0,2,2,0,0,0,...,0,0,2,0,0,1,0,0,1,7
2458282,1,1,2,0,0,0,0,0,0,0,...,0,0,2,4,0,2,1,0,0,4
2458283,3,3,1,0,1,2,2,1,0,3,...,5,0,2,0,4,1,0,0,0,11
2458284,6,0,1,0,1,2,2,0,1,2,...,2,0,2,4,3,1,1,0,0,10


In [9]:
T2 = BVM(df2)
T2.qids(['dAge', 'dAncstry1', 'dAncstry2', 'iClass', 'iEnglish', 'dHour89', 'iLang1', 'iMarital', 'iMeans',
         'dOccup', 'dPOB', 'iSex'])
T2.sensitive(['iCitizen', 'dRearning'])

In [10]:
T2_results = T2.assess()

In [11]:
with pandas.option_context('display.max_rows', None, 'display.max_columns', None):
    display(T2_results['re_id'])
    display(T2_results['att_inf'])

Unnamed: 0,QID,dCR,pCR,Prior,Posterior,Histogram
0,"['dAge', 'dAncstry1', 'dAncstry2', 'iClass', '...",0.06383,252598,0.0,0.102754,"{'0': 0.5601297001243957, '1': 0.1117465583744..."


Unnamed: 0,QID,Sensitive,dCA,pCA,Prior,Posterior,Histogram
0,"['dAge', 'dAncstry1', 'dAncstry2', 'iClass', '...",iCitizen,0.948912,1.079108,0.913131,0.985367,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."
1,"['dAge', 'dAncstry1', 'dAncstry2', 'iClass', '...",dRearning,0.477841,1.764961,0.470504,0.830422,"{'0': 0.0, '1': 0.0, '2': 0.0, '3': 0.0, '4': ..."


---