### 2 year monthly balance check
This dataset is made from using the [HK govt annual stats report](https://www.censtatd.gov.hk/hkstat/sub/sp140.jsp?productCode=B1010003), news articles and surveys regarding wages, poverty

In [1]:
import numpy as np

In [2]:
np.random.seed(123)

In [3]:
import pandas as pd

In [4]:
pd.set_option('display.max_columns', 100)

In [5]:
# creating 3 million unique id
id_length = 8
num_ids = 3000000
al_num = list('abcdefghijklmnopqrstuvwxyz012345679')
total_ids = np.random.choice(al_num, size=[num_ids, id_length])
ids = [ "".join(total_ids[i]) for i in range(len(total_ids))]

In [6]:
np.count_nonzero(np.unique(ids, return_counts=True)[1] ==1)

3000000

In [7]:
hsbc = pd.DataFrame(ids, columns=['customer_id'])

In [8]:
df = hsbc.copy()

In [9]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3000000 entries, 0 to 2999999
Data columns (total 1 columns):
customer_id    object
dtypes: object(1)
memory usage: 22.9+ MB


In [10]:
# 15 - 18 age group
teens = np.random.choice([15,16,17,18], size=14000, p=[.15,.20,.30,.35])

In [11]:
np.random.shuffle(teens)

In [12]:
# 19 - 24
fresh_grad = np.random.choice([19,20,21,22,23,24], size=460000, p=[.05, .05, .10, .15, .30, .35 ])

In [13]:
# 65
s = np.random.normal(68, 8, 700000)

In [14]:
srs = s[(s > 65) & (s < 100)]

In [15]:
srs = np.round(srs)

In [16]:
# rest
rest = np.random.normal( 44, 5, 2100000)

In [17]:
mid = rest[(rest > 25) & (rest < 64)]

In [18]:
np.random.shuffle(mid)

In [19]:
working_class = np.random.choice(mid, size=2074628, replace=False)

In [20]:
# teens df
teens_df = pd.DataFrame(teens, columns=['age'])

In [21]:
teens_df['gender'] = pd.DataFrame(np.random.choice(['m','f'], size=14000, p=[.51,.49]))

In [23]:
teens_df['nationality'] = pd.DataFrame(np.random.choice(['hong kong', 'china', 'other asian', 'white'], size=14000, p=[.8,.10,.06,.04]))

In [28]:
teens_df['expat'] = teens_df['nationality'].apply(lambda x: 'no' if (x == 'hong kong') | (x == 'china') else 'yes')

In [29]:
teens_df['status'] = pd.DataFrame(np.random.choice(['single', 'married'], size=14000, p=[.98, .02]))

In [30]:
teens_df['account tier'] = pd.DataFrame(['personal'] * 14000, columns=['account tier'])

In [31]:
teens_df['employment'] = pd.DataFrame(np.random.choice(['full time', 'part time', 'no'], size=14000, p=[.108,.02, .872]), columns=['employment'])

In [33]:
def tj(col):
    """
    joint account function for teens
    """
    age = col[0]
    emp = col[1]
    
    if age < 18:
        return 'yes'
    elif age == 18:
        if emp == 'no':
            return np.random.choice(['no', 'yes'], p = [.3,.7])
        else:
            return np.random.choice(['no', 'yes'], p = [.6,.4])
    else:
        return yes
        

In [34]:
teens_df['joint'] = teens_df[['age', 'employment']].apply(tj, axis=1)

In [38]:
def tji(col):
    """
    job industry for teen
    """
    if col == 'no':
        return 'unemployed'
    else:
        return np.random.choice(['retail', 'service', 'education'], p=[.45, .45, .1])

In [39]:
teens_df['industry'] = teens_df['employment'].apply(tji)

In [37]:
teens_df['education'] = pd.DataFrame(np.random.choice(['secondary', 'primary'], size=14000, p=[.975, .025]))

In [40]:
def teens_position(col):
    """
    job position for teens
    """
    if col == 'no':
        return 'unemployed'
    else:
        return 'entry-level'

In [41]:
teens_df['job position'] = teens_df['employment'].apply(teens_position)

In [42]:
def teen_salary(col):
    """
    job salary for teens
    """
    if col != 'no':
        return np.round(np.random.normal(4428, 500),2)
    else:
        return 0

In [43]:
teens_df['salary'] = teens_df['employment'].apply(teen_salary)

In [44]:
def teen_deposit(col):
    """
    deposits for teens
    """
    if col != 0:
        return col + np.random.choice([2000, 3000, 4000, 5000], p=[.4, .25, .2, .15])
    else:
        return np.random.choice([3000,4000,5000], p=[.3,.5,.2])

In [45]:
teens_df['monthly deposit avg'] = teens_df['salary'].apply(teen_deposit)

In [47]:
def teen_expense(col):
    """
    expenses for teens
    """
    sal = col[0]
    dep = col[1]
    if sal != 0:
        return np.round(dep*np.random.choice([.65,.80,.9], p=[.55,.3,.15]),2)
    else:
        return np.round(dep*np.random.choice([.65,.40,.25], p=[.45,.35,.2 ]),2)

In [48]:
teens_df['monthly expense avg'] = teens_df[['salary', 'monthly deposit avg']].apply(teen_expense, axis=1)

In [49]:
teens_df['credit card'] = pd.DataFrame(np.random.choice(['yes', 'no'], size=14000, p=[.15,.85]))

In [50]:
teens_df['debit card'] = pd.DataFrame(np.random.choice(['yes', 'no'], size=14000, p=[.9, .1]))

In [51]:
def teens_ce(col):
    """
    teens credit card expense
    """
    mea = col[0]
    cc = col[1]
    if cc == 'no':
        return 0
    else:
        return np.round(mea * np.random.choice([.25,.35,.45], p=[.20,.45,.35]),2)


In [52]:
teens_df['credit card expense'] = teens_df[['monthly expense avg', 'credit card']].apply(teens_ce, axis=1)

In [53]:
teens_df['payme'] = pd.DataFrame(np.random.choice(['yes','no'], size=14000, p=[.8,.2]))

In [54]:
def teen_pmc(col):
    """
    teens payme deposit
    """
    if col == 'yes':
        return np.round(np.random.normal(300,100),2)
    else:
        return 0

In [55]:
teens_df['payme deposit'] = teens_df['payme'].apply(teen_pmc)

In [56]:
def teen_pme(col):
    """
    teens payme expense
    """
    if col == 'no':
        return 0
    else:
        return np.round(np.random.normal(500,100))

In [57]:
teens_df['payme expense'] = teens_df['payme'].apply(teen_pme)

In [59]:
teens_df['feb 10, 2017'] = pd.DataFrame(np.round(np.random.normal(8000, 1000, 14000),2))

In [60]:
teens_df.shape

(14000, 21)

In [61]:
teens_df.head(10)

Unnamed: 0,age,gender,nationality,expat,status,account tier,employment,joint,industry,education,job position,salary,monthly deposit avg,monthly expense avg,credit card,debit card,credit card expense,payme,payme deposit,payme expense,"feb 10, 2017"
0,16,m,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,2600.0,no,yes,0.0,yes,296.38,502.0,7106.07
1,17,f,china,no,single,personal,full time,yes,retail,secondary,entry-level,5157.22,7157.22,5725.78,no,yes,0.0,yes,271.17,416.0,9540.45
2,17,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,no,yes,0.0,yes,269.6,607.0,7850.73
3,18,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,yes,yes,560.0,yes,140.67,412.0,8723.34
4,15,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,3000.0,1200.0,no,yes,0.0,no,0.0,0.0,7578.45
5,15,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,2600.0,no,yes,0.0,yes,278.44,415.0,7805.15
6,17,f,hong kong,no,single,personal,full time,yes,service,secondary,entry-level,5138.9,8138.9,7325.01,no,yes,0.0,yes,343.32,615.0,8864.81
7,18,m,hong kong,no,single,personal,no,no,unemployed,secondary,unemployed,0.0,5000.0,3250.0,no,yes,0.0,yes,439.48,558.0,8324.68
8,18,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,5000.0,3250.0,no,yes,0.0,yes,260.09,528.0,7944.2
9,17,m,china,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,5000.0,3250.0,no,yes,0.0,no,0.0,0.0,9014.33


In [62]:
def second_check(col):
    """
    second monthly balance check
    """
    mda = col[0]
    pd = col[1]
    cm = col[2]
    return np.round((mda+pd+cm)*np.random.choice([.85,1.05], p=[.45,.55]),2)

In [63]:
teens_df['feb 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'feb 10, 2017']].apply(second_check,axis=1)

In [64]:
def end_month(col):
    """
    new month balance check
    """
    me = col[0]
    ce = col[1]
    pe = col[2]
    em = col[3]
    return np.round((em-me-ce-pe)*np.random.choice([.85,1.05], p=[.45,.55]),2)

In [65]:
teens_df['mar 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'feb 25, 2017']].apply(end_month,axis=1)

In [66]:
teens_df['mar 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'mar 10, 2017']].apply(second_check,axis=1)

In [67]:
teens_df['apr 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'mar 25, 2017']].apply(end_month,axis=1)

In [68]:
teens_df['apr 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'apr 10, 2017']].apply(second_check,axis=1)

In [69]:
teens_df['may 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'apr 25, 2017']].apply(end_month,axis=1)

In [70]:
teens_df['may 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'may 10, 2017']].apply(second_check,axis=1)

In [71]:
teens_df['jun 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'may 25, 2017']].apply(end_month,axis=1)

In [72]:
teens_df['jun 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'jun 10, 2017']].apply(second_check,axis=1)

In [73]:
teens_df['jul 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jun 25, 2017']].apply(end_month,axis=1)

In [74]:
teens_df['jul 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'jul 10, 2017']].apply(second_check,axis=1)

In [75]:
teens_df['aug 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jul 25, 2017']].apply(end_month,axis=1)

In [76]:
teens_df['aug 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'aug 10, 2017']].apply(second_check,axis=1)

In [77]:
teens_df['sep 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'aug 25, 2017']].apply(end_month,axis=1)

In [78]:
teens_df['sep 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'sep 10, 2017']].apply(second_check,axis=1)

In [79]:
teens_df['oct 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'sep 25, 2017']].apply(end_month,axis=1)

In [80]:
teens_df['oct 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'oct 10, 2017']].apply(second_check,axis=1)

In [81]:
teens_df['nov 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'oct 25, 2017']].apply(end_month,axis=1)

In [82]:
teens_df['nov 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'nov 10, 2017']].apply(second_check,axis=1)

In [83]:
teens_df['dec 10, 2017'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'nov 25, 2017']].apply(end_month,axis=1)

In [84]:
teens_df['dec 25, 2017'] = teens_df[['monthly deposit avg', 'payme deposit', 'dec 10, 2017']].apply(second_check,axis=1)

In [85]:
teens_df['jan 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'dec 25, 2017']].apply(end_month,axis=1)

In [86]:
teens_df['jan 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'jan 10, 2018']].apply(second_check,axis=1)

In [87]:
teens_df['feb 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jan 25, 2018']].apply(end_month,axis=1)

In [88]:
teens_df['feb 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'feb 10, 2018']].apply(second_check,axis=1)

In [89]:
teens_df['mar 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'feb 25, 2018']].apply(end_month,axis=1)

In [90]:
teens_df['mar 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'mar 10, 2018']].apply(second_check,axis=1)

In [91]:
teens_df['apr 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'mar 25, 2018']].apply(end_month,axis=1)

In [92]:
teens_df['apr 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'apr 10, 2018']].apply(second_check,axis=1)

In [93]:
teens_df['may 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'apr 25, 2018']].apply(end_month,axis=1)

In [94]:
teens_df['may 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'may 10, 2018']].apply(second_check,axis=1)

In [95]:
teens_df['jun 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'may 25, 2018']].apply(end_month,axis=1)

In [96]:
teens_df['jun 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'jun 10, 2018']].apply(second_check,axis=1)

In [97]:
teens_df['jul 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jun 25, 2018']].apply(end_month,axis=1)

In [98]:
teens_df['jul 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'jul 10, 2018']].apply(second_check,axis=1)

In [99]:
teens_df['aug 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jul 25, 2018']].apply(end_month,axis=1)

In [100]:
teens_df['aug 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'aug 10, 2018']].apply(second_check,axis=1)

In [101]:
teens_df['sep 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'aug 25, 2018']].apply(end_month,axis=1)

In [102]:
teens_df['sep 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'sep 10, 2018']].apply(second_check,axis=1)

In [103]:
teens_df['oct 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'sep 25, 2018']].apply(end_month,axis=1)

In [104]:
teens_df['oct 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'oct 10, 2018']].apply(second_check,axis=1)

In [105]:
teens_df['nov 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'oct 25, 2018']].apply(end_month,axis=1)

In [106]:
teens_df['nov 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'nov 10, 2018']].apply(second_check,axis=1)

In [107]:
teens_df['dec 10, 2018'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'nov 25, 2018']].apply(end_month,axis=1)

In [108]:
teens_df['dec 25, 2018'] = teens_df[['monthly deposit avg', 'payme deposit', 'dec 10, 2018']].apply(second_check,axis=1)

In [109]:
teens_df['jan 10, 2019'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'dec 25, 2018']].apply(end_month,axis=1)

In [110]:
teens_df['jan 25, 2019'] = teens_df[['monthly deposit avg', 'payme deposit', 'jan 10, 2019']].apply(second_check,axis=1)

In [111]:
teens_df['feb 10, 2019'] = teens_df[['monthly expense avg', 'credit card expense','payme expense', 'jan 25, 2019']].apply(end_month,axis=1)

In [112]:
teens_df.shape

(14000, 69)

In [113]:
teens_df.head()

Unnamed: 0,age,gender,nationality,expat,status,account tier,employment,joint,industry,education,job position,salary,monthly deposit avg,monthly expense avg,credit card,debit card,credit card expense,payme,payme deposit,payme expense,"feb 10, 2017","feb 25, 2017","mar 10, 2017","mar 25, 2017","apr 10, 2017","apr 25, 2017","may 10, 2017","may 25, 2017","jun 10, 2017","jun 25, 2017","jul 10, 2017","jul 25, 2017","aug 10, 2017","aug 25, 2017","sep 10, 2017","sep 25, 2017","oct 10, 2017","oct 25, 2017","nov 10, 2017","nov 25, 2017","dec 10, 2017","dec 25, 2017","jan 10, 2018","jan 25, 2018","feb 10, 2018","feb 25, 2018","mar 10, 2018","mar 25, 2018","apr 10, 2018","apr 25, 2018","may 10, 2018","may 25, 2018","jun 10, 2018","jun 25, 2018","jul 10, 2018","jul 25, 2018","aug 10, 2018","aug 25, 2018","sep 10, 2018","sep 25, 2018","oct 10, 2018","oct 25, 2018","nov 10, 2018","nov 25, 2018","dec 10, 2018","dec 25, 2018","jan 10, 2019","jan 25, 2019","feb 10, 2019"
0,16,m,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,2600.0,no,yes,0.0,yes,296.38,502.0,7106.07,11972.57,9314.1,14291.0,9510.65,14497.38,11965.15,13822.3,9112.25,14079.06,9330.5,14308.22,9525.29,14512.75,11981.29,13836.02,11270.72,13232.03,8610.53,10970.87,6688.54,11534.17,8853.78,11177.64,8479.42,13414.59,8765.7,13715.18,11143.84,13124.19,8518.86,13456.0,8800.9,13752.14,11182.65,16252.98,11178.33,13153.5,8543.78,13482.17,10899.18,15955.34,10925.34,12938.46,10328.28,12430.96,9795.41,11978.02,9319.82
1,17,f,china,no,single,personal,full time,yes,retail,secondary,entry-level,5157.22,7157.22,5725.78,no,yes,0.0,yes,271.17,416.0,9540.45,17817.28,12259.28,20672.05,15256.78,23819.43,15026.0,23577.11,14820.03,23360.84,14636.2,18754.9,10721.15,15427.11,7892.53,16086.97,8453.41,13499.53,6254.09,14366.6,8636.06,13654.78,7888.65,13019.48,7221.58,15382.47,7854.59,12990.53,7191.19,15350.56,7827.46,12967.47,7166.97,15325.13,7805.85,15995.95,8376.04,13433.77,6198.19,11582.59,5712.85,11170.05,5279.68,10801.86,3961.07,9681.04,3008.37,8871.25,2320.05
2,17,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,no,yes,0.0,yes,269.6,607.0,7850.73,12726.35,8941.45,11229.39,9473.51,14430.27,12834.43,17959.23,16539.84,17688.02,16255.07,21550.9,16442.32,17605.13,13088.41,18225.91,16819.86,22143.93,16946.39,22276.79,21073.28,21541.45,20301.17,25799.31,20053.46,20674.6,19390.98,20111.49,15218.82,20462.84,15517.46,16819.0,12420.2,17524.29,16083.15,17299.84,12828.91,17953.44,13384.47,15005.96,10879.12,12876.41,11202.88,16246.1,14741.06,16159.06,14649.66,19865.22,15009.49
3,18,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,yes,yes,560.0,yes,140.67,412.0,8723.34,10934.41,8780.53,10983.02,7149.37,11854.54,9746.67,14581.71,12610.2,17588.41,15767.23,20903.3,15581.6,20708.38,15415.92,20534.42,15268.06,20379.17,15136.09,16385.25,11741.26,13499.64,11474.02,13272.49,9095.42,11250.68,7376.88,12093.43,9997.5,14845.08,10432.12,15301.43,10820.02,15708.72,13793.56,15244.1,10771.28,12675.16,10608.32,15486.44,10977.27,12850.25,8736.51,13521.04,9306.68,11430.25,7529.51,12253.69,10165.77
4,15,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,3000.0,1200.0,no,yes,0.0,no,0.0,0.0,7578.45,11107.37,8421.26,9708.07,7231.86,10743.45,8111.93,11667.53,8897.4,12492.27,9598.43,13228.35,12629.77,13285.3,12689.56,16474.04,16037.74,16182.08,15731.18,19667.74,15697.58,19632.46,15667.59,19600.97,19321.02,18972.87,15106.94,15390.9,14900.44,15215.37,11913.06,12676.1,12049.9,15802.4,15332.52,15582.64,15101.77,19006.86,18697.2,18442.62,14656.23,15007.8,11736.63,15473.46,12132.44,15889.06,15423.51,15659.98,15182.98


In [114]:
teens_df.to_csv('teens_hsbc.csv', index=False)

##### Young Adults

In [115]:
len(fresh_grad)

460000

In [116]:
young_adults = pd.DataFrame(fresh_grad, columns=['age'])

In [117]:
young_adults['gender'] = pd.DataFrame(np.random.choice(['m', 'f'], size=460000, p=[.48, .52]))

In [118]:
young_adults['nationality'] = pd.DataFrame(np.random.choice(['hong kong', 'china', 'other asian', 'white'], size=460000, p=[.75,.15,.05,.05]))

In [119]:
young_adults['expat'] = young_adults['nationality'].apply(lambda x: 'no' if (x=='hong kong') | (x=='china') else 'yes')

In [120]:
young_adults['status'] = pd.DataFrame(np.random.choice(['single','married','divorce'], size=460000, p=[.93,.05,.02]))

In [121]:
young_adults['account tier'] = pd.DataFrame(np.random.choice(['personal', 'advance','premier'],size=460000, p=[.94,.05,.01]))

In [122]:
young_adults['employment'] = pd.DataFrame(np.random.choice(['no','full time', 'part time'],size=460000, p=[.65,.1,.25]))

In [123]:
young_adults['joint'] = pd.DataFrame(np.random.choice(['no', 'yes'], size=460000, p=[.85, .15]))

In [124]:
def ya_work(col):
    """
    young adult industry
    """
    if col == 'no':
        return 'unemployed'
    else:
        return np.random.choice(['retail', 'service', 'education','manufacturing', 'other', 'construction', 'transportation'], p=[.25,.25,.3,.025,.05,.025,.1])

In [125]:
young_adults['industry'] = young_adults['employment'].apply(ya_work)

In [126]:
young_adults['education'] = pd.DataFrame(np.random.choice(['primary','secondary','tertiary'], size=460000, p=[.1,.65,.25]))

In [127]:
def ya_position(col):
    """
    young adult job position
    """
    if col == 'no':
        return 'unemployed'
    else:
        return np.random.choice(['entry-level','junior', 'associate'], p=[.75,.2,.05])

In [128]:
young_adults['job position'] = young_adults['employment'].apply(ya_position)

In [129]:
def ya_salary(col):
    """
    young adult salary
    """
    if col == 'unemployed':
        return 0
    elif col == 'part time':
        return np.round(np.random.normal(7000,500),2)
    else:
        return np.round(np.random.normal(14400,3000),2)

In [130]:
young_adults['salary'] = young_adults['job position'].apply(ya_salary)

In [131]:
def ya_deposit(col):
    """
    young adult deposits
    """
    if col != 0:
        return col*np.random.choice([.5,.66,.75,.85,1], p=[.15,.25,.35,.15,.1])
    else:
        return np.random.choice([3000,4000,5000], p=[.2,.5,.3])

In [132]:
young_adults['monthly deposit avg'] = young_adults['salary'].apply(ya_deposit)

In [133]:
def ya_expense(col):
    """
    young adult expenses
    """
    sal = col[0]
    dep = col[1]
    if sal != 0:
        return dep* np.random.choice([.6,.45,.35,.2], p=[.4,.3,.2,.1])
    else:
        return dep* np.random.choice([.5,.35,.2], p=[.6,.25,.15])

In [134]:
young_adults['monthly expense avg'] = young_adults[['salary', 'monthly deposit avg']].apply(ya_expense, axis=1)

In [135]:
def ya_cc(col):
    """
    young adults credit card
    """
    if col > 12000:
        return np.random.choice(['yes', 'no'], p=[.8,.2])
    else:
        return np.random.choice(['yes', 'no'], p=[.4,.6])

In [136]:
young_adults['credit card'] = young_adults['salary'].apply(ya_cc)

In [137]:
young_adults['debit card'] = pd.DataFrame(np.random.choice(['yes','no'], size=460000, p=[.9,.1]))

In [138]:
def ya_ccexp(col):
    """
    young adult credit card expense
    """
    cc = col[0]
    exp = col[1]
    if cc == 'yes':
        return np.round(exp*np.random.choice([.55, .45,.25,.15], p=[.45,.25,.15,.15]),2)
    else:
        return 0

In [139]:
young_adults['credit card expense'] = young_adults[['credit card', 'monthly expense avg']].apply(ya_ccexp,axis=1)

In [140]:
young_adults['payme'] = pd.DataFrame(np.random.choice(['no', 'yes'], size=460000, p=[.8,.2]))

In [141]:
young_adults['payme deposit'] = young_adults['payme'].apply(lambda x: np.round(np.random.normal(400,100),2) if x=='yes' else 0)

In [142]:
young_adults['payme expense'] = young_adults['payme'].apply(lambda x: np.round(np.random.normal(600,100),2) if x =='yes' else 0)

In [143]:
def ya_start(col):
    """
    young adult start month check
    """
    if col == 'personal':
        return np.random.choice([np.round(np.random.normal(11000, 1000),2), np.round(np.random.normal(14000,1500),2), np.round(np.random.normal(18000,2000),2)], p=[.3,.5,.2])
    elif col == 'advance':
        return np.random.choice([np.round(np.random.normal(200000, 5000), 2), np.round(np.random.normal(190000, 10000), 2)], p=[.7,.3])
    else:
        return np.round(np.random.normal(1000000, 50000),2)

In [144]:
young_adults['feb 10, 2017'] = young_adults['account tier'].apply(ya_start)

In [145]:
young_adults['feb 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'feb 10, 2017']].apply(second_check, axis=1)

In [146]:
def ya_end_month(col):
    """
    young adult new month check
    """
    me = col[0]
    ce = col[1]
    pe = col[2]
    em = col[3]
    return np.round((em-me-ce-pe)*np.random.choice([.80,.95, 1.1], p=[.3,.45,.25]),2)

In [147]:
young_adults['mar 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'feb 25, 2017']].apply(ya_end_month, axis=1)

In [148]:
young_adults['mar 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'mar 10, 2017']].apply(second_check, axis=1)

In [149]:
young_adults['apr 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'mar 25, 2017']].apply(ya_end_month, axis=1)

In [150]:
young_adults['apr 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'apr 10, 2017']].apply(second_check, axis=1)

In [152]:
young_adults['may 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'apr 25, 2017']].apply(ya_end_month, axis=1)

In [153]:
young_adults['may 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'may 10, 2017']].apply(second_check, axis=1)

In [154]:
young_adults['jun 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'may 25, 2017']].apply(ya_end_month, axis=1)

In [155]:
young_adults['jun 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'jun 10, 2017']].apply(second_check, axis=1)

In [156]:
young_adults['jul 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jun 25, 2017']].apply(ya_end_month, axis=1)

In [157]:
young_adults['jul 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'jul 10, 2017']].apply(second_check, axis=1)

In [158]:
young_adults['aug 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jul 25, 2017']].apply(ya_end_month, axis=1)

In [159]:
young_adults['aug 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'aug 10, 2017']].apply(second_check, axis=1)

In [160]:
young_adults['sep 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'aug 25, 2017']].apply(ya_end_month, axis=1)

In [161]:
young_adults['sep 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'sep 10, 2017']].apply(second_check, axis=1)

In [162]:
young_adults['oct 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'sep 25, 2017']].apply(ya_end_month, axis=1)

In [163]:
young_adults['oct 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'oct 10, 2017']].apply(second_check, axis=1)

In [164]:
young_adults['nov 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'oct 25, 2017']].apply(ya_end_month, axis=1)

In [165]:
young_adults['nov 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'nov 10, 2017']].apply(second_check, axis=1)

In [166]:
young_adults['dec 10, 2017'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'nov 25, 2017']].apply(ya_end_month, axis=1)

In [167]:
young_adults['dec 25, 2017'] = young_adults[['monthly deposit avg', 'payme deposit', 'dec 10, 2017']].apply(second_check, axis=1)

In [168]:
young_adults['jan 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'dec 25, 2017']].apply(ya_end_month, axis=1)

In [169]:
young_adults['jan 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'jan 10, 2018']].apply(second_check, axis=1)

In [170]:
young_adults['feb 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jan 25, 2018']].apply(ya_end_month, axis=1)

In [171]:
young_adults['feb 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'feb 10, 2018']].apply(second_check, axis=1)

In [172]:
young_adults['mar 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'feb 25, 2018']].apply(ya_end_month, axis=1)

In [173]:
young_adults['mar 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'mar 10, 2018']].apply(second_check, axis=1)

In [174]:
young_adults['apr 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'mar 25, 2018']].apply(ya_end_month, axis=1)

In [175]:
young_adults['apr 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'apr 10, 2018']].apply(second_check, axis=1)

In [176]:
young_adults['may 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'apr 25, 2018']].apply(ya_end_month, axis=1)

In [177]:
young_adults['may 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'may 10, 2018']].apply(second_check, axis=1)

In [178]:
young_adults['jun 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'may 25, 2018']].apply(ya_end_month, axis=1)

In [179]:
young_adults['jun 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'jun 10, 2018']].apply(second_check, axis=1)

In [180]:
young_adults['jul 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jun 25, 2018']].apply(ya_end_month, axis=1)

In [181]:
young_adults['jul 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'jul 10, 2018']].apply(second_check, axis=1)

In [182]:
young_adults['aug 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jul 25, 2018']].apply(ya_end_month, axis=1)

In [183]:
young_adults['aug 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'aug 10, 2018']].apply(second_check, axis=1)

In [184]:
young_adults['sep 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'aug 25, 2018']].apply(ya_end_month, axis=1)

In [185]:
young_adults['sep 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'sep 10, 2018']].apply(second_check, axis=1)

In [186]:
young_adults['oct 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'sep 25, 2018']].apply(ya_end_month, axis=1)

In [187]:
young_adults['oct 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'oct 10, 2018']].apply(second_check, axis=1)

In [188]:
young_adults['nov 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'oct 25, 2018']].apply(ya_end_month, axis=1)

In [189]:
young_adults['nov 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'nov 10, 2018']].apply(second_check, axis=1)

In [190]:
young_adults['dec 10, 2018'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'nov 25, 2018']].apply(ya_end_month, axis=1)

In [191]:
young_adults['dec 25, 2018'] = young_adults[['monthly deposit avg', 'payme deposit', 'dec 10, 2018']].apply(second_check, axis=1)

In [192]:
young_adults['jan 10, 2019'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'dec 25, 2018']].apply(ya_end_month, axis=1)

In [193]:
young_adults['jan 25, 2019'] = young_adults[['monthly deposit avg', 'payme deposit', 'jan 10, 2019']].apply(second_check, axis=1)

In [194]:
young_adults['feb 10, 2019'] = young_adults[['monthly expense avg', 'credit card expense','payme expense', 'jan 25, 2019']].apply(ya_end_month, axis=1)

In [195]:
young_adults.shape

(460000, 69)

In [197]:
young_adults.to_csv('ya.csv',index=False)

In [196]:
young_adults.head()

Unnamed: 0,age,gender,nationality,expat,status,account tier,employment,joint,industry,education,job position,salary,monthly deposit avg,monthly expense avg,credit card,debit card,credit card expense,payme,payme deposit,payme expense,"feb 10, 2017","feb 25, 2017","mar 10, 2017","mar 25, 2017","apr 10, 2017","apr 25, 2017","may 10, 2017","may 25, 2017","jun 10, 2017","jun 25, 2017","jul 10, 2017","jul 25, 2017","aug 10, 2017","aug 25, 2017","sep 10, 2017","sep 25, 2017","oct 10, 2017","oct 25, 2017","nov 10, 2017","nov 25, 2017","dec 10, 2017","dec 25, 2017","jan 10, 2018","jan 25, 2018","feb 10, 2018","feb 25, 2018","mar 10, 2018","mar 25, 2018","apr 10, 2018","apr 25, 2018","may 10, 2018","may 25, 2018","jun 10, 2018","jun 25, 2018","jul 10, 2018","jul 25, 2018","aug 10, 2018","aug 25, 2018","sep 10, 2018","sep 25, 2018","oct 10, 2018","oct 25, 2018","nov 10, 2018","nov 25, 2018","dec 10, 2018","dec 25, 2018","jan 10, 2019","jan 25, 2019","feb 10, 2019"
0,23,m,hong kong,no,single,personal,part time,yes,education,secondary,entry-level,11874.98,10093.733,2018.7466,no,yes,0.0,no,0.0,0.0,16071.23,22240.22,22243.62,33954.22,25548.38,37424.22,33635.2,45915.38,48286.3,61299.03,65208.31,64006.74,58888.59,72431.44,66892.06,65437.92,69761.09,83847.56,65463.05,79334.62,73450.08,71012.24,65543.82,79419.43,73530.65,71080.73,65608.88,64347.22,49862.78,62954.34,57888.81,71381.67,76299.22,73434.01,67844.5,81835.14,87798.03,102786.35,95729.22,89949.51,83534.23,79583.77,62052.02,75753.04,70047.58,68120.12,62796.3,76534.53,59612.63
1,22,f,hong kong,no,single,personal,no,no,unemployed,tertiary,unemployed,0.0,4000.0,2000.0,yes,yes,300.0,no,0.0,0.0,11276.11,16039.92,15113.91,20069.61,14215.69,15483.34,14501.67,15726.42,10741.14,12529.97,9718.47,11660.7,7488.56,12062.99,10739.29,15476.25,10541.0,15268.05,12319.65,17135.63,14093.85,18998.54,15863.61,20856.79,17628.95,22710.4,16328.32,21344.74,18092.5,18778.62,15654.69,20637.42,17420.55,18207.47,17498.22,22573.13,16218.5,21229.42,15143.54,16272.01,11177.61,15936.49,12954.67,17802.4,17052.64,17894.74,12475.79,14004.42,9363.54
2,24,f,hong kong,no,single,personal,part time,yes,retail,secondary,junior,11298.06,7456.7196,3355.52382,yes,yes,503.33,no,0.0,0.0,15833.11,24454.32,16476.37,25129.74,17016.71,25697.1,20746.33,29613.2,24466.63,33519.52,28177.63,30289.2,25108.83,34193.83,24267.98,26965.99,18485.71,22051.07,17282.61,25976.3,24329.19,33375.21,28040.54,37272.12,26730.61,29059.23,23940.36,32966.93,32018.88,33554.26,28210.64,30317.26,21166.72,30054.61,28815.33,30831.24,25623.77,34734.51,33963.22,35206.95,25078.48,27654.92,19036.85,22519.53,20526.74,23785.94,18930.73,27706.82,19078.37
3,23,f,china,no,single,personal,no,no,unemployed,secondary,unemployed,0.0,4000.0,800.0,no,yes,0.0,no,0.0,0.0,14764.79,15950.07,14392.57,19312.2,14809.76,19750.25,20845.28,21118.49,22350.34,22397.79,17278.23,18086.5,16422.18,21443.29,19611.13,20069.46,15415.57,20386.35,18607.03,23737.38,21790.51,27080.04,21024.03,26275.23,24201.47,29611.54,27370.96,26665.32,20692.26,20988.42,16150.74,21158.28,19340.37,19839.31,15231.45,16346.73,14769.39,15953.98,14396.28,15636.84,14095.0,18999.75,17289.76,18096.3,19025.93,19572.04,15017.63,19968.51,15334.81
4,24,m,other asian,yes,single,personal,no,no,unemployed,secondary,unemployed,0.0,4000.0,2000.0,no,yes,0.0,no,0.0,0.0,9448.34,14120.76,13332.84,14732.91,10186.33,14895.65,12250.87,13813.24,11222.58,15983.71,15382.08,20351.18,14680.94,19614.99,14091.99,15378.19,10702.55,15437.68,10750.14,15487.65,12813.27,14291.28,11676.72,16460.56,13737.53,18624.41,15793.19,20782.85,20661.13,25894.19,19115.35,19648.05,16765.65,17650.8,12520.64,17346.67,14579.34,19508.31,19259.14,24422.1,24664.31,30097.53,26692.65,32227.28,24181.82,29590.91,26211.36,25679.66,22495.68


In [198]:
teens_df.head()

Unnamed: 0,age,gender,nationality,expat,status,account tier,employment,joint,industry,education,job position,salary,monthly deposit avg,monthly expense avg,credit card,debit card,credit card expense,payme,payme deposit,payme expense,"feb 10, 2017","feb 25, 2017","mar 10, 2017","mar 25, 2017","apr 10, 2017","apr 25, 2017","may 10, 2017","may 25, 2017","jun 10, 2017","jun 25, 2017","jul 10, 2017","jul 25, 2017","aug 10, 2017","aug 25, 2017","sep 10, 2017","sep 25, 2017","oct 10, 2017","oct 25, 2017","nov 10, 2017","nov 25, 2017","dec 10, 2017","dec 25, 2017","jan 10, 2018","jan 25, 2018","feb 10, 2018","feb 25, 2018","mar 10, 2018","mar 25, 2018","apr 10, 2018","apr 25, 2018","may 10, 2018","may 25, 2018","jun 10, 2018","jun 25, 2018","jul 10, 2018","jul 25, 2018","aug 10, 2018","aug 25, 2018","sep 10, 2018","sep 25, 2018","oct 10, 2018","oct 25, 2018","nov 10, 2018","nov 25, 2018","dec 10, 2018","dec 25, 2018","jan 10, 2019","jan 25, 2019","feb 10, 2019"
0,16,m,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,2600.0,no,yes,0.0,yes,296.38,502.0,7106.07,11972.57,9314.1,14291.0,9510.65,14497.38,11965.15,13822.3,9112.25,14079.06,9330.5,14308.22,9525.29,14512.75,11981.29,13836.02,11270.72,13232.03,8610.53,10970.87,6688.54,11534.17,8853.78,11177.64,8479.42,13414.59,8765.7,13715.18,11143.84,13124.19,8518.86,13456.0,8800.9,13752.14,11182.65,16252.98,11178.33,13153.5,8543.78,13482.17,10899.18,15955.34,10925.34,12938.46,10328.28,12430.96,9795.41,11978.02,9319.82
1,17,f,china,no,single,personal,full time,yes,retail,secondary,entry-level,5157.22,7157.22,5725.78,no,yes,0.0,yes,271.17,416.0,9540.45,17817.28,12259.28,20672.05,15256.78,23819.43,15026.0,23577.11,14820.03,23360.84,14636.2,18754.9,10721.15,15427.11,7892.53,16086.97,8453.41,13499.53,6254.09,14366.6,8636.06,13654.78,7888.65,13019.48,7221.58,15382.47,7854.59,12990.53,7191.19,15350.56,7827.46,12967.47,7166.97,15325.13,7805.85,15995.95,8376.04,13433.77,6198.19,11582.59,5712.85,11170.05,5279.68,10801.86,3961.07,9681.04,3008.37,8871.25,2320.05
2,17,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,no,yes,0.0,yes,269.6,607.0,7850.73,12726.35,8941.45,11229.39,9473.51,14430.27,12834.43,17959.23,16539.84,17688.02,16255.07,21550.9,16442.32,17605.13,13088.41,18225.91,16819.86,22143.93,16946.39,22276.79,21073.28,21541.45,20301.17,25799.31,20053.46,20674.6,19390.98,20111.49,15218.82,20462.84,15517.46,16819.0,12420.2,17524.29,16083.15,17299.84,12828.91,17953.44,13384.47,15005.96,10879.12,12876.41,11202.88,16246.1,14741.06,16159.06,14649.66,19865.22,15009.49
3,18,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,4000.0,1600.0,yes,yes,560.0,yes,140.67,412.0,8723.34,10934.41,8780.53,10983.02,7149.37,11854.54,9746.67,14581.71,12610.2,17588.41,15767.23,20903.3,15581.6,20708.38,15415.92,20534.42,15268.06,20379.17,15136.09,16385.25,11741.26,13499.64,11474.02,13272.49,9095.42,11250.68,7376.88,12093.43,9997.5,14845.08,10432.12,15301.43,10820.02,15708.72,13793.56,15244.1,10771.28,12675.16,10608.32,15486.44,10977.27,12850.25,8736.51,13521.04,9306.68,11430.25,7529.51,12253.69,10165.77
4,15,f,hong kong,no,single,personal,no,yes,unemployed,secondary,unemployed,0.0,3000.0,1200.0,no,yes,0.0,no,0.0,0.0,7578.45,11107.37,8421.26,9708.07,7231.86,10743.45,8111.93,11667.53,8897.4,12492.27,9598.43,13228.35,12629.77,13285.3,12689.56,16474.04,16037.74,16182.08,15731.18,19667.74,15697.58,19632.46,15667.59,19600.97,19321.02,18972.87,15106.94,15390.9,14900.44,15215.37,11913.06,12676.1,12049.9,15802.4,15332.52,15582.64,15101.77,19006.86,18697.2,18442.62,14656.23,15007.8,11736.63,15473.46,12132.44,15889.06,15423.51,15659.98,15182.98


In [381]:
mill_df = pd.concat([teens_df, young_adults], ignore_index=True)

In [200]:
mill_df.to_csv('mill_df.csv', index=False)

### middle class

In [201]:
wc = pd.DataFrame(np.round(working_class), columns=['age'])

In [202]:
wc.shape

(2074628, 1)

In [203]:
wc['gender'] = pd.DataFrame(np.random.choice(['m','f'], size=2074628, p=[.46, .54]))

In [204]:
wc['nationality'] = pd.DataFrame(np.random.choice(['hong kong', 'china', 'other asian', 'white'], size=2074628, p=[.65,.2,.05,.1]))

In [206]:
wc['expat'] = wc['nationality'].apply(lambda x: 'no' if (x=='hong kong') | (x=='china') else 'no')

In [207]:
wc['status'] = pd.DataFrame(np.random.choice(['single', 'married','divorce', 'widower'], size=2074628, p=[.3,.50,.15,.05]))

In [208]:
def account(col):
    """
    middle age accounts
    """
    if col < 35:
        return np.random.choice(['personal', 'advance', 'premier'], p=[.6,.35,.05])    
    elif col > 50:
        return np.random.choice(['personal', 'advance', 'premier'], p=[.35,.45,.2])
    else:
        return np.random.choice(['personal', 'advance', 'premier'], p=[.5,.4,.1])
        

In [209]:
wc['account tier'] = wc['age'].apply(account)

In [210]:
def employment(col):
    """
    middle age employment
    """
    sex = col[0]
    marry = col[1]
    if sex == 'f':
        if marry == 'married':
            return np.random.choice(['full time', 'part time','no'], p=[.36,.2,.44])
        else:
            return np.random.choice(['full time', 'part time', 'no'], p=[.75,.15,.1])
    else:
        return np.random.choice(['full time', 'part time','no'], p=[.80,.15,.05])

In [211]:
wc['employment'] = wc[['gender','status']].apply(employment,axis=1)

In [212]:
wc['joint'] = wc['status'].apply(lambda x: 'yes' if x == 'married' else np.random.choice(['yes', 'no'], p=[.34,.66]))

In [213]:
wc['industry'] = wc['employment'].apply(lambda x: 'unemployed' if x == 'no' else np.random.choice(['retail', 'service', 'education','manufacturing', 'other', 'construction', 'transportation','import/export','finance', 'professional service','IT','real estate','social service'], p=[.08,.08,.04,.05,.04,.07,.06,.15,.09,.1,.04,.05,.15]))

In [214]:
def edu(col):
    """
    middle age education
    """
    if col == 'premier':
        return np.random.choice(['primary', 'secondary','tertiary'], p=[.05, .25, .7])
    elif col == 'advance':
        return np.random.choice(['primary', 'secondary', 'tertiary'], p=[.2, .45,.35])
    else:
        return np.random.choice(['primary', 'secondary', 'tertiary'], p=[.55, .30, .15])

In [215]:
wc['education'] = wc['account tier'].apply(edu)

In [216]:
def job_position(col):
    """
    middle age job position
    """
    emp = col[0]
    acct = col[1]
    if emp == 'no':
        return 'unemployed'
    else:
        if acct == 'premier':
            return np.random.choice(['manager', 'executive', 'senior','owner','other'], p=[.31,.13,.4,.1,.06])
        elif acct == 'advance':
            return np.random.choice(['manager', 'senior', 'associate','owner', 'other'], p=[.15,.45,.35,.025,.025])
        else:
            return np.random.choice(['associate', 'junior', 'senior','other'], p=[.6, .3,.05, .05])

In [217]:
wc['job position'] = wc[['employment', 'account tier']].apply(job_position, axis=1)

In [218]:
def salary(col):
    """
    middle age salary
    """
    emp = col[0]
    jp = col[1]
    if emp == 'no':
        return 0
    elif emp == 'part time':
        return np.round(np.random.choice([np.random.normal(8000, 500), np.random.normal(14000, 1000), np.random.normal(20000, 2000)], p=[.35, .5, .15]),2)
    else:
        if jp == 'executive':
            return np.round(np.random.normal(150000,25000),2)
        elif (jp == 'manager') | (jp == 'senior') | (jp == 'owner'):
            return np.round(np.random.choice([np.random.normal(80000, 10000), np.random.normal(65000, 5000), np.random.normal(45000,5000)], p=[.45,.40, .15]),2)
        else:
            return np.round(np.random.choice([np.random.normal(26800, 1000), np.random.normal(35000, 2500), np.random.normal(18000, 1000)], p=[.55,.35,.10]),2)

In [219]:
wc['salary'] = wc[['employment','job position']].apply(salary, axis=1)

In [62]:
wc.head()

Unnamed: 0,age,nationality,expat,status,account tier,gender,employment,joint,industry,education,job position,salary
0,42.0,white,yes,single,advance,m,full time,no,transportation,secondary,associate,31716.58
1,43.0,white,yes,married,personal,m,no,yes,unemployed,primary,unemployed,0.0
2,46.0,hong kong,no,single,personal,f,full time,no,professional service,primary,junior,26979.79
3,46.0,hong kong,no,married,personal,f,part time,yes,other,primary,associate,7324.66
4,45.0,china,no,divorce,personal,f,full time,no,IT,primary,associate,25689.13


In [220]:
def deposits(col):
    """
    middle age deposits
    """
    if col != 0:
        return np.round(col*np.random.choice([.5,.66,.75,.85,1], p=[.15,.25,.34,.25,.01]),2)
    else:
        return np.round(np.random.choice([np.random.normal(4000, 100), np.random.normal(7000,500), np.random.normal(10000, 1000)], p=[.2,.5,.3]),2)

In [221]:
wc['monthly deposit avg'] = wc['salary'].apply(deposits)

In [222]:
def expense(col):
    """
    middle age expense
    """
    sal = col[0]
    dep = col[1]
    if sal != 0:
        return np.round(dep* np.random.choice([.55,.45,.35,.2], p=[.2,.3,.15,.35]),2)
    else:
        return np.round(dep* np.random.choice([.4,.3,.25], p=[.5,.35,.15]),2)

In [223]:
wc['monthly expense avg'] = wc[['salary', 'monthly deposit avg']].apply(expense, axis=1)

In [224]:
wc['credit card'] = pd.DataFrame(np.random.choice(['yes','no'], size=2074628, p=[.98,.02]))

In [225]:
wc['debit card'] = pd.DataFrame(np.random.choice(['yes', 'no'], size=2074628, p=[.99,.01]))

In [226]:
def credit_expense(col):
    """
    middle age credit card expense
    """
    cc = col[0]
    exp = col[1]
    if cc == 'yes':
        return np.round(exp * np.random.choice([.60,.4,.25,.15], p=[.2,.4,.3,.1]),2)
    else:
        return np.round(exp * np.random.choice([.4,.35,.25,.15], p=[.3,.25,.25,.2]),2)

In [227]:
wc['credit card expense'] = wc[['credit card', 'monthly expense avg']].apply(credit_expense, axis=1)

In [228]:
wc['payme'] = wc['age'].apply(lambda x: np.random.choice(['yes','no'], p=[.7,.3]) if x > 35 else np.random.choice(['yes', 'no'], p=[.2,.8]))

In [229]:
wc['payme deposit'] = wc['payme'].apply(lambda x: np.round(np.random.normal(1000,300),2) if x == 'yes' else 0)

In [230]:
wc['payme expense'] = wc['payme'].apply(lambda x: np.round(np.random.normal(1500,500),2) if x =='yes' else 0)

In [231]:
def start(col):
    """
    middle age start month
    """
    acct = col[0]
    if acct == 'premier':
        return np.round(np.random.choice([np.random.normal(1500000,25000), np.random.normal(2000000, 100000)],p=[.44,.56]),2)
    elif acct == 'advance':
        return np.round(np.random.choice([np.random.normal(500000, 50000), np.random.normal(250000,10000), np.random.normal(700000, 50000)], p=[.4,.4, .2]),2)
    else:
        return np.round(np.random.normal(100000, 25000),2)

In [232]:
wc['feb 10, 2017'] = wc['account tier'].apply(start)

In [233]:
def sec_check(col):
    """
    middle age second month
    """
    acct = col[0]
    jp = col[1]
    mda = col[2]
    pd = col[3]
    cm = col[4]
    if acct == 'premier':
        if jp == 'executive':
            return np.round((mda+pd+cm) * np.random.choice([.85,.95,1.2],p=[.33,.5,.17]),2)
        elif (jp == 'manager') | (jp == 'senior'):
            return np.round((mda+pd+cm) * np.random.choice([.90, .95, 1.05], p = [.40,.35, .25 ]),2)
        else:
            return np.round(np.random.normal(mda+pd+cm, 10000),2)
    elif acct == 'advance':
        return np.round((mda+pd+cm)*np.random.choice([.75,.9,1.05,1.2], p=[.24, .33, .33,.1 ]),2)
    elif acct == 'personal':
        return np.round((mda+pd+cm)*np.random.choice([.65, .8, 1, 1.1, 1.15], p=[.25, .35, .2,.1,.1]),2)

In [234]:
wc['feb 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'feb 10, 2017']].apply(sec_check, axis=1)

In [235]:
def ma_end_month(col):
    """
    middle age new month
    """
    acct = col[0]
    me = col[1]
    ce = col[2]
    pe = col[3]
    em = col[4]
    if acct == 'premier':
        return np.round((em-me-ce-pe)*np.random.choice([.70,.95,1.1,1.2], p=[.15,.25,.35,.25]),2)
    elif acct == 'advance':
        return np.round((em-me-ce-pe)* np.random.choice([.80,.9, 1, 1.05], p =[.25, .25, .15, .35 ]),2)
    else:
        return np.round((em-me-ce-pe)* np.random.choice([.60,.8,.95,1, 1.1], p=[.15, .25, .35, .2,.05]),2)

In [237]:
wc['mar 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'feb 25, 2017']].apply(ma_end_month, axis=1)

In [238]:
wc['mar 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'mar 10, 2017']].apply(sec_check, axis=1)

In [239]:
wc['apr 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'mar 25, 2017']].apply(ma_end_month, axis=1)

In [240]:
wc['apr 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'apr 10, 2017']].apply(sec_check, axis=1)

In [241]:
wc['may 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'apr 25, 2017']].apply(ma_end_month, axis=1)

In [242]:
wc['may 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'may 10, 2017']].apply(sec_check, axis=1)

In [243]:
wc['jun 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'may 25, 2017']].apply(ma_end_month, axis=1)

In [244]:
wc['jun 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jun 10, 2017']].apply(sec_check, axis=1)

In [245]:
wc['jul 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jun 25, 2017']].apply(ma_end_month, axis=1)

In [246]:
wc['jul 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jul 10, 2017']].apply(sec_check, axis=1)

In [247]:
wc['aug 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jul 25, 2017']].apply(ma_end_month, axis=1)

In [248]:
wc['aug 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'aug 10, 2017']].apply(sec_check, axis=1)

In [249]:
wc['sep 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'aug 25, 2017']].apply(ma_end_month, axis=1)

In [250]:
wc['sep 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'sep 10, 2017']].apply(sec_check, axis=1)

In [251]:
wc['oct 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'sep 25, 2017']].apply(ma_end_month, axis=1)

In [252]:
wc['oct 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'oct 10, 2017']].apply(sec_check, axis=1)

In [253]:
wc['nov 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'oct 25, 2017']].apply(ma_end_month, axis=1)

In [254]:
wc['nov 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'nov 10, 2017']].apply(sec_check, axis=1)

In [255]:
wc['dec 10, 2017'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'nov 25, 2017']].apply(ma_end_month, axis=1)

In [256]:
wc['dec 25, 2017'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'dec 10, 2017']].apply(sec_check, axis=1)

In [257]:
wc['jan 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'dec 25, 2017']].apply(ma_end_month, axis=1)

In [258]:
wc['jan 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jan 10, 2018']].apply(sec_check, axis=1)

In [259]:
wc['feb 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jan 25, 2018']].apply(ma_end_month, axis=1)

In [260]:
wc['feb 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'feb 10, 2018']].apply(sec_check, axis=1)

In [261]:
wc['mar 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'feb 25, 2018']].apply(ma_end_month, axis=1)

In [262]:
wc['mar 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'mar 10, 2018']].apply(sec_check, axis=1)

In [263]:
wc['apr 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'mar 25, 2018']].apply(ma_end_month, axis=1)

In [264]:
wc['apr 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'apr 10, 2018']].apply(sec_check, axis=1)

In [265]:
wc['may 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'apr 25, 2018']].apply(ma_end_month, axis=1)

In [266]:
wc['may 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'may 10, 2018']].apply(sec_check, axis=1)

In [267]:
wc['jun 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'may 25, 2018']].apply(ma_end_month, axis=1)

In [268]:
wc['jun 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jun 10, 2018']].apply(sec_check, axis=1)

In [269]:
wc['jul 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jun 25, 2018']].apply(ma_end_month, axis=1)

In [270]:
wc['jul 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jul 10, 2018']].apply(sec_check, axis=1)

In [271]:
wc['aug 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jul 25, 2018']].apply(ma_end_month, axis=1)

In [272]:
wc['aug 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'aug 10, 2018']].apply(sec_check, axis=1)

In [273]:
wc['sep 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'aug 25, 2018']].apply(ma_end_month, axis=1)

In [274]:
wc['sep 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'sep 10, 2018']].apply(sec_check, axis=1)

In [275]:
wc['oct 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'sep 25, 2018']].apply(ma_end_month, axis=1)

In [276]:
wc['oct 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'oct 10, 2018']].apply(sec_check, axis=1)

In [277]:
wc['nov 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'oct 25, 2018']].apply(ma_end_month, axis=1)

In [278]:
wc['nov 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'nov 10, 2018']].apply(sec_check, axis=1)

In [279]:
wc['dec 10, 2018'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'nov 25, 2018']].apply(ma_end_month, axis=1)

In [280]:
wc['dec 25, 2018'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'dec 10, 2018']].apply(sec_check, axis=1)

In [281]:
wc['jan 10, 2019'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'dec 25, 2018']].apply(ma_end_month, axis=1)

In [282]:
wc['jan 25, 2019'] = wc[['account tier', 'job position', 'monthly deposit avg', 'payme deposit' ,'jan 10, 2019']].apply(sec_check, axis=1)

In [284]:
wc['feb 10, 2019'] = wc[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jan 25, 2019']].apply(ma_end_month, axis=1)

In [287]:
wc.to_csv('working_class.csv', index=False)

In [285]:
wc.shape

(2074628, 69)

In [382]:
mill_wc_df = pd.concat([mill_df, wc],ignore_index=True)

In [301]:
mill_wc_df.to_csv('mill_wc_df.csv', index=False)

### Senior

In [290]:
len(srs)

452465

In [294]:
senior = pd.DataFrame(srs, columns=['age'])

In [295]:
senior['gender'] = pd.DataFrame(np.random.choice(['m','f'],size=451372, p=[.46,.54]))

In [296]:
senior['nationality'] = pd.DataFrame(np.random.choice(['hong kong', 'china', 'other asian', 'white'], size=451372, p=[.85,.10,.04,.01]))

In [297]:
senior['expat'] = senior['nationality'].apply(lambda x: 'no' if (x=='hong kong') | (x=='china') else 'yes')

In [298]:
senior['status'] = pd.DataFrame(np.random.choice(['single', 'married', 'divorce', 'widower'], size=451372, p=[.2,.6,.05,.15]))

In [299]:
senior['account tier'] = senior['status'].apply(lambda x: np.random.choice(['personal', 'advance', 'premier'], p=[.75,.20,.05]))

In [302]:
senior['employment'] = senior['age'].apply(lambda x: np.random.choice(['full time', 'part time', 'no'], p=[.3,.5,.2]) if x < 75 else (np.random.choice(['full time', 'part time', 'no'], p =[.05, .15, .80]) if x < 85 else 'no'))

In [303]:
senior['joint'] = senior['status'].apply(lambda x: np.random.choice(['yes', 'no'], p=[.8,.2]) if x == 'yes' else np.random.choice(['yes','no'], p=[.65, .35]))

In [304]:
senior['industry'] = senior['employment'].apply(lambda x: 'retired' if x == 'no' else np.random.choice(['other', 'service'], p=[.15, .85]))

In [305]:
senior['education'] = pd.DataFrame(np.random.choice(['primary', 'secondary','tertiary'], size= 452287, p=[.65,.25,.10]))

In [309]:
senior['job position'] = senior['employment'].apply(lambda x: 'retired' if x =='no' else np.random.choice(['other', 'associate'], p=[.35,.65]))

In [314]:
def sr_salary(col):
    """
    senior salary
    """
    if col == 'unemployed':
        return 0
    elif col == 'part time':
        return np.round(np.random.normal(5000, 250),2)
    else:
        return np.round(np.random.normal(14000, 1000),2)

In [315]:
senior['salary'] = senior['employment'].apply(sr_salary)

In [316]:
def sr_deposit(col):
    """
    senior deposit
    """
    if col != 0:
        return col * np.random.choice([.7, .8, .95, 1.05, 1.10], p=[.4,.25,.2,.05,.1])
    else:
        return np.random.choice([2500, 5000, 10000], p=[.45, .3, .25])

In [317]:
senior['monthly deposit avg'] = senior['salary'].apply(sr_deposit)

In [318]:
def sr_expense(col):
    """
    senior expense
    """
    sal = col[0]
    mda = col[1]
    if sal != 0:
        return np.round(mda * np.random.choice([.45,.35,.2,.1],p=[.33,.17,.25,.25]),2)
    else:
        return np.round(mda * np.random.choice([.4,.3,.15], p=[.55,.35,.1]))

In [319]:
senior['monthly expense avg'] = senior[['salary', 'monthly deposit avg']].apply(sr_expense, axis=1)

In [320]:
senior['credit card'] = pd.DataFrame(np.random.choice(['yes','no'], size=452287, p=[.8,.2]))

In [321]:
senior['debit card'] = pd.DataFrame(np.random.choice(['yes', 'no'], size=452287, p=[.35,.65]))

In [322]:
def sr_ccexp(col):
    """
    senior cc expense
    """
    cc = col[0]
    exp = col[1]
    if cc == 'yes':
        return exp*np.random.choice([.3, .15,.05], p=[.25,.55,.2])
    else:
        return 0

In [323]:
senior['credit card expense'] = senior[['credit card', 'monthly expense avg']].apply(sr_ccexp, axis=1)

In [324]:
senior['payme'] = pd.DataFrame(np.random.choice(['yes', 'no'],size=452287, p=[.02,.98]))

In [325]:
senior['payme deposit'] = senior['payme'].apply(lambda x: np.round(np.random.normal(600,100),2) if x=='yes' else 0)

In [327]:
senior['payme expense'] = senior['payme'].apply(lambda x: np.round(np.random.normal(1000, 100),2) if x=='yes' else 0)

In [328]:
def sr_start(col):
    """
    senior start month
    """
    acct = col[0]
    if acct == 'premier':
        return np.round(np.random.choice([np.random.normal(1200000,15000), np.random.normal(1500000, 50000)],p=[.44,.56]),2)
    elif acct == 'advance':
        return np.round(np.random.choice([np.random.normal(400000, 50000), np.random.normal(250000,10000), np.random.normal(800000, 100000)], p=[.5,.2,.3]),2)
    else:
        return np.round(np.random.choice([np.random.normal(50000, 25000), np.random.normal(100000, 25000), np.random.normal(20000, 500)],p=[.4,.2,.4]),2)

In [329]:
senior['feb 10, 2017'] = senior['account tier'].apply(sr_start)

In [330]:
def sr_sec_check(col):
    """
    senior second month
    """
    acct = col[0]
    emp = col[1]
    mda = col[2]
    pd = col[3]
    cm = col[4]
    if acct == 'premier':
        if emp == 'yes':
            return np.round((mda+pd+cm) * np.random.choice([1.05,.95,1.2],p=[.33,.5,.17]),2)
        else:
            return np.round(np.random.normal(mda+pd+cm, 10000),2)
    elif acct == 'advance':
        return np.round((mda+pd+cm)*np.random.choice([.75,.9,1.05,1.2], p=[.24, .33, .33,.1 ]),2)
    else:
        return np.round((mda+pd+cm)*np.random.choice([.8, .95, 1], p=[.25, .45, .3]),2)

In [331]:
senior['feb 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'feb 10, 2017']].apply(sr_sec_check, axis=1)

In [332]:
def sr_end_month(col):
    """
    sr new month
    """
    acct = col[0]
    me = col[1]
    ce = col[2]
    pe = col[3]
    em = col[4]
    if acct == 'premier':
        return np.round((em-me-ce-pe)*np.random.choice([.75,.85,.95,1.1,], p=[.15,.25,.35,.25]),2)
    elif acct == 'advance':
        return np.round((em-me-ce-pe)* np.random.choice([.80,.95, 1, 1.05], p =[.25, .25, .15, .35 ]),2)
    else:
        return np.round((em-me-ce-pe)* np.random.choice([.80,.85,.95,1], p=[.15, .25, .35, .25]),2)

In [333]:
senior['mar 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'feb 25, 2017']].apply(sr_end_month, axis=1)

In [334]:
senior['mar 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'mar 10, 2017']].apply(sr_sec_check, axis=1)

In [335]:
senior['apr 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'mar 25, 2017']].apply(sr_end_month, axis=1)

In [336]:
senior['apr 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'apr 10, 2017']].apply(sr_sec_check, axis=1)

In [337]:
senior['may 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'apr 25, 2017']].apply(sr_end_month, axis=1)

In [338]:
senior['may 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'may 10, 2017']].apply(sr_sec_check, axis=1)

In [339]:
senior['jun 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'may 25, 2017']].apply(sr_end_month, axis=1)

In [340]:
senior['jun 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jun 10, 2017']].apply(sr_sec_check, axis=1)

In [341]:
senior['jul 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jun 25, 2017']].apply(sr_end_month, axis=1)

In [342]:
senior['jul 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jul 10, 2017']].apply(sr_sec_check, axis=1)

In [343]:
senior['aug 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jul 25, 2017']].apply(sr_end_month, axis=1)

In [344]:
senior['aug 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'aug 10, 2017']].apply(sr_sec_check, axis=1)

In [345]:
senior['sep 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'aug 25, 2017']].apply(sr_end_month, axis=1)

In [346]:
senior['sep 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'sep 10, 2017']].apply(sr_sec_check, axis=1)

In [347]:
senior['oct 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'sep 25, 2017']].apply(sr_end_month, axis=1)

In [348]:
senior['oct 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'oct 10, 2017']].apply(sr_sec_check, axis=1)

In [349]:
senior['nov 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'oct 25, 2017']].apply(sr_end_month, axis=1)

In [350]:
senior['nov 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'nov 10, 2017']].apply(sr_sec_check, axis=1)

In [351]:
senior['dec 10, 2017'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'nov 25, 2017']].apply(sr_end_month, axis=1)

In [352]:
senior['dec 25, 2017'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'dec 10, 2017']].apply(sr_sec_check, axis=1)

In [353]:
senior['jan 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'dec 25, 2017']].apply(sr_end_month, axis=1)

In [354]:
senior['jan 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jan 10, 2018']].apply(sr_sec_check, axis=1)

In [355]:
senior['feb 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jan 25, 2018']].apply(sr_end_month, axis=1)

In [356]:
senior['feb 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'feb 10, 2018']].apply(sr_sec_check, axis=1)

In [357]:
senior['mar 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'feb 25, 2018']].apply(sr_end_month, axis=1)

In [358]:
senior['mar 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'mar 10, 2018']].apply(sr_sec_check, axis=1)

In [359]:
senior['apr 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'mar 25, 2018']].apply(sr_end_month, axis=1)

In [360]:
senior['apr 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'apr 10, 2018']].apply(sr_sec_check, axis=1)

In [361]:
senior['may 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'apr 25, 2018']].apply(sr_end_month, axis=1)

In [362]:
senior['may 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'may 10, 2018']].apply(sr_sec_check, axis=1)

In [363]:
senior['jun 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'may 25, 2018']].apply(sr_end_month, axis=1)

In [364]:
senior['jun 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jun 10, 2018']].apply(sr_sec_check, axis=1)

In [365]:
senior['jul 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jun 25, 2018']].apply(sr_end_month, axis=1)

In [366]:
senior['jul 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jul 10, 2018']].apply(sr_sec_check, axis=1)

In [367]:
senior['aug 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jul 25, 2018']].apply(sr_end_month, axis=1)

In [368]:
senior['aug 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'aug 10, 2018']].apply(sr_sec_check, axis=1)

In [369]:
senior['sep 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'aug 25, 2018']].apply(sr_end_month, axis=1)

In [370]:
senior['sep 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'sep 10, 2018']].apply(sr_sec_check, axis=1)

In [371]:
senior['oct 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'sep 25, 2018']].apply(sr_end_month, axis=1)

In [372]:
senior['oct 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'oct 10, 2018']].apply(sr_sec_check, axis=1)

In [373]:
senior['nov 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'oct 25, 2018']].apply(sr_end_month, axis=1)

In [374]:
senior['nov 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'nov 10, 2018']].apply(sr_sec_check, axis=1)

In [375]:
senior['dec 10, 2018'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'nov 25, 2018']].apply(sr_end_month, axis=1)

In [376]:
senior['dec 25, 2018'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'dec 10, 2018']].apply(sr_sec_check, axis=1)

In [377]:
senior['jan 10, 2019'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'dec 25, 2018']].apply(sr_end_month, axis=1)

In [378]:
senior['jan 25, 2019'] = senior[['account tier', 'employment','monthly deposit avg', 'payme deposit', 'jan 10, 2019']].apply(sr_sec_check, axis=1)

In [379]:
senior['feb 10, 2019'] = senior[['account tier', 'monthly expense avg', 'credit card expense', 'payme expense', 'jan 25, 2019']].apply(sr_end_month, axis=1)

In [380]:
senior.shape

(452465, 69)

In [393]:
df = pd.concat([senior, mill_wc_df],ignore_index=True, sort=False)

In [396]:
df_bank = df.reset_index()

In [400]:
df = df_bank.drop('index', axis=1)

In [406]:
df.to_csv('monthly_bank.csv', index=False)