In [227]:
import random
from random import randrange

import matplotlib as plt
import pandas as pd

import bank_account

# Generating Dummy Accounts 

This section creates a dummy dataset of accounts for the rest of the analysis to draw from.

In [228]:
def create_accounts(num_accounts):
    """
    Build a collection of randomly generated dummy accounts.

    Every account is a dict holding its numeric "ID", a fixed "Profile",
    a random "Balance" and "Income", one random amount per spending
    category, and "Total Expenditures" (the sum of the category amounts).

    :param num_accounts: number of accounts to create
    :return: dict mapping str(ID) to the generated account dict
    """

    # (category, lower bound, upper bound) -- upper bound is exclusive,
    # as with randrange. The order here fixes the order of the random draws.
    expense_bounds = (
        ("Housing", 800, 1200),
        ("Groceries", 200, 600),
        ("Transportation", 150, 350),
        ("Medical", 100, 300),
        ("Entertainment", 100, 300),
        ("Shopping", 100, 300),
        ("Dining", 200, 400),
    )

    accounts_by_key = {}

    for account_id in range(num_accounts):
        account = {
            "ID": account_id,
            "Profile": "Young Professional",
            "Balance": randrange(10000, 80000),
            "Income": randrange(4000, 8000),
        }

        # Draw one amount per spending category.
        for category, low, high in expense_bounds:
            account[category] = randrange(low, high)

        account["Total Expenditures"] = sum(
            account[category] for category, _, _ in expense_bounds
        )

        # Accounts are keyed by the string form of their numeric ID.
        accounts_by_key[str(account_id)] = account

    return accounts_by_key

In [229]:
# Fixed seed so that "Restart Kernel -> Run All" regenerates the same dummy data
# (create_accounts draws from the module-level generator in `random`).
RANDOM_SEED = 42
# Number of dummy accounts to generate.
NUM_ACCOUNTS = 10000

random.seed(RANDOM_SEED)
data = create_accounts(NUM_ACCOUNTS)

In [230]:
# Build one row per account: the dict keys (stringified account IDs) become
# the row index and each account's fields become the columns.
df_numbers = pd.DataFrame.from_dict(data, orient='index')

# Show the resulting frame as the cell output.
df_numbers

Unnamed: 0,Profile,Transportation,Total Expenditures,Entertainment,Medical,Housing,Dining,Groceries,Income,Shopping,Balance,ID
0,Young Professional,233,2511,146,256,1035,250,355,5099,236,20714,0
1,Young Professional,173,2373,132,262,994,216,316,5614,280,54353,1
10,Young Professional,273,2331,223,105,886,218,356,6972,270,68238,10
100,Young Professional,245,2418,190,277,845,389,200,5919,272,53617,100
1000,Young Professional,337,2445,133,100,1056,312,274,6702,233,26165,1000
1001,Young Professional,282,2423,104,226,837,347,422,6542,205,36113,1001
1002,Young Professional,189,2640,218,296,1153,252,339,7775,193,21393,1002
1003,Young Professional,342,2693,265,163,1159,260,287,6839,217,28779,1003
1004,Young Professional,290,2635,283,298,1061,207,303,6162,193,56748,1004
1005,Young Professional,214,2541,262,108,956,294,438,4834,269,41693,1005


In [232]:
# Columns that are not spending categories and must keep their raw values.
non_expense_columns = ["Total Expenditures", "ID", "Balance", "Income", "Profile"]

# Work on a copy: a plain assignment would only alias df_numbers, so the
# division below would overwrite the raw amounts and re-running this cell
# would divide the already-divided values a second time.
df_proportions = df_numbers.copy()

# Express every spending category as a fraction of the account's total expenditures.
for column in df_proportions:
    if column not in non_expense_columns:
        df_proportions[column] = df_proportions[column] / df_proportions["Total Expenditures"]


In [233]:
# Show the per-category proportions as the cell output.
df_proportions

Unnamed: 0,Profile,Transportation,Total Expenditures,Entertainment,Medical,Housing,Dining,Groceries,Income,Shopping,Balance,ID
0,Young Professional,0.074233,2511,0.011629,0.091756,0.288530,0.029869,0.113102,5099,0.037595,20714,0
1,Young Professional,0.058323,2373,0.011125,0.099368,0.293215,0.027307,0.106532,5614,0.047198,54353,1
10,Young Professional,0.093694,2331,0.019133,0.040541,0.266066,0.028057,0.122179,6972,0.046332,68238,10
100,Young Professional,0.081059,2418,0.015715,0.103102,0.244624,0.048263,0.066170,5919,0.044996,53617,100
1000,Young Professional,0.110266,2445,0.010879,0.036810,0.302331,0.038282,0.089652,6702,0.038119,26165,1000
1001,Young Professional,0.093108,2423,0.008584,0.083946,0.241808,0.042963,0.139331,6542,0.033842,36113,1001
1002,Young Professional,0.057273,2640,0.016515,0.100909,0.305720,0.028636,0.102727,7775,0.029242,21393,1002
1003,Young Professional,0.101597,2693,0.019681,0.054475,0.301263,0.028964,0.085258,6839,0.032232,28779,1003
1004,Young Professional,0.088046,2635,0.021480,0.101784,0.281860,0.023567,0.091992,6162,0.029298,56748,1004
1005,Young Professional,0.067375,2541,0.020622,0.038253,0.263361,0.034711,0.137898,4834,0.042346,41693,1005


# The Big Question: Who is a good spender?
This question will decide who we are comparing our users to

For now: whoever keeps their "wants" at or below a threshold proportion of their total expenditures.

Somewhat arbitrary assumptions for the purpose of this project:

- "Wants" categories: Entertainment, Dining, Shopping.
- Threshold proportion: 0.07
    
We understand this is a HUGE oversimplification; it is only a placeholder for now.
Ideally, we won't be the ones answering this question; a trained machine learning model will.


In [269]:
# Maximum share of total expenditures that may go to "wants".
threshold = 0.07

# Combined share spent on the "wants" categories, computed for all accounts at
# once. The additions are kept in this order so the comparison against the
# threshold sees the same floating-point sums as a row-by-row evaluation.
wants_share = (
    df_proportions["Entertainment"]
    + df_proportions["Dining"]
    + df_proportions["Shopping"]
)

# Keep the accounts at or below the threshold. Copy so later edits cannot
# touch df_proportions, and index the result by the numeric account ID.
df_goodSpenders = df_proportions.loc[wants_share <= threshold].copy()
df_goodSpenders.index = df_goodSpenders["ID"].tolist()

# Dict view of the same accounts, keyed by account ID (each value maps
# column name -> value); later cells use it to count the good spenders.
good_spenders = df_goodSpenders.to_dict(orient='index')


df_goodSpenders

Unnamed: 0,Profile,Transportation,Total Expenditures,Entertainment,Medical,ID,Dining,Groceries,Income,Shopping,Balance,Housing
4,Young Professional,0.092395,2459,0.017080,0.039894,4,0.028304,0.181537,6262,0.019032,75847,0.270150
21,Young Professional,0.064223,2728,0.011804,0.089406,21,0.022434,0.159531,7536,0.026833,40255,0.294062
29,Young Professional,0.068580,2578,0.019240,0.096703,29,0.029907,0.159814,6826,0.019240,17513,0.254151
30,Young Professional,0.098840,2501,0.012395,0.043543,30,0.036226,0.182647,4444,0.015994,23462,0.263934
34,Young Professional,0.085807,2163,0.009894,0.087795,34,0.031900,0.110957,6882,0.020342,61379,0.314887
42,Young Professional,0.058123,2739,0.016356,0.079518,42,0.023768,0.151588,4848,0.024827,23578,0.298503
49,Young Professional,0.069830,2234,0.019606,0.061235,49,0.028335,0.147538,6861,0.020591,78807,0.291406
50,Young Professional,0.078889,2880,0.015764,0.073125,50,0.023854,0.151111,6388,0.024722,31196,0.287778
58,Young Professional,0.078418,2326,0.011608,0.040628,58,0.026956,0.119003,5601,0.020980,56339,0.355417
70,Young Professional,0.079380,2711,0.011361,0.043821,70,0.034194,0.144006,7849,0.024345,31643,0.308300


In [259]:
# Report how many accounts qualified as good spenders.
num_good_spenders = len(good_spenders)
print("Number of good spenders = " + str(num_good_spenders))

Number of good spenders = 1334


In [270]:
# Summary statistics for the good spenders. Left as the bare last expression
# so the notebook renders it as a table; print() falls back to plain text,
# which wraps wide frames across several chunks.
df_goodSpenders.describe()

       Transportation  Total Expenditures  Entertainment      Medical  \
count     1334.000000         1334.000000    1334.000000  1334.000000   
mean         0.082278         2549.536732       0.013821     0.075419   
std          0.017503          190.534664       0.004054     0.019632   
min          0.044037         1955.000000       0.007109     0.033482   
25%          0.068605         2426.250000       0.010280     0.059852   
50%          0.084202         2561.000000       0.013165     0.077900   
75%          0.095741         2683.750000       0.016872     0.091445   
max          0.129646         3062.000000       0.024386     0.123446   

                ID       Dining    Groceries       Income     Shopping  \
count  1334.000000  1334.000000  1334.000000  1334.000000  1334.000000   
mean   4937.572714     0.029720     0.141563  6027.530735     0.021556   
std    2932.126471     0.004458     0.029121  1143.752994     0.004485   
min       4.000000     0.020654     0.063174  

In [278]:
# Half-width of the band around each mean, as a fraction of that mean.
tolerance = 0.05
# Columns that are not spending categories and get no range.
non_expense_columns = ["Total Expenditures", "ID", "Balance", "Income", "Profile"]

# Maps each spending category to (lower, upper) = mean -/+ tolerance * mean,
# where the mean is taken over the good spenders.
avg_good_spender_ranges = {}

# NOTE(review): this cell originally read from `df_avg_goodSpender`, which no
# visible cell defines (NameError on a fresh kernel). The ranges it printed
# match the column means of df_goodSpenders, so that frame is used here --
# confirm this is the intended source.
for column in df_goodSpenders:
    if column not in non_expense_columns:
        # Compute the mean once per column instead of once per use.
        column_mean = df_goodSpenders[column].mean()
        margin = tolerance * column_mean
        avg_good_spender_ranges[column] = (column_mean - margin, column_mean + margin)

print(avg_good_spender_ranges)

{'Shopping': (0.02047837897267619, 0.022633997811905265), 'Entertainment': (0.01313011261146118, 0.014512229728457095), 'Medical': (0.07164815017696778, 0.07919006072191175), 'Dining': (0.028233703694164926, 0.03120567250407702), 'Groceries': (0.13448442808255912, 0.1486406836701969), 'Transportation': (0.07816405118523141, 0.08639184604683471), 'Housing': (0.2755350423188039, 0.30453873098394113)}
