### Setting up the Lending Club time comparisons

First: the share of consumers living paycheck to paycheck, tracked over time

**Importing libraries, loading the CSV**

In [1]:
import os
import pandas as pd
from datetime import datetime

In [2]:
# Folder containing the survey export(s). The default is the original
# author's local Dropbox folder; set the LC_DATA_DIR environment variable to
# run the notebook on another machine without editing this cell.
dir_path = os.environ.get(
    'LC_DATA_DIR',
    'C:/Users/marco/Dropbox (MPD)/Analytics Argentina/Billable products/Surveys/Lending Club/All RDs',
)

# Change the working directory to the specified path so that relative file
# names (e.g. 'LC18.csv') resolve against dir_path.
os.chdir(dir_path)

In [3]:
# Needs to be updated periodically.
# current_order lists the survey period labels in chronological order;
# current_month currently equals its last entry.
# NOTE(review): the two newest labels contain a comma ("March, 2023",
# "April, 2023") while every earlier one does not ('February 2023') - confirm
# which spelling the code that consumes these labels expects.

current_order = [
    "November 2021", "December 2021",
    "January 2022", "February 2022", "March 2022", "April 2022",
    "May 2022", "June 2022", "July 2022", "August 2022",
    "September 2022", "October 2022", "November 2022", "December 2022",
    "January 2023", "February 2023", "March, 2023", "April, 2023",
]
current_month = "April, 2023"

In [4]:
# Setting up dataframe + codebook
#
# The export has a double header: the column names are the question codes and
# the first data row (index 0) holds each question's description. The file is
# read once and both objects are derived from that single parse.

df = pd.read_csv('LC18.csv', low_memory=False)  # hardcoded reference

# Codebook: question code -> text found in the description row.
codebook = df.loc[0].to_dict()

df = df.drop(0)  # drop the description row; only the codes are kept as column names
df = df[df["StartDate"].notnull()]  # the import brings in some blank rows; every real response has a StartDate
# No test responses considered. .copy() makes df an independent frame so the
# column assignments in later cells do not operate on a slice of the parsed
# file (avoids SettingWithCopyWarning).
df = df[df["Status"] == "IP Address"].copy()
print(f"Load successful. {df.shape[0]} rows and {df.shape[1]} columns loaded into dataframe")

Load successful. 3652 rows and 277 columns loaded into dataframe


**Preprocessing**

In [5]:
# StartDate and EndDate were imported as day numbers rather than dates, so
# rebuild them as 'YYYY-MM-DD' strings.

date_columns = ['StartDate', 'EndDate']
for col in date_columns:
    # Cast to float first, then truncate to whole days.
    # (presumably the raw cells are text such as '45019.0' - TODO confirm)
    day_numbers = df[col].astype(float).astype(int)
    # 693594 is the proleptic Gregorian ordinal of 1899-12-30; adding it turns
    # the imported day number into an ordinal datetime.fromordinal accepts.
    # NOTE(review): 1899-12-30 is day zero of spreadsheet serial dates, so the
    # export presumably passed through Excel - confirm.
    ordinals = day_numbers + 693594
    df[col] = ordinals.apply(
        lambda ordinal: datetime.fromordinal(ordinal).strftime('%Y-%m-%d')
    )

print('Check dates: look ok?')
df['StartDate'].unique()

array(['2023-04-03', '2023-04-04', '2023-04-05', '2023-04-06',
       '2023-04-07', '2023-04-08', '2023-04-09', '2023-04-10',
       '2023-04-11', '2023-04-12', '2023-04-13', '2023-04-14',
       '2023-04-15', '2023-04-16', '2023-04-17'], dtype=object)

In [6]:
# The survey weights must be numeric before they can be summed, so cast the
# column to float. The printed total is a quick visual check of the cast.

df['weight'] = df['weight'].astype(float)
print(df['weight'].sum())

260086951.46169


**Share of consumers living paycheck to paycheck**

In [7]:
# Look up which question code(s) mention a phrase in their codebook text.
word_to_find = 'financial lifestyle'

# Compare case-insensitively on both sides, and skip codebook entries that are
# not text (e.g. NaN where the description cell is empty) instead of failing
# on .lower().
search_phrase = word_to_find.lower()
for key, value in codebook.items():
    if isinstance(value, str) and search_phrase in value.lower():
        print(f"Word '{word_to_find}' found in key: {key}")

Word 'financial lifestyle' found in key: D15


In [8]:
df['D15'].unique()  # distinct answers currently stored in D15

array(['I live paycheck to paycheck and have issues paying my bills each month.',
       'I live paycheck to paycheck but am comfortably paying my bills each month.',
       'I do not live paycheck to paycheck and have more than enough earnings to cover my bills each month.'],
      dtype=object)

In [9]:
# Two mappings: one with the labels meant for charts (eventually), one with
# short codes for easy, shorthand access to the values.

p2p_mapping = {
    'I live paycheck to paycheck and have issues paying my bills each month.':
        'Paycheck to paycheck and struggling',
    'I live paycheck to paycheck but am comfortably paying my bills each month.':
        'Paycheck to paycheck but not struggling',
    'I do not live paycheck to paycheck and have more than enough earnings to cover my bills each month.':
        'Not paycheck to paycheck',
}

# Series.map returns NaN for any value missing from the dict, so mapping with
# p2p_mapping alone would wipe the column if this cell were run a second time.
# Letting each chart label map onto itself makes a re-run a no-op, while the
# first run (and the handling of unexpected answers) stays exactly the same.
p2p_recode = {**p2p_mapping, **{label: label for label in p2p_mapping.values()}}
df['D15'] = df['D15'].map(p2p_recode)

p2p_short = {
    'Paycheck to paycheck and struggling': 'Struggle',
    'Paycheck to paycheck but not struggling': 'P2P',
    'Not paycheck to paycheck': 'NotP2P',
}

# Short codes are derived from the recoded D15 labels.
df['D15_short'] = df['D15'].map(p2p_short)

In [10]:
df['D15'].unique()  # distinct values currently stored in D15

array(['Paycheck to paycheck and struggling',
       'Paycheck to paycheck but not struggling',
       'Not paycheck to paycheck'], dtype=object)

In [11]:
df['D15_short'].unique()  # distinct values currently stored in D15_short

array(['Struggle', 'P2P', 'NotP2P'], dtype=object)

In [19]:
# Weighted share of respondents in each D15 group: sum the weights per group,
# then divide by the total across groups.
# Results are stored in a dataframe so that, eventually, this can be looped to
# get all of them.
# Note: groupby leaves out rows whose D15 is missing, so those rows are
# excluded from the total as well.

p2p_groups = df.groupby('D15')

p2p_weight_subtotals = p2p_groups['weight'].sum()
total_weight = p2p_weight_subtotals.sum()
weight_shares = p2p_weight_subtotals / total_weight

# The column label is derived from current_month instead of a hand-typed
# literal, so it follows the periodic update of that variable. The comma is
# stripped so that "April, 2023" yields the same 'April 2023' label this cell
# produced before.
p2p_period_label = current_month.replace(',', '')
p2p_shares = pd.DataFrame({p2p_period_label: (weight_shares * 100).round(2)})