In [10]:
import datetime
import math
from urllib.parse import quote

import numpy as np
import pandas as pd
from pandas import Timestamp

In [12]:
def fix_date(x):
    """Convert a date-like value to ``datetime.date``, pulling post-2000 years back a century.

    Parameters
    ----------
    x : object exposing ``year``, ``month`` and ``day``
        A missing value is recognised by its ``year`` being NaN.

    Returns
    -------
    datetime.date or None
        ``None`` when the year is NaN; otherwise the same month and day with
        the year reduced by 100 if it is greater than 2000.
    """
    # NOTE(review): the century shift presumably undoes two-digit birth years
    # parsed into the 2000s -- confirm against the raw data.
    parsed_year = x.year
    if math.isnan(parsed_year):
        return None

    corrected_year = parsed_year - 100 if parsed_year > 2000 else parsed_year
    return datetime.date(int(corrected_year), x.month, x.day)

In [11]:
# Load the raw client export, drop the two columns that are not carried
# forward, and add four placeholder columns filled with NaN.
clientdf = (
    pd.read_json('original_data/Clients.json')
    .drop(columns=['LastName, FirstName MI', '#inFamily'])
    .assign(Gender=np.nan, Race=np.nan, Banned=np.nan, Note=np.nan)
)
assistancedf = pd.read_json('original_data/Assistances.json')

# One row per (client, assistance) pair; only ClientIDs present in both
# frames survive the default inner merge.  The two age columns are stored as
# strings, so missing values become the literal 'nan'.
result = (
    clientdf
    .merge(assistancedf, on='ClientID')
    .set_index(['ClientID', 'AssistanceID'])
    .astype({'BoyAge': str, 'GirlAge': str})
)

In [5]:
# Distinct client ids seen on each side of the client/assistance relationship.
clientset = set(clientdf['ClientID'])
assistanceset = set(assistancedf['ClientID'])

# Assistance rows whose ClientID never appears in the client table.
missing_assistances = assistancedf.loc[
    assistancedf['ClientID'].isin(assistanceset - clientset)
]
# Client rows whose ClientID never appears in the assistance table.
missing_clients = clientdf.loc[
    clientdf['ClientID'].isin(clientset - assistanceset)
]

In [14]:
def _longest_known_age(ages):
    """Return the longest age string in ``ages``, ignoring the 'nan' placeholder.

    Falls back to 'nan' only when every value in the group is 'nan'.  Without
    this filter the three-character 'nan' would beat shorter real values such
    as '8' or '10' in a length comparison.
    """
    known = [age for age in ages if age != 'nan']
    return max(known, key=len) if known else 'nan'


# Per client, keep the most detailed (longest) age description recorded
# across all of that client's assistance rows.
familydf = result.groupby('ClientID').agg(
    BoyAge = pd.NamedAgg(column='BoyAge', aggfunc=_longest_known_age),
    GirlAge = pd.NamedAgg(column='GirlAge', aggfunc=_longest_known_age)
)
# Keep clients with at least one known age string, then count the entries in
# each string by splitting on '-' or ','.
familydf = (familydf
                .loc[(familydf['BoyAge'] != 'nan') | (familydf['GirlAge'] != 'nan')]
                .assign(NumBoys=lambda df: df['BoyAge'].str.split('-|,').str.len())
                .assign(NumGirls=lambda df: df['GirlAge'].str.split('-|,').str.len())
)
# A 'nan' string would otherwise be counted as one child.
familydf.loc[familydf['BoyAge'] == 'nan', 'NumBoys'] = 0
familydf.loc[familydf['GirlAge'] == 'nan', 'NumGirls'] = 0
familydf['NumKids'] = familydf['NumBoys'] + familydf['NumGirls']

familydf = familydf[['NumKids']]
#familydf.reset_index().to_json('Family.json', orient='records')

In [7]:
# Free-text note to attach to specific clients, keyed by ClientID.
notemap = {
    1135: 'STOLE EILEENS PURSE IN THE CHILDRENS ROOM - BANNED FROM FRANCES HOUSE - PER K.',
    44311: 'emergency banned',
    37842: 'Banned',
    52332: 'BANNED\r\nLaura Young',
    3883: 'BANNED!!!!\r\nHAD TO CALL POLICE ON HER',
    3141: 'Banned until further notice!!!!!!!!!! IF REFUSES TO LEAVE.:CALL 911',
    22413: 'new client\r\nalso uses randy johnson and birthdate of 4-9-63 banned not elibile to return until dec 08',
    22431: 'AGGRESSIVE AND THREATENING - PERMANENTLY BANNED - PER STEPHANIE',
    4959: 'NASTY BELLIGERANT;STARTED FIGHT AND WOULD NOT LEAVE POLICE CAME: BANNED FOR A',
    13608: 'MUST HAVE ID NO EXCEPTIONS!! goes by Gerald Threet\r\n& F Phillip Threet same birthdate check by bday came in may and july under different names! *BANNED FOR 6 MONTHS, SEE DIRECTOR IF HE GIVES ANYONE A BAD TIME.',
    377: 'CANNOT COME IN IF SMELLS OF ALCOHOL!!',
    20824: 'The police are looking for this guy.  Give him what he wants and notify Alan or Stepahnie.',
    20541: 'also uses last name raymundo (raimundo)',
}

# ClientIDs flagged as banned; every listed id maps to 1.
bannedmap = dict.fromkeys(
    [1135, 44311, 37842, 52332, 3883, 3141, 22431, 4959],
    1,
)

# Highest AssistanceID recorded for each client.  The string alias 'max' is
# used instead of the Python builtin: pandas warns that builtin callables will
# stop being mapped to the optimised groupby reduction in a future version.
clientdf = result.reset_index().groupby('ClientID').agg(
                AssistanceID = pd.NamedAgg(column='AssistanceID', aggfunc='max')
)

# Join back onto `result` by (ClientID, AssistanceID) so each client keeps
# only the row for that highest AssistanceID, then drop the per-visit columns.
clientdf = (clientdf
                .reset_index()
                .set_index(['ClientID', 'AssistanceID'])
                .join(result)
                .reset_index()
                .drop(columns=['MenClothes', 'MQuant', 'WomenClothes', 
                'WQuant', 'ChildrenClothes', 'CQuant','Soap', 'ShampooConditioner', 
                'Lotion', 'Razor', 'Toothpaste', 'Toothbrush', 'Other', 'BoyAge', 
                'GirlAge', 'Furniture', 'Household', 'Director', 'New', 'Date'])
                .rename(columns={'AssistanceID':'LastAssistanceID'})
                # Unparseable birthdays become NaT instead of raising.
                .assign(Birthday=lambda df: pd.to_datetime(df['Birthday'], errors='coerce'))
                .fillna(value={'Zip Code': 0})
)

# Attach the per-client child count; clients absent from familydf get NaN.
clientdf = (clientdf
                .set_index('ClientID')
                .join(familydf[['NumKids']])
                .reset_index())

# Overwrite the placeholder Note/Banned columns from the hand-written maps;
# ClientIDs not present in a map get NaN.
clientdf['Note'] = clientdf.ClientID.map(notemap)
clientdf['Banned'] = clientdf.ClientID.map(bannedmap)

# Convert to datetime.date (None for NaT), shifting post-2000 years back a century.
clientdf['Birthday'] = clientdf['Birthday'].apply(fix_date)

In [18]:
# Column groups used when reshaping assistance rows into one row per item.
QUANTITY_ITEMS = ['MQuant', 'WQuant', 'CQuant']
TOILETRY_ITEMS = ['Soap', 'ShampooConditioner', 'Lotion', 'Razor', 'Toothpaste', 'Toothbrush']
FREE_TEXT_ITEMS = ['Other', 'Director']


def _join_free_text(df, columns, sep='; '):
    """Join the non-empty values of ``columns`` row by row, separated by ``sep``.

    Returns a list with one string per row of ``df``; a row whose values are
    all '' yields ''.
    """
    rows = zip(*(df[column] for column in columns))
    return [sep.join(str(part) for part in row if part != '') for row in rows]


transaction = (assistancedf
                .drop(columns=['BoyAge', 'GirlAge', 'New', 'Date', 'ClientID'])
                .fillna('')
                # Fold Furniture and Household into Other.  Plain `+` glued the
                # texts together with no delimiter (e.g. 'sleeping bagbus tix (2)').
                .assign(Other=lambda df: _join_free_text(df, ['Other', 'Furniture', 'Household']))
                .drop(columns=['Furniture', 'Household'])
)
# Long format: one row per (AssistanceID, item column).
transaction = pd.melt(transaction, id_vars=['AssistanceID'],
                      value_vars=QUANTITY_ITEMS + TOILETRY_ITEMS + FREE_TEXT_ITEMS)
transaction = (transaction
                # Keep only items that were actually recorded (non-zero, non-empty).
                .loc[(transaction['value'] != 0.0) & (transaction['value'].notna()) & (transaction['value'] != '')]
                .assign(note=lambda df: df['value'])
                .reset_index(drop=True)
                .assign(RequestQuantity=1)
                .rename(columns={'variable':'Item', 'value':'ReceivedQuantity', 'note':'Note'})
)
# Numeric items carry no note; only the free-text items keep their text there.
transaction.loc[transaction['Item'].isin(QUANTITY_ITEMS + TOILETRY_ITEMS), 'Note'] = ''
# NOTE(review): 15 looks like a default requested quantity for clothing rows -- confirm.
transaction.loc[transaction['Item'].isin(QUANTITY_ITEMS), 'RequestQuantity'] = 15
# Free-text items have their text in Note, so the received quantity is set to 1.
transaction.loc[transaction['Item'].isin(FREE_TEXT_ITEMS), 'ReceivedQuantity'] = 1

requests = transaction[['AssistanceID', 'Item', 'RequestQuantity', 'Note']]
receipts = transaction[['AssistanceID', 'Item', 'ReceivedQuantity', 'Note']]

In [20]:
# Display the long-format transaction table built from the assistance records.
transaction

Unnamed: 0,AssistanceID,Item,ReceivedQuantity,Note,RequestQuantity
0,66580,MQuant,2.0,,15
1,66583,MQuant,7.0,,15
2,66584,MQuant,6.0,,15
3,66586,MQuant,6.0,,15
4,66825,MQuant,5.0,,15
...,...,...,...,...,...
731165,300442,Director,1,safeway\r\ngoodwill,1
731166,299680,Director,1,gas card,1
731167,300185,Director,1,goodwill,1
731168,298978,Director,1,Sleeping bag,1


In [9]:
# Display the assistance table for inspection.
assistancedf

Unnamed: 0,AssistanceID,ClientID,Date,MenClothes,MQuant,WomenClothes,WQuant,ChildrenClothes,CQuant,Soap,...,Razor,Toothpaste,Toothbrush,Other,BoyAge,GirlAge,Furniture,Household,Director,New
0,66576,1141.0,06/01/04 00:00:00,0,0.0,1,12.0,0,0.0,0,...,0,0,0,"2 SHOES, BOOTS",,,,,,
1,66578,1645.0,06/01/04 00:00:00,0,0.0,0,0.0,1,13.0,0,...,0,0,0,Jackets for 4 & 7 year old,10,"4, 7",,,,
2,66579,20540.0,06/01/04 00:00:00,0,0.0,1,11.0,1,12.0,0,...,0,0,0,Jackets for 4 & 8 yr. Old,8,4,,,,
3,66580,20541.0,06/01/04 00:00:00,1,2.0,0,0.0,0,0.0,0,...,0,0,0,also uses last name raymundo (raimundo),,,,,,
4,66581,20542.0,06/01/04 00:00:00,1,0.0,0,0.0,0,0.0,0,...,0,0,0,MOP,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229199,298968,221.0,01/27/20 00:00:00,1,5.0,0,0.0,0,0.0,0,...,0,0,0,clothing and shoes;,,,,"bus ticket , glasses",,
229200,300185,50545.0,03/02/20 00:00:00,1,7.0,0,0.0,0,0.0,0,...,0,0,0,"ID seen. \r\nElectric razor, TV antennae, heav...",,,,,goodwill,
229201,298975,57035.0,01/27/20 00:00:00,1,6.0,0,0.0,0,0.0,0,...,0,0,0,,,,sleeping bag,bus tix (2),,
229202,298978,57768.0,01/27/20 00:00:00,1,3.0,0,0.0,0,0.0,0,...,0,0,0,,,,,"food box, blanket, gas card",Sleeping bag,


In [22]:
# Reduce the assistance table to its id columns plus three date columns.
# `Date` is parsed (unparseable values become NaT) and then copied into both
# "last ..." columns; later keywords in a single `assign` see the already
# converted `Date`.
assistancedf = (
    assistancedf
    .drop(columns=[
        'MenClothes', 'MQuant', 'WomenClothes', 'WQuant',
        'ChildrenClothes', 'CQuant', 'Soap', 'ShampooConditioner',
        'Lotion', 'Razor', 'Toothpaste', 'Toothbrush', 'Other',
        'BoyAge', 'GirlAge', 'Furniture', 'Household', 'Director', 'New',
    ])
    .assign(
        Date=lambda frame: pd.to_datetime(frame['Date'], errors='coerce'),
        LastBackPackDate=lambda frame: frame['Date'],
        LastSleepingBagDate=lambda frame: frame['Date'],
    )
)

In [23]:
# Display the assistance table for inspection.
assistancedf

Unnamed: 0,AssistanceID,ClientID,Date,LastBackPackDate,LastSleepingBagDate
0,66576,1141.0,2004-06-01,2004-06-01,2004-06-01
1,66578,1645.0,2004-06-01,2004-06-01,2004-06-01
2,66579,20540.0,2004-06-01,2004-06-01,2004-06-01
3,66580,20541.0,2004-06-01,2004-06-01,2004-06-01
4,66581,20542.0,2004-06-01,2004-06-01,2004-06-01
...,...,...,...,...,...
229199,298968,221.0,2020-01-27,2020-01-27,2020-01-27
229200,300185,50545.0,2020-03-02,2020-03-02,2020-03-02
229201,298975,57035.0,2020-01-27,2020-01-27,2020-01-27
229202,298978,57768.0,2020-01-27,2020-01-27,2020-01-27


In [18]:
import sqlalchemy
from sqlalchemy import create_engine
from original_data.creds import password, database

In [19]:
# Percent-encode the password before embedding it in the connection URL so
# characters such as '@', ':', '/' or '%' cannot be misread as URL delimiters.
# NOTE(review): the database user and host are hard-coded in this string;
# consider keeping them with `password`/`database` in the creds module.
url = 'mysql+pymysql://bd4ed864034363:{}@us-cdbr-east-06.cleardb.net/{}'.format(
    quote(str(password), safe=''), database)

In [20]:
# Build the SQLAlchemy engine for `url`; echo=False leaves SQL statement logging off.
engine = create_engine(url, echo=False)
# Open a connection from the engine.  Nothing in the visible notebook closes it.
connection = engine.connect()

In [24]:
#clientdf.to_sql('clients', connection, if_exists='replace', index=False, method='multi')

39874

In [None]:
# TODO: 1-month, 6-month, and 12-month windows
# TODO: views for each column-wise pivot