In [1]:
import pandas as pd #noqa: F401
import numpy as np #noqa: F401

import datetime
from datetime import datetime  ###, timedelta, timezone
import phonenumbers

## names and eids of organizers who are no longer with us
from departed import departed_eids, departed_names

## Run configuration.
## profiles-<date>.csv is a dump of the Empower data; the dumps normally live on the N: drive.
today = '24-9-24'
home = 'N:/'
path = f'{home}Al/RelationalVoterProgram/Python/ReadEmpowerData_2024/'
data_file = f'Profiles/profiles-{today}.csv'

## Alternative location when the dump is kept locally instead:
# home = 'C:/'
# path = home + 'Users/nicho/Downloads/'
# data_file = 'profiles-' + today + '.csv'

## Role values that count as "leaders" versus plain contacts.
organizers = [
    'Director',
    'Organizer',
    'Volunteer',
]
contacts = ['Contact']

## Region labels (not referenced by the cells visible in this notebook section).
regions = [
    'Green Bay',
    'Kenosha',
    'Racine',
    'Madison',
    'Milwaukee',
    'Manitowoc',
    'Sheboygan',
    'Walworth',
    'Waukesha',
    'Unknown Region',
]

import sys
sys.path.append('../Common')
## helper functions for data cleaning
from edatools import InitializeDataFrames, CleanPhone, SplitTime, beginningOfTime #noqa: F401
from edatools import ColumnMove, IsBlank, IsNotBlank #noqa: F40

In [None]:
## Load the Empower profile dump from disk and tidy the columns used later.
data = InitializeDataFrames(path, data_file, {})
if not data.empty:
    print('Loaded', len(data), 'records.')

## Both timestamp columns go through the same SplitTime helper.
for timestamp_column in ('Created At', 'Last Used Empower At'):
    data[timestamp_column] = data[timestamp_column].apply(SplitTime)

## 'Address Line 2' is not needed by this analysis.
data = data.drop(columns=['Address Line 2'])

## Blank out missing names / parent ids with a single space so later string
## concatenation and EID comparisons never see NaN.
blank_fill = {'First Name': ' ', 'Last Name': ' ', 'Parent EID': ' '}
data = data.fillna(blank_fill)

data = CleanPhone(data, phone_column='Phone')
print("Cleanup complete.")

In [None]:
columns_to_keep = ['Parent EID', 'EID', 'Role', 'First Name', 'Last Name',
    'Phone', 'Created At', 'Last Used Empower At']

## Subset the leaders and the contacts.
## The contact rows get their own name: rebinding `contacts` (the role list from the
## config cell) to a DataFrame made this cell give wrong results when run a second time.
leaders = data.loc[data['Role'].isin(organizers)]
contact_profiles = data.loc[data['Role'].isin(contacts)]

## Over the years, many leaders signed onto the system once and never did anything. We want to consider only
## people who signed on at least twice, so their last use date is later than their start date.
print('initial leader count:', len(leaders))
activated_leaders = leaders.loc[leaders['Last Used Empower At'] > leaders['Created At']]
print('activated leader count:', len(activated_leaders))

## We only want to consider people for whom we have a phone number we can call.
mask = activated_leaders['Phone'].apply(IsNotBlank)
print('total reachable activated leader count:', int(mask.sum()))

## Select rows and columns in one step, then rename without `inplace` so the result is an
## independent frame (no SettingWithCopy warnings when later cells add columns to it).
reachable_activated_leaders = (
    activated_leaders
    .loc[mask, columns_to_keep]
    .rename(columns={
        'Parent EID': 'ParentEID',
        'First Name': 'FirstName',
        'Last Name': 'LastName',
        'Created At': 'CreatedAt',
        'Last Used Empower At': 'LastUsedEmpowerAt',
    })
)

## Subset directors, organizers, and volunteers.
reachable_directors = reachable_activated_leaders.loc[reachable_activated_leaders['Role'] == 'Director']
print('reachable activated director count:', len(reachable_directors))
reachable_organizers = reachable_activated_leaders.loc[reachable_activated_leaders['Role'] == 'Organizer']
print('reachable activated organizer count:', len(reachable_organizers))
reachable_volunteers = reachable_activated_leaders.loc[reachable_activated_leaders['Role'] == 'Volunteer']
print('reachable activated volunteer count:', len(reachable_volunteers))


In [4]:
## Attach each leader's parent (the profile whose EID equals the leader's ParentEID).
## Build the EID lookup once instead of scanning the whole `data` frame for every leader.
## setdefault keeps the FIRST record seen for an EID, matching the former `.values[0]`.
parent_lookup = {}
for eid, first, last, role in zip(data['EID'], data['First Name'], data['Last Name'], data['Role']):
    parent_lookup.setdefault(eid, (first, last, role))

parent_names = []
parent_roles = []
for parent_eid in reachable_activated_leaders['ParentEID']:
    parent_record = parent_lookup.get(parent_eid)
    if parent_record is None:
        ## no profile with that EID (e.g. blank ParentEID): use a single-space placeholder
        parent_names.append(" ")
        parent_roles.append(" ")
    else:
        parent_first, parent_last, parent_role = parent_record
        parent_names.append(parent_first + ' ' + parent_last)
        parent_roles.append(parent_role)

full_names = (reachable_activated_leaders['FirstName'] + ' '
              + reachable_activated_leaders['LastName']).tolist()

## assign() returns a new frame, so this never writes into a slice of `data`.
reachable_activated_leaders = reachable_activated_leaders.assign(
    ParentName=parent_names,
    ParentRole=parent_roles,
    FullName=full_names,
)
reachable_activated_leaders = ColumnMove(reachable_activated_leaders, 'ParentRole', 1)
reachable_activated_leaders = ColumnMove(reachable_activated_leaders, 'ParentName', 2)
reachable_activated_leaders = ColumnMove(reachable_activated_leaders, 'FullName', 5)

## FullName replaces the separate name columns; order by role, most recently active first.
reachable_activated_leaders = (
    reachable_activated_leaders
    .drop(columns=['FirstName', 'LastName'])
    .sort_values(by=['Role', 'LastUsedEmpowerAt'], ascending=[True, False])
    .reset_index(drop=True)
)

In [5]:
## Group leaders under their parent: index by (ParentRole, ParentEID, ParentName) and,
## within each parent, list the most recently active leaders first.
parent_index_levels = ['ParentRole', 'ParentEID', 'ParentName']
indexed_by_parent = reachable_activated_leaders.set_index(parent_index_levels)
multi = indexed_by_parent.sort_values(
    by=['ParentRole', 'ParentName', 'LastUsedEmpowerAt'],
    ascending=[True, True, False],
)

In [6]:

def highlight_cells(s):
    """Return the CSS style for one index label.

    The label is highlighted in yellow when it appears in ``departed_eids`` or,
    after stripping surrounding whitespace, in ``departed_names``.

    Parameters:
        s: an index label; expected to be a string, but any value is tolerated.

    Returns:
        ``"background-color: yellow;"`` for a departed organizer, otherwise ``""``.
    """
    if s in departed_eids:
        return "background-color: yellow;"
    ## Only strings can be stripped; a numeric or NaN label used to raise AttributeError here.
    if isinstance(s, str) and s.strip() in departed_names:
        return "background-color: yellow;"
    return ""

## Write the grouped table to Excel, highlighting departed parents in index levels
## 1 (ParentEID) and 2 (ParentName).
## to_excel creates the sheet itself, so no sheet is pre-created through the workbook:
## `create_sheet` exists only on openpyxl workbooks and fails when pandas selects
## xlsxwriter for .xlsx files. An `engine` argument to to_excel is ignored when an
## ExcelWriter is passed, so it is dropped; the writer's own engine is used.
with pd.ExcelWriter('test.xlsx') as writer:
    styled_multi = multi.style.map_index(highlight_cells, axis='index', level=[1, 2])
    styled_multi.to_excel(writer, sheet_name='Sheet1')