In [4]:
import pandas as pd #noqa: F401
import numpy as np #noqa: F401

import datetime
from datetime import datetime  ###, timedelta, timezone
import networkx as nx
from pyvis.network import Network
from pathlib import Path
import phonenumbers

## names and eids of organizers who are no longer with us
from departed import departed_eids, departed_names

import sys
sys.path.append('../Common/')
## helper functions for data cleaning
from edatools import InitializeDataFrames, CleanPhone, SplitTime, beginningOfTime #noqa: F401
from edatools import ColumnMove, IsBlank, IsNotBlank #noqa: F40

## Start of the current campaign.  campaignStartDate is the cutoff the Excel highlighting
## and the graph colouring compare each leader's last-use date against.
startDate = '2024-04-01 00:00:00'
campaignStartDate = datetime.fromisoformat(startDate)

## profiles-<date>.csv is a dump of the Empower data.  I store these on the N: drive.
## `today` is the hand-edited date suffix of the dump to read (it is not computed from the clock).
today = '24-9-24'
home = 'N:/'
path = home + 'Al/RelationalVoterProgram/Python/ReadEmpowerData_2024/'
data_file = 'Profiles/profiles-' + today + '.csv'

## Role values that count as leaders, and the role value used for voters.
## NOTE: `voters` is rebound to a DataFrame in the leader/voter split cell further down.
organizers = ['Director', 'Organizer', 'Volunteer']
voters = ['Contact']
regions = ['Green Bay', 'Kenosha', 'Racine', 'Madison', 'Milwaukee', 'Manitowoc', 'Sheboygan',
    'Walworth', 'Waukesha', 'Unknown Region']

## Date-only spelling of the same campaign start; parses to the same midnight as campaignStartDate.
## NOTE(review): no use of these two names is visible in this notebook -- confirm before removing.
startDate24 = '2024-04-01'
campaignStartDate24 = datetime.fromisoformat(startDate24)

A class to hold data about directors, supervoceros, and voceros:

In [5]:
class Organizer:
    """One leader (director, supervocero or vocero) in the organizing tree.

    Name and role strings are stored with surrounding whitespace removed, and
    ``full_name`` is the stripped first and last name joined by one space.
    ``organizer_list`` starts empty and collects the leaders attached beneath
    this one via :meth:`add_organizer`.
    """

    def __init__(self, first_name, last_name, role, eid, parent_eid, last_used_empower,
            personal_voter_count = 0, downstream_voter_count = 0, downstream_leader_count = 0):
        """Store identity, hierarchy links, last-use value and the three counts.

        Args:
            first_name, last_name, role: strings; each is stripped before storing.
            eid: this leader's Empower id.
            parent_eid: Empower id of the leader directly above this one.
            last_used_empower: last-use value, stored as given.
            personal_voter_count, downstream_voter_count, downstream_leader_count:
                counts stored as given; each defaults to 0.
        """
        first, last = first_name.strip(), last_name.strip()

        ## identity
        self.first_name = first
        self.last_name = last
        self.full_name = f'{first} {last}'
        self.role = role.strip()

        ## position in the hierarchy
        self.eid = eid
        self.parent_eid = parent_eid
        self.organizer_list = []

        ## activity and counts
        self.last_used_empower = last_used_empower
        self.personal_voter_count = personal_voter_count
        self.downstream_voter_count = downstream_voter_count
        self.downstream_leader_count = downstream_leader_count

    def add_organizer(self, organizer):
        """Attach ``organizer`` as a child of this leader."""
        self.organizer_list.append(organizer)

Load the data, do some cleanup:

In [None]:
## Read the Empower dump named in the configuration cell.
data = InitializeDataFrames(path, data_file, {})
if not data.empty:
    print('Loaded', len(data), 'records.')

## Columns kept from the dump (keys) and the space-free names used from here on (values).
column_renames = {
    'Parent EID': 'ParentEID',
    'EID': 'EID',
    'Role': 'Role',
    'First Name': 'FirstName',
    'Last Name': 'LastName',
    'Phone': 'Phone',
    'Created At': 'CreatedAt',
    'Last Used Empower At': 'LastUsedEmpowerAt',
}
columns_to_keep = list(column_renames.keys())
columns_renamed = list(column_renames.values())

## Keep and rename the columns, run SplitTime over both timestamp columns, and
## fill missing names / parent ids with a single space.
data = (
    data[columns_to_keep]
    .rename(columns=column_renames)
    .assign(
        CreatedAt=lambda frame: frame['CreatedAt'].apply(lambda stamp: SplitTime(stamp)),
        LastUsedEmpowerAt=lambda frame: frame['LastUsedEmpowerAt'].apply(lambda stamp: SplitTime(stamp)),
    )
    .fillna({'FirstName': ' ', 'LastName': ' ', 'ParentEID': ' '})
)

data = CleanPhone(data, phone_column='Phone')
print("Cleanup complete.")

Separate the voters from the leaders, and pull out the leaders who are no longer with us.  We want them out of the way in order to focus on the people below them in the hierarchy who still could potentially be activated.

In [None]:
## subset the leaders and the contacts
leaders = data.loc[data['Role'].isin(organizers)]
voters = data.loc[data['Role'] == 'Contact']
print('voter count:',len(voters))

## over the years, many leaders signed onto the system once and never did anything. We want to consider only
## people who signed on at least twice, so their last use date is later than their start date.
print('initial leader count:', len(leaders))
activated_leaders = leaders.loc[leaders['LastUsedEmpowerAt'] > leaders['CreatedAt']][columns_renamed]
print('activated leader count:', len(activated_leaders))

## drop the departed once, then split what remains by role
present_leaders = activated_leaders.loc[~activated_leaders['EID'].isin(departed_eids)]

## .copy() so the ParentEID overwrite below lands on an independent frame rather than
## on a filtered slice of activated_leaders (which raises SettingWithCopyWarning)
activated_directors = present_leaders.loc[present_leaders['Role'] == 'Director'].copy()
activated_organizers = present_leaders.loc[present_leaders['Role'] == 'Organizer']
activated_volunteers = present_leaders.loc[present_leaders['Role'] == 'Volunteer']

#do this so we can have a blank parent for building the tree
activated_directors.loc[:,'ParentEID'] = ''

## directors first, then organizers, then volunteers; pd.concat returns a new frame,
## so adding FullName here does not touch `data`
leaders = pd.concat([activated_directors, activated_organizers, activated_volunteers])
leaders['FullName'] = leaders['FirstName'] + ' ' + leaders['LastName']
leaders = leaders.reset_index(drop=True)
print('activated and present leader count:', len(leaders))

In [8]:
def WriteTreeForDirector(director_eid, leaders, voters):
    """Build the table of leaders in one director's tree, with voter and leader counts.

    The table contains the director's own row, every leader whose ParentEID is the
    director, and every leader whose ParentEID is an 'Organizer' row among those.
    Each EID appears at most once.

    Args:
        director_eid: EID of the leader at the top of the tree.
        leaders: DataFrame with at least ParentEID, EID, Role and LastUsedEmpowerAt columns.
        voters: DataFrame with a ParentEID column; one row per voter.

    Returns:
        A new DataFrame (fresh 0..n-1 index) sorted by Role, ParentEID and descending
        LastUsedEmpowerAt, with three added columns:
          PersonalVoterCount    - voters whose ParentEID is this row's EID
          DownstreamVoterCount  - for organizers: own voters plus those of their direct
                                  reports; for the director: the column total; otherwise
                                  the personal count
          DownstreamLeaderCount - for organizers: number of direct reports; for the
                                  director: the column total; otherwise 0
    """
    multi = leaders.set_index(['ParentEID','EID'])

    ## all of the leaders whose parent is the selected director
    ## (set_index/reset_index leaves ParentEID and EID as the leading columns)
    direct_reports = multi.loc[multi.index.get_level_values('ParentEID') == director_eid]\
        .sort_values(by = ['Role','LastUsedEmpowerAt'], ascending = [True, False])\
        .reset_index()

    ## add the selected director
    frames = [direct_reports, leaders.loc[leaders['EID'] == director_eid]]

    ## also add the leaders whose parents are organizers found so far
    for row in pd.concat(frames).itertuples():
        if row.Role == 'Organizer':
            reports = leaders.loc[leaders['ParentEID'] == row.EID]
            if not reports.empty:
                frames.append(reports)

    ## One row per EID and a unique index.  Without this, a director whose own role is
    ## 'Organizer' gets their reports appended twice, and the label-based writes below
    ## would hit every row that happens to share an index label after the concat.
    df = pd.concat(frames).drop_duplicates(subset='EID').reset_index(drop=True)

    ## get a voter count for each parent; leaders with no voters get 0
    voters_per_parent = voters.groupby('ParentEID').size()
    df['PersonalVoterCount'] = df['EID'].map(voters_per_parent).fillna(0).astype(int)
    df['DownstreamVoterCount'] = df['PersonalVoterCount']
    df['DownstreamLeaderCount'] = 0

    ## per Empower, downstream voter count for directors and organizers is the sum of their personal voter count
    ## and the voter counts of their children
    for row in df.loc[df['Role'] == 'Organizer'].itertuples():
        report_eids = leaders.loc[leaders['ParentEID'] == row.EID, 'EID']
        reports_voters = int(report_eids.map(voters_per_parent).fillna(0).sum())
        df.loc[row.Index, 'DownstreamLeaderCount'] = len(report_eids)
        df.loc[row.Index, 'DownstreamVoterCount'] = row.PersonalVoterCount + reports_voters

    ## sum all for the director that was asked for (this used to be hard-coded to a single
    ## EID, so every other director's totals were silently left unset)
    ## NOTE(review): the voter total adds the organizers' downstream counts *and* the rows of
    ## the leaders under them, so those leaders' voters appear to be counted twice -- confirm
    ## against Empower's definition before relying on the director's figure.
    is_director = df['EID'] == director_eid
    df.loc[is_director, 'DownstreamLeaderCount'] = df['DownstreamLeaderCount'].sum()
    df.loc[is_director, 'DownstreamVoterCount'] = df['DownstreamVoterCount'].sum()

    df = df.sort_values(by = ['Role','ParentEID', 'LastUsedEmpowerAt'], ascending = [True, True, False])
    return df.reset_index(drop=True)

In [9]:
def AddParentNames(df):
    """Add each row's parent name and role, and replace the name parts with FullName.

    Parents are looked up by EID in the module-level ``data`` frame; the first match
    is used.  When the lookup fails with ValueError or IndexError (for example, no
    profile has that EID), both the parent name and the parent role are a single space.

    The ParentName, ParentRole and FullName columns are written onto the frame that is
    passed in.  The three columns are then repositioned with ColumnMove, FirstName and
    LastName are dropped, and the rows are sorted by Role and descending
    LastUsedEmpowerAt with a fresh index.

    Returns:
        The reshaped DataFrame.
    """
    def parent_of(eid):
        ## (name, role) of the profile with this EID, or blanks when it cannot be read
        match = data.loc[data['EID'] == eid]
        try:
            name = match['FirstName'].values[0] + ' ' + match['LastName'].values[0]
            role = match['Role'].values[0]
        except (ValueError, IndexError):
            return " ", " "
        return name, role

    own_names, found_names, found_roles = [], [], []
    for rec in df.itertuples():
        own_names.append(rec.FirstName + ' ' + rec.LastName)
        name, role = parent_of(rec.ParentEID)
        found_names.append(name)
        found_roles.append(role)

    df['ParentName'] = found_names
    df['ParentRole'] = found_roles
    df['FullName'] = own_names

    ## target positions for the new columns, applied in this order
    for column, position in (('ParentRole', 1), ('ParentName', 2), ('FullName', 5)):
        df = ColumnMove(df, column, position)

    df.drop(columns=['FirstName', 'LastName'], inplace=True)
    df.sort_values(by=['Role','LastUsedEmpowerAt'], ascending = [True, False], inplace=True)
    df.reset_index(drop=True, inplace=True)
    return df

In [None]:
## Empower EID -> display name for each tree to export.  The export loop below passes
## the key to WriteTreeForDirector and uses the value as the worksheet name.
ids_to_names = {'u-56-21617':'Samanta Cardona',
'c-169766':'Luis Velasquez',
'c-189101':'Jarrett English',
'foup6emzlsdum7':'Laura Pastrana'}

## Workbook the export loop writes; a relative name, so it resolves against the
## kernel's current working directory.
out_file = 'RVPNetworkExtended.xlsx'

def highlight_date(s):
    """Styler callback: yellow background where the date in ``s`` precedes campaignStartDate.

    Returns an array aligned with ``s`` holding the CSS string for matching entries
    and None for the rest.
    """
    before_campaign = pd.to_datetime(s) < campaignStartDate
    return np.where(before_campaign, 'background-color: yellow;', None)

def highlight_name(s, v=None):
    """Styler callback: yellow background where the date in ``v`` precedes campaignStartDate.

    ``s`` (the values being styled) is not inspected; the result depends only on
    ``v``, the dates passed by keyword alongside it.
    """
    before_campaign = pd.to_datetime(v) < campaignStartDate
    return np.where(before_campaign, 'background-color: yellow;', None)

def highlight_eid(s, role = None, v=None):
    """Styler callback: yellow background for organizers last seen before campaignStartDate.

    An entry is highlighted only when its ``role`` is 'Organizer' and its date in
    ``v`` precedes campaignStartDate.  ``s`` (the values being styled) is not inspected.
    """
    before_campaign = pd.to_datetime(v) < campaignStartDate
    is_organizer = role == 'Organizer'
    flagged = np.where(before_campaign & is_organizer, True, False)
    return np.where(flagged, 'background-color: yellow;', None)
    

## Write one worksheet per entry in ids_to_names, named after the person.
## to_excel creates each sheet itself.  The previous explicit writer.book.create_sheet()
## call is an openpyxl-only API and raises AttributeError when pandas picks xlsxwriter
## for .xlsx output, so the loop no longer touches writer.book.
with pd.ExcelWriter(out_file) as writer:
    for key, value in ids_to_names.items():
        director_df = WriteTreeForDirector(key, leaders, voters)
        director_df = AddParentNames(director_df)

        ## the multiindex organizes the data nicely, even though we throw it away
        ## in the next step.
        ## seems to be hard to style a multiindex, for now just collapse it
        multi = director_df.set_index(['ParentRole','ParentEID','ParentName'])\
            .sort_values(by = ['ParentRole','ParentName', 'Role','LastUsedEmpowerAt'],
                         ascending = [True, True, True, False])\
            .reset_index()

        ## yellow marks anyone whose last use predates the campaign start: the date cell,
        ## the name cell, and (for organizers only) the EID cell
        styled = multi.style\
            .apply(highlight_date, axis = 1, subset=['LastUsedEmpowerAt'])\
            .apply(highlight_name, v=multi['LastUsedEmpowerAt'], axis = 0, subset=['FullName'])\
            .apply(highlight_eid, role = multi['Role'], v=multi['LastUsedEmpowerAt'], axis = 0, subset=['EID'])

        ## sheet_name is passed by keyword (positional use is deprecated); no engine
        ## argument, since the already-open writer decides the engine
        styled.to_excel(writer, sheet_name=value, index = False)

        print('wrote tree for:', value)

The next cell demonstrates how to build the in-memory tree for one director, which the cells below turn into a graph:


In [11]:
director_name = 'Samanta Cardona'
director_eid = 'u-56-21617'

director_df = WriteTreeForDirector(director_eid, leaders, voters)

## load the organizers into the dictionary, keyed by EID.
## director_df already holds the leaders under each organizer, so one pass covers the
## whole tree.  (The former nested "load their children" loop read count columns that
## activated_volunteers does not have; it raised on every row and the error was swallowed.)
organizer_dict = {}
for s in director_df.itertuples():
    try:
        organizer_dict[s.EID] = Organizer(s.FirstName, s.LastName, s.Role, s.EID, s.ParentEID,
            s.LastUsedEmpowerAt, s.PersonalVoterCount, s.DownstreamVoterCount, s.DownstreamLeaderCount)
    except AttributeError as e:
        ## a name or role that is not a string cannot be stripped; skip the row, but say so
        print('skipped', s.EID, ':', e)

## hook up the parent pointers: attach each leader to their direct parent exactly once.
## (Walking the full ancestor chain from every node re-added the same child once per
## descendant, and never terminated if a leader was listed as their own parent.)
for member in organizer_dict.values():
    parent = organizer_dict.get(member.parent_eid)
    if parent is not None and parent is not member:
        parent.add_organizer(member)

Demonstrating how to make a title for a node in the graph we'll produce below.

In [12]:
## Empower role -> the name the program uses for that role
role_map = {'Organizer': 'SuperVocero', 'Director': 'Director', 'Volunteer': 'Vocero'}

def make_title(organizer):
    """Return the four-line hover text for a node.

    Line 1 is "<full name>: <program role>"; lines 2-4 give the last-use value,
    the personal voter count and the downstream voter count.  Lines are joined
    with newline characters.
    """
    heading = organizer.full_name + ':' + ' ' + role_map[organizer.role]
    details = (
        ('Last Used Empower: ', organizer.last_used_empower),
        ('Personal Voter Count: ', organizer.personal_voter_count),
        ('Downstream Voter Count: ', organizer.downstream_voter_count),
    )
    return '\n'.join([heading] + [label + str(value) for label, value in details])

Create nodes and edges from the tree:

In [13]:
def AddNodesToGraph(graph, organizer):
    """Add ``organizer`` and, recursively, everyone in their organizer_list to ``graph``.

    Nodes are keyed by full name and sized by personal voter count.  A node is blue
    when the leader's last use is later than the campaign start date and red
    otherwise.  An edge is added from each leader to each of their children before
    the child's own subtree is added.
    """
    hover_text = make_title(organizer)
    used_this_campaign = organizer.last_used_empower > campaignStartDate.date()
    fill = 'blue' if used_this_campaign else 'red'

    graph.add_node(
        organizer.full_name,
        borderWidth = 10,
        title = hover_text,
        color = fill,
        borderColor = 'blue',
        font = '10px arial black',
        size = organizer.personal_voter_count,
    )

    for child in organizer.organizer_list:
        graph.add_edge(organizer.full_name, child.full_name)
        AddNodesToGraph(graph, child)

Pick a director, show the supervoceros/voceros below her.  Diameter of the node correlates with size of their voter list.  

In [14]:
## Build the graph for the director chosen in the tree-building cell above and render it
## to example.html.
G = nx.Graph()
## look the root up by director_eid rather than repeating the EID literal, so changing
## the director in one place changes the graph too
organizer = organizer_dict[director_eid]
AddNodesToGraph(G, organizer)
net = Network(  directed = True, width=1000, height=600)
net.from_nx(G)
net.show("example.html")
