In [1]:
import urllib.request
import urllib.parse
import os.path
import pathlib
import pandas as pd
import numpy as np
import functools


from nameparser import HumanName
import humanize


pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 250)

In [2]:
if not os.path.isfile("2021_Contributions.csv"):
    url = "https://www.nyccfb.info/DataLibrary/2021_Contributions.csv"
    urllib.request.urlretrieve(url, "2021_Contributions.csv")

In [3]:
contributions = pd.read_csv("2021_Contributions.csv", header=0, dtype={ 'NAME': str, 'STRNO': str, 'STRNAME': str, 'APARTMENT': str , 'INTZIP': str, 'EXEMPTCD': str })

In [4]:
def employer_format(s):
    if not isinstance(s, str):
        return s
    if s.lower() == 'not employed' or s.lower() == 'unemployed':
        return 'Not Employed'
    elif  s.lower() in ['self-employed', 'self employed', 'self', 'Self Employee']:
        return 'Self Employed'
    elif s.lower() == 'none':
        return 'None'
    elif s.lower() in ['homemaker', 'housewife']:
        return 'Homemaker'
    elif s.lower() in ['Citigroup', 'citi']:
        return 'Citigroup'
    elif 'paul weiss' in s.lower():
        return 'Paul Weiss'
    elif ('Skadden Arps' in s.lower() or s == 'Skadden'):
        return 'Skadden Arps'
    elif 'Rosen & Katz' in s or s == 'Wachtell Lipton':
        return 'Wachtell, Lipton, Rosen & Katz'
    elif 'A&E Real Estate' in s.lower():
        return 'A&E Real Estate'
    else:
        return s


contributions['EMPNAME'] = contributions['EMPNAME'].apply(employer_format)

In [5]:
contributions

Unnamed: 0,ELECTION,OFFICECD,RECIPID,CANCLASS,RECIPNAME,COMMITTEE,FILING,SCHEDULE,PAGENO,SEQUENCENO,REFNO,DATE,REFUNDDATE,NAME,C_CODE,STRNO,STRNAME,APARTMENT,BOROUGHCD,CITY,STATE,ZIP,OCCUPATION,EMPNAME,EMPSTRNO,EMPSTRNAME,EMPCITY,EMPSTATE,AMNT,MATCHAMNT,PREVAMNT,PAY_METHOD,INTERMNO,INTERMNAME,INTSTRNO,INTSTRNM,INTAPTNO,INTCITY,INTST,INTZIP,INTEMPNAME,INTEMPSTNO,INTEMPSTNM,INTEMPCITY,INTEMPST,INTOCCUPA,PURPOSECD,EXEMPTCD,ADJTYPECD,RR_IND,SEG_IND,INT_C_CODE
0,2021,1,2563,P,"Tirschwell, Sara A",H,7,ABC,,,R0000209,3/6/2021,,"""Dombrowski,"", Ray",IND,,,,,Haverford,PA,19041,Turnaround Manager,Alvarez & Marsal,,600 Madison Avenue,New York,NY,2000.0,0,0.0,4,,,,,,,,,,,,,,,,,,N,N,
1,2021,55,2345,P,"Camarena, Rodrigo G",H,6,ABC,,,R0001664,12/17/2020,,"(Dougherty) LoBianco, Kelly",IND,,,,Z,Los Angeles,CA,90068,Chief Program Officer,The HOPE Program,1,Smith Street,Brooklyn,NY,38.0,0,0.0,4,,,,,,,,,,,,,,,,,,N,N,
2,2021,55,2414,P,"Low, Jenny L",H,6,ABC,,,R0000080,9/5/2020,,"(Eng) Lawton, Linnit",IND,,,,K,Brooklyn,NY,11201,Human Resources,Hamilton Madison House,253,South Street,New York,NY,25.0,25,0.0,4,,,,,,,,,,,,,,,,,,N,N,
3,2021,55,283,P,"Gennaro, James F",M,7,ABC,,,R0002295,3/3/2021,,"(Orlow) Husarsky, Miriam",IND,,,,Q,Richmond Hill,NY,11418,Speech Therapist,NYC Dept of Education,63-55,102nd street,Rego Park,NY,175.0,175,0.0,4,,,,,,,,,,,,,,,,,,N,N,
4,2021,55,2454,P,"Boghosian Murphy, Leslie",H,6,ABC,,,R0000171,10/19/2020,,"(Zimmerman)Shaich, Carol",IND,,,,M,Ny,NY,10027,Retired,,,,,,25.0,25,0.0,4,,,,,,,,,,,,,,,,,,N,N,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369620,2021,11,2546,P,"Yang, Andrew",H,8,ABC,,,R0079799,5/13/2021,,"Zymaris, Mai",IND,,,,Z,BRAINTREE,MA,2184,Attorney,Morrison & Foerster,428,John Mahar Hwy Unit 207,BRAINTREE,MA,50.0,0,275.0,4,,,,,,,,,,,,,,,,,,N,N,
369621,2021,11,2393,P,"Wiley, Maya D",H,7,ABC,,,R0034934,2/13/2021,,"Zysermann, Noemie",IND,,,,M,New York,NY,10023,Consultant,Self Employed,266,W 73rd st,New York,NY,25.0,25,0.0,4,,,,,,,,,,,,,,,,,,N,N,
369622,2021,33,1559,P,"Johnson, Corey D",L,3,ABC,,,R0016683,7/11/2019,,"Zyskowska, Celina",IND,,,,Q,Kew Gardens,NY,11415,Accounting,Commodore Construction,602,South 3rd Avenue,Mt Vernon,NY,250.0,250,0.0,2,16.0,"Oneill, Brendan",,,,Long Island City,NY,11104,Commodore Construction,602,South 3rd Avenue,Mt Vernon,NY,Labor Manager,,,,N,N,IND
369623,2021,11,1545,P,"Adams, Eric L",L,1,ABC,,,R0003574,6/12/2018,,"Zyskowski, Joseph",IND,,,,Q,Middle Village,NY,11379,Owner,K Construction,65-58,Admiral Avenue,Middle Village,NY,5000.0,250,0.0,2,,,,,,,,,,,,,,,,,,N,N,


In [6]:
major_candidates = [
    'Adams, Eric L',
    'Donovan, Shaun',
    'Garcia, Kathryn A',
    'McGuire, Raymond J',
    'Morales, Dianne',
    'Stringer, Scott M',
    'Wiley, Maya D',
    'Yang, Andrew'
]

mayor_candidates_names = contributions[contributions['OFFICECD'].isin(['1', '11'])]['RECIPNAME'].unique()
candidate_last_names = dict(zip(mayor_candidates_names, map(lambda s: HumanName(s).last, mayor_candidates_names)))
candidate_last_names.values()

dict_values(['Tirschwell', 'Wiley', 'Yang', 'Morales', 'McGuire', 'Stringer', 'Sliwa', 'Cullen', 'Garcia', 'Donovan', 'Menchaca', 'Adams', 'Mateo', 'Diaz', 'Pepitone', 'Taylor', 'AbdulMalik', 'Chang', 'Sutton', 'Rose', 'Prince', 'Wright', 'Guimaraes', 'Flores', 'Emilien', 'Foldenauer', 'Filipchenko', 'Laurel-Smith', 'Kavovit', 'Francis', 'Oremland', 'Nunez', 'Downs', 'Kaplan', 'Miles', 'Reaves', 'Coenen', 'Bunea', 'Pinto', 'Fitzgerald', 'Krietchman', "O'Hagan", 'Seidman', 'Seely'])

In [7]:
mayor_donations = contributions[contributions["RECIPNAME"].isin(major_candidates) & contributions["SCHEDULE"].eq("ABC")].copy()

mayor_donations.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 125557 entries, 38 to 369623
Data columns (total 52 columns):
 #   Column      Non-Null Count   Dtype  
---  ------      --------------   -----  
 0   ELECTION    125557 non-null  int64  
 1   OFFICECD    125557 non-null  object 
 2   RECIPID     125557 non-null  object 
 3   CANCLASS    125557 non-null  object 
 4   RECIPNAME   125557 non-null  object 
 5   COMMITTEE   125557 non-null  object 
 6   FILING      125557 non-null  int64  
 7   SCHEDULE    125557 non-null  object 
 8   PAGENO      0 non-null       float64
 9   SEQUENCENO  0 non-null       float64
 10  REFNO       125557 non-null  object 
 11  DATE        125557 non-null  object 
 12  REFUNDDATE  0 non-null       object 
 13  NAME        125557 non-null  object 
 14  C_CODE      125557 non-null  object 
 15  STRNO       0 non-null       object 
 16  STRNAME     0 non-null       object 
 17  APARTMENT   0 non-null       object 
 18  BOROUGHCD   125443 non-null  object 
 19  C

In [8]:
mayor_donations.NAME.value_counts()

Jordan, Tyron         63
Cole, Sarah           59
Perkins, D            58
Koo, Julia            56
Warren, Roxanne       56
                      ..
Betancourt, Jazmin     1
Nyein, Lila            1
Nissan, Rita           1
Morales, Ed            1
Amen, Mohamed          1
Name: NAME, Length: 74907, dtype: int64

In [9]:
mayor_donations['C_CODE'].unique()

array(['PCOMP', 'CORP', 'IND', 'CAN', 'EMPO', 'FAM', 'OTHR', 'PCOMC',
       'PCOMZ', 'LLC'], dtype=object)

In [10]:
def to_money(i):
     return "$" + humanize.intcomma(round(i))

def to_person_name(s):
    return str(HumanName(s))

def parse_name(s):
    names = []
    
    human_name = HumanName(s)
    
    names.append(f"{human_name.last}, {human_name.first}")
    
    if human_name.suffix:
        names.append(f"{human_name.last} {human_name.suffix}, {human_name.first}")
    
    if human_name.middle:
        names.append(f"{human_name.last}, {human_name.first} {human_name.middle}")
    
        if len(human_name.middle) > 1:
            names.append(f"{human_name.last}, {human_name.first} {human_name.middle[0]}")
    
    return names


def name_variations_from_file(file):
    variations = []
    
    with open(file, 'r') as f:
        for line in f:
            for variation in parse_name(line):
                variations.append(variation.upper())

    return variations
                
real_estate_names_from_littlesis = name_variations_from_file('./real_estate_names.txt')
worst_landlords = name_variations_from_file('./worst_landlords.txt')

In [11]:
mayor_donations['realestate_littlesis'] = mayor_donations['NAME'].str.upper().isin(real_estate_names_from_littlesis)
mayor_donations['worst_landlord'] = mayor_donations['NAME'].str.upper().isin(worst_landlords)
mayor_donations['works_in_realestate'] = mayor_donations['OCCUPATION'].str.contains(r'(?i).*REAL ESTATE.*')

In [12]:
flagged_donations = mayor_donations[ (mayor_donations['realestate_littlesis'] | mayor_donations['worst_landlord'] |  mayor_donations['works_in_realestate']  | ( mayor_donations['C_CODE'] == "LLC"))]
flagged_donations_major_candidates = flagged_donations[flagged_donations["RECIPNAME"].isin(major_candidates)]

columns = ['ELECTION', 'RECIPID', 'RECIPNAME', 'COMMITTEE', 'NAME', 'AMNT', 'DATE', 'REFUNDDATE', 'CITY', 'STATE', 'ZIP', 'OCCUPATION', 'EMPNAME', 'C_CODE', 'realestate_littlesis', 'worst_landlord', 'works_in_realestate']

flagged_donations.sort_values(['RECIPNAME', 'DATE'])[columns]

Unnamed: 0,ELECTION,RECIPID,RECIPNAME,COMMITTEE,NAME,AMNT,DATE,REFUNDDATE,CITY,STATE,ZIP,OCCUPATION,EMPNAME,C_CODE,realestate_littlesis,worst_landlord,works_in_realestate
63374,2021,1545,"Adams, Eric L",L,"Cohen, Adir",5100.0,1/10/2019,,Brooklyn,NY,11230,Real Estate,Renaissance Realty Group,IND,False,False,True
124630,2021,1545,"Adams, Eric L",L,"Gottlieb, Israel",1000.0,1/10/2019,,Lakewood Township,NJ,8701,Real estate investor,Self Employed,IND,False,False,True
212793,2021,1545,"Adams, Eric L",L,"Matheus, Donald",500.0,1/10/2019,,Brooklyn,NY,11238,Real estate,Albatross group,IND,False,False,True
321999,2021,1545,"Adams, Eric L",L,"Strulovitch, Baila",2000.0,1/10/2019,,Brooklyn,NY,11205,Real Estate,Self Employee,IND,False,False,True
192053,2021,1545,"Adams, Eric L",L,"Levy, Danny",100.0,1/10/2020,,Brooklyn,NY,11230,Real estate,Self Employed,IND,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
346745,2021,2546,"Yang, Andrew",H,"Wang, Kevin",100.0,5/6/2021,,Somerville,MA,2143,Software Engineer,Wellframe,IND,True,False,False
146638,2021,2546,"Yang, Andrew",H,"Holt, Giles",50.0,5/7/2021,,New York,NY,10065,Real Estate,KBD Holdings,IND,False,False,True
188504,2021,2546,"Yang, Andrew",H,"Lee, Saehan",50.0,5/7/2021,,Santa Monica,CA,90401,Manager Real Estate,Knowledge Universe,IND,False,False,True
103383,2021,2546,"Yang, Andrew",H,"Fishkind, Jason",2000.0,5/8/2021,,Purchase,NY,10577,Real Estate,OOHH,IND,False,False,True


In [13]:
flagged_donations.sort_values(['RECIPNAME', 'DATE']).to_csv('flagged_donations.csv')

In [14]:
total_amount_recieved = mayor_donations[["RECIPNAME", "AMNT"]].groupby("RECIPNAME").sum().sort_values(by="AMNT", ascending=False)
total_amount_recieved["AMNT"]  = total_amount_recieved["AMNT"].apply(to_money)
total_amount_recieved = total_amount_recieved.rename_axis('Candidates').rename(columns={'AMNT': 'Amount'})
total_amount_recieved

Unnamed: 0_level_0,Amount
Candidates,Unnamed: 1_level_1
"McGuire, Raymond J","$10,093,818"
"Adams, Eric L","$4,739,061"
"Yang, Andrew","$3,600,747"
"Donovan, Shaun","$2,858,059"
"Stringer, Scott M","$2,839,137"
"Wiley, Maya D","$1,598,117"
"Garcia, Kathryn A","$1,242,811"
"Morales, Dianne","$860,549"


In [15]:
def largest_donors_per_candidate(donations, limit=15):
    """Dataframe of largest donors for each candidate"""
    agg = donations[["RECIPNAME", "AMNT", "NAME", "EMPNAME"]].groupby(["RECIPNAME", "NAME"]).agg({'AMNT': sum, 'EMPNAME': lambda s: s.unique() }) # .sort_values(by='AMNT', ascending=False) # .groupby(level=0, group_keys=False).head(20) # .reset_index() # ort_values(by='AMNT')  # .groupby(level=0, group_keys=False).apply(lambda x: x.sort_values(('AMNT', 'sum'), ascending=False))
        
    return agg['AMNT'].groupby('RECIPNAME', group_keys=False).apply(lambda x: x.sort_values(ascending=False).head(limit).apply(to_money)).to_frame()
    

In [16]:
largest_donors_per_candidate(mayor_donations, limit=30)

Unnamed: 0_level_0,Unnamed: 1_level_0,AMNT
RECIPNAME,NAME,Unnamed: 2_level_1
"Adams, Eric L","Bensusan, Danny","$12,100"
"Adams, Eric L","Waisman, Shai","$10,200"
"Adams, Eric L","Guzel, Murat","$10,100"
"Adams, Eric L","Singh, Navdeep","$10,000"
"Adams, Eric L","Serova, Daria","$10,000"
"Adams, Eric L","Boutross, Diana","$10,000"
"Adams, Eric L","Bartlett, Gregory","$7,600"
"Adams, Eric L","Karandikar, Satish","$7,100"
"Adams, Eric L","Akcetin, Mehmet","$7,000"
"Adams, Eric L","David, Emanuel","$6,600"


In [17]:
# Largest Real Estate Donors
largest_donors_per_candidate(flagged_donations_major_candidates, limit=30)

Unnamed: 0_level_0,Unnamed: 1_level_0,AMNT
RECIPNAME,NAME,Unnamed: 2_level_1
"Adams, Eric L","Boutross, Diana","$10,000"
"Adams, Eric L","Verrone, Robert","$5,100"
"Adams, Eric L","Cohen, Adir","$5,100"
"Adams, Eric L","Borrok, Charles R.","$5,100"
"Adams, Eric L","Tawil, Eliot","$5,100"
"Adams, Eric L","Demirjian, Alexis","$5,100"
"Adams, Eric L","tamir, Elliot","$5,100"
"Adams, Eric L","Berman, Nathan","$5,100"
"Adams, Eric L","Schwartz, Yoel","$5,000"
"Adams, Eric L","Corman, Jeffrey","$5,000"


In [18]:
def cfb_link(name, only_mayor=False):
    # url = "https://www.nyccfb.info/FTMSearch/Candidates/Contributions?ec=2021&rt=can&ir={}"
    
    if only_mayor:
        url = "https://www.nyccfb.info/FTMSearch/Candidates/Contributions?ec=2021&rt=can&ofc=1%2C11&ir={}"
    else:
        url = "https://www.nyccfb.info/FTMSearch/Candidates/Contributions?ec=2021&rt=can&ir={}"
        

    return url.format(urllib.parse.quote_plus(name))


def all_top_donors_for(candidate):
        columns = ['RECIPNAME', 'DATE', 'AMNT', 'NAME', 'ZIP', 'OCCUPATION', 'EMPNAME']
        donations = mayor_donations[mayor_donations['RECIPNAME'] == candidate][columns]
        donations["name"] =  donations["NAME"].apply(to_person_name)
        donations.rename(columns={'NAME': 'cfb_name', 'OCCUPATION': 'cfb_occupation', 'EMPNAME': 'cfb_employer'}, inplace=True)
        donations['cfb_link'] = donations['cfb_name'].apply(cfb_link)
        return donations.sort_values('AMNT', ascending=False)

def real_estate_donors_for(candidate):
        columns = ['RECIPNAME', 'DATE', 'AMNT', 'NAME', 'ZIP', 'OCCUPATION', 'EMPNAME']
        donations = flagged_donations_major_candidates[flagged_donations_major_candidates['RECIPNAME'] == candidate][columns]
        donations["name"] =  donations["NAME"].apply(to_person_name)
        donations.rename(columns={'NAME': 'cfb_name', 'OCCUPATION': 'cfb_occupation', 'EMPNAME': 'cfb_employer'}, inplace=True)
        donations['cfb_link'] = donations['cfb_name'].apply(functools.partial(cfb_link, only_mayor=True))
        return donations.sort_values('AMNT', ascending=False)

In [19]:
real_estate_donors_for('Donovan, Shaun').head(10)

Unnamed: 0,RECIPNAME,DATE,AMNT,cfb_name,ZIP,cfb_occupation,cfb_employer,name,cfb_link
366523,"Donovan, Shaun",1/4/2021,5100.0,"Zeiler, John",10023,Real Estate,Hudson Housing Capital,John Zeiler,https://www.nyccfb.info/FTMSearch/Candidates/C...
61828,"Donovan, Shaun",2/21/2021,5100.0,"Clark, Ric",11201,Managing Partner,WatermanClark,Ric Clark,https://www.nyccfb.info/FTMSearch/Candidates/C...
229420,"Donovan, Shaun",3/6/2021,5100.0,"Moore, John",35242,Real Estate Finance/Dev,Highland Commercial Mortgage/A,John Moore,https://www.nyccfb.info/FTMSearch/Candidates/C...
225637,"Donovan, Shaun",4/12/2021,5100.0,"Milstein, Constance J",10014,Real Estate Owner/Principal,"Ogden CAP Properties, LLC",Constance J Milstein,https://www.nyccfb.info/FTMSearch/Candidates/C...
307663,"Donovan, Shaun",3/5/2021,5100.0,"Silvers, Nicholas",10007,Real Estate Developer,Tavros,Nicholas Silvers,https://www.nyccfb.info/FTMSearch/Candidates/C...
307713,"Donovan, Shaun",12/7/2020,5100.0,"Silverstein, Larry",10007,Chairman,Silverstein Properties,Larry Silverstein,https://www.nyccfb.info/FTMSearch/Candidates/C...
167795,"Donovan, Shaun",1/28/2021,5100.0,"KATZ, JEFFREY",10023,Real Estate Developer,Sherwood Equities Inc.,JEFFREY KATZ,https://www.nyccfb.info/FTMSearch/Candidates/C...
245965,"Donovan, Shaun",1/5/2021,5100.0,"olshan, andrea",10065,Real Estate,Olshan Properties,andrea olshan,https://www.nyccfb.info/FTMSearch/Candidates/C...
317587,"Donovan, Shaun",1/29/2021,5100.0,"Stacom, Darcy",10022,Broker,CBRE,Darcy Stacom,https://www.nyccfb.info/FTMSearch/Candidates/C...
335519,"Donovan, Shaun",4/9/2021,5100.0,"Trulson, Derek",10128,Commercial real estate,Jones Lang LaSalle,Derek Trulson,https://www.nyccfb.info/FTMSearch/Candidates/C...


In [20]:
real_estate_donors_for('Adams, Eric L').head(10).info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 342141 to 305974
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   RECIPNAME       10 non-null     object 
 1   DATE            10 non-null     object 
 2   AMNT            10 non-null     float64
 3   cfb_name        10 non-null     object 
 4   ZIP             10 non-null     object 
 5   cfb_occupation  10 non-null     object 
 6   cfb_employer    10 non-null     object 
 7   name            10 non-null     object 
 8   cfb_link        10 non-null     object 
dtypes: float64(1), object(8)
memory usage: 800.0+ bytes


In [21]:
len(flagged_donations[flagged_donations['RECIPNAME'] == 'Adams, Eric L']['NAME'].unique())

315

In [22]:
pathlib.Path("donations").mkdir(parents=True, exist_ok=True)

# Saves two CSV files for candidate
#  candidate_top_realestate_donors.csv
#  canddiate_top_donors.csv
# see real_estate_donors_for() and all_top_donors_for()
for cand in ['Adams, Eric L', 'Donovan, Shaun', 'Garcia, Kathryn A', 'McGuire, Raymond J', 'Stringer, Scott M', 'Yang, Andrew']:
    path = f"donations/{candidate_last_names.get(cand)}_top_realestate_donors.csv"
    real_estate_donors_for(cand).head(50).to_csv(path, index=False)
    
    path = f"donations/{candidate_last_names.get(cand)}_top_donors.csv"
    all_top_donors_for(cand).head(250).to_csv(path, index=False)
    


In [23]:
# Top Real Estate Donors
largest_donors_per_candidate(flagged_donations_major_candidates, limit=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,AMNT
RECIPNAME,NAME,Unnamed: 2_level_1
"Adams, Eric L","Boutross, Diana","$10,000"
"Adams, Eric L","Verrone, Robert","$5,100"
"Adams, Eric L","Cohen, Adir","$5,100"
"Donovan, Shaun","Moore, John","$5,100"
"Donovan, Shaun","DeBode, Gary","$5,100"
"Donovan, Shaun","Trulson, Derek","$5,100"
"Garcia, Kathryn A","Field, Michael","$2,000"
"Garcia, Kathryn A","Kerr, Zachary","$2,000"
"Garcia, Kathryn A","Kalikow, Peter","$2,000"
"McGuire, Raymond J","Colacino, Michael","$8,433"


In [24]:
def format_supported_candidates(names):
    out = set()
    
    for n in names:
        out.add(HumanName(n).last)
    
    return ", ".join(out)
    

# df = mayor_donations.groupby(["NAME"]).agg({"AMNT": 'sum', 'RECIPNAME': lambda series: list(series.unique()) }).sort_values(by="AMNT", ascending=False).head(50).reset_index()
# df.rename(columns={"NAME": "Donor Name", "AMNT": "Amount", "RECIPNAME": "Supported Candidates" }, inplace=True)
# df['Donor Name'] = df['Donor Name'].apply(to_person_name)
# df["Amount"] = df["Amount"].apply(lambda x: f"${humanize.intcomma(round(x))}" )
# df["Supported Candidates"] = df["Supported Candidates"].apply(format_supported_candidates)



In [25]:
def top_donors_for(df, head=20):
    top_donors = df.groupby(["NAME"]).agg({"AMNT": 'sum', 'RECIPNAME': lambda series: list(series.unique()) }).sort_values(by="AMNT", ascending=False).head(head).reset_index()
    top_donors.rename(columns={"NAME": "Donor Name", "AMNT": "Amount", "RECIPNAME": "Supported Candidates" }, inplace=True)
    top_donors['Donor Name'] = top_donors['Donor Name'].apply(to_person_name)
    top_donors["Amount"] = top_donors["Amount"].apply(lambda x: f"${humanize.intcomma(round(x))}" )
    top_donors["Supported Candidates"] = top_donors["Supported Candidates"].apply(format_supported_candidates)
    return top_donors


all_top_donors = top_donors_for(mayor_donations, head=50)
top_real_estate_donors = top_donors_for(flagged_donations, head=50)


In [26]:
top_real_estate_donors

Unnamed: 0,Donor Name,Amount,Supported Candidates
0,Winston Fisher,"$14,200","Donovan, Yang, Adams, Stringer"
1,Daniel Brodsky,"$12,200","Donovan, Yang, Stringer"
2,Derek Trulson,"$12,200","Donovan, McGuire, Adams"
3,John S Klein,"$10,100",Stringer
4,Diana Boutross,"$10,000",Adams
5,Kenneth Fisher,"$9,100","Yang, Adams, Stringer"
6,Michael Colacino,"$8,433",McGuire
7,Robert Quinlan,"$7,900",Stringer
8,Andrew Brooks,"$7,100","McGuire, Adams"
9,Peter Kalikow,"$7,100","McGuire, Garcia"


In [27]:
all_top_donors

Unnamed: 0,Donor Name,Amount,Supported Candidates
0,Raymond J McGuire,"$1,000,000",McGuire
1,Shaun Donovan,"$19,100",Donovan
2,Anne Williams-Isom,"$17,300","McGuire, Wiley"
3,Ruth Blumenstein,"$15,300",Donovan
4,Allison Lutnick,"$15,300","Donovan, McGuire"
5,John Petry,"$14,200","McGuire, Yang, Adams, Garcia"
6,Winston Fisher,"$14,200","Donovan, Yang, Adams, Stringer"
7,James Tisch,"$14,200","McGuire, Adams, Garcia"
8,Steven Rattner,"$14,200","Donovan, McGuire, Adams, Stringer"
9,Judith Rubin,"$13,050","Yang, Garcia, Morales, McGuire, Stringer, Donovan"


In [28]:
# Top Employers Mayors
mayor_donations[["EMPNAME", "AMNT"]].groupby(["EMPNAME"]).agg(['sum', 'count']).sort_values(by=('AMNT', 'sum'), ascending=False).head(100)

Unnamed: 0_level_0,AMNT,AMNT
Unnamed: 0_level_1,sum,count
EMPNAME,Unnamed: 1_level_2,Unnamed: 2_level_2
Not Employed,4957472.69,39042
Self Employed,2982305.87,10165
Retired,585065.19,1639
Paul Weiss,206473.42,129
Citigroup,159711.71,232
Homemaker,118064.0,197
Skadden Arps,101175.0,102
Self Employee,85935.0,116
"Wachtell, Lipton, Rosen & Katz",73825.0,57
Greenberg Traurig,69275.0,35


In [29]:
# Top Employers Overall
contributions[["EMPNAME", "AMNT"]].groupby(["EMPNAME"]).agg(['sum', 'count']).sort_values(by=('AMNT', 'sum'), ascending=False).head(50)

Unnamed: 0_level_0,AMNT,AMNT
Unnamed: 0_level_1,sum,count
EMPNAME,Unnamed: 1_level_2,Unnamed: 2_level_2
Not Employed,8419627.35,59128
Self Employed,8335968.22,39845
MediaOcean,6800000.0,6
Retired,1384217.32,7453
Hess Corporation,1005100.0,4
Third Point LLC,520125.0,19
Duquesne Family Office LLC,509100.0,4
Citadel,503985.0,15
Daryl Roth Productions,501000.0,2
Tudor Investment Corp,500000.0,1


In [30]:
sum_from_real_estate = flagged_donations_major_candidates[['RECIPNAME', 'AMNT']].groupby('RECIPNAME').sum()
sum_from_real_estate = sum_from_real_estate.sort_values(by='AMNT', ascending=False)
sum_from_real_estate['AMNT'] = sum_from_real_estate['AMNT'].apply(to_money)

sum_from_real_estate

Unnamed: 0_level_0,AMNT
RECIPNAME,Unnamed: 1_level_1
"Adams, Eric L","$289,243"
"McGuire, Raymond J","$285,375"
"Donovan, Shaun","$189,402"
"Stringer, Scott M","$151,556"
"Yang, Andrew","$114,779"
"Garcia, Kathryn A","$56,531"
"Wiley, Maya D","$13,477"
"Morales, Dianne","$3,197"


In [31]:
total_amount_recieved = mayor_donations[["RECIPNAME", "AMNT"]].groupby("RECIPNAME").sum().sort_values(by='RECIPNAME')
total_amount_recieved["Real Estate"] = flagged_donations_major_candidates[['RECIPNAME', 'AMNT']].groupby('RECIPNAME').sum().sort_values(by='RECIPNAME')
total_amount_recieved = total_amount_recieved.reset_index().sort_values(by='Real Estate', ascending=False)

total_amount_recieved["Total"]  = total_amount_recieved["AMNT"].apply(to_money)
total_amount_recieved["Real Estate"] = total_amount_recieved['Real Estate'].apply(to_money)
total_amount_recieved['Name'] = total_amount_recieved['RECIPNAME'].apply(to_person_name)

total_amount_recieved[['Name', 'Total', 'Real Estate']].style.hide_index()

Name,Total,Real Estate
Eric L Adams,"$4,739,061","$289,243"
Raymond J McGuire,"$10,093,818","$285,375"
Shaun Donovan,"$2,858,059","$189,402"
Scott M Stringer,"$2,839,137","$151,556"
Andrew Yang,"$3,600,747","$114,779"
Kathryn A Garcia,"$1,242,811","$56,531"
Maya D Wiley,"$1,598,117","$13,477"
Dianne Morales,"$860,549","$3,197"


In [32]:
# Total of all donations to all major candidates
to_money(flagged_donations_major_candidates['AMNT'].sum())

'$1,103,560'

In [33]:
flagged_donations_major_candidates['AMNT'].sum() / mayor_donations['AMNT'].sum() * 100

3.9650340501573575

In [34]:
# Save CSV of top 100 employers
top_employers = mayor_donations[["EMPNAME", "AMNT"]].copy()
top_employers['EMPNAME'] = top_employers['EMPNAME'].apply(employer_format)

top_employers = top_employers.groupby('EMPNAME').sum().sort_values('AMNT', ascending=False).reset_index()[['EMPNAME', 'AMNT']].sort_values(by='AMNT', ascending=False)
top_employers['Employer Name'] = top_employers['EMPNAME']
top_employers['Amount'] = top_employers['AMNT'].apply(to_money)

top_employers[['Employer Name', 'Amount']].head(100).to_csv('top_100_employers.csv', index=False)

In [35]:
employer_agg = mayor_donations[["RECIPNAME", "EMPNAME", "AMNT"]].copy().groupby(["RECIPNAME", "EMPNAME"]).agg(['sum', 'count']).sort_values(by=('AMNT', 'sum'), ascending=False)
employer_agg.head(50)

Unnamed: 0_level_0,Unnamed: 1_level_0,AMNT,AMNT
Unnamed: 0_level_1,Unnamed: 1_level_1,sum,count
RECIPNAME,EMPNAME,Unnamed: 2_level_2,Unnamed: 3_level_2
"McGuire, Raymond J",Not Employed,1640676.68,2288
"Yang, Andrew",Not Employed,1002820.93,11955
"McGuire, Raymond J",Self Employed,946095.0,1040
"Donovan, Shaun",Not Employed,861399.15,1824
"Adams, Eric L",Self Employed,846381.0,1451
"Stringer, Scott M",Not Employed,486720.92,4354
"Wiley, Maya D",Not Employed,449755.0,10824
"Donovan, Shaun",Self Employed,334462.84,580
"McGuire, Raymond J",Retired,306499.97,273
"Garcia, Kathryn A",Not Employed,271229.44,2331


In [36]:
def employer_donations(name):    
    return mayor_donations[mayor_donations["EMPNAME"].str.lower() == name.lower()]

def employer_donations_contains(name):    
    return mayor_donations[mayor_donations["EMPNAME"].str.lower().contains(name.lower)]

def candidates_supported(employer_name):
    recipients = employer_donations(employer_name)[["RECIPNAME", "AMNT"]].groupby('RECIPNAME').sum().reset_index().sort_values('AMNT', ascending=False).to_dict('records')
    return list(map(lambda x: f"{to_person_name(x['RECIPNAME'])} {to_money(x['AMNT'])}", recipients))

def employer_donations_summary(name, include_donors=False):
    d = { 'name': name }
    d['sum'] = round(employer_donations(name)['AMNT'].sum())
    d['recipients'] = candidates_supported(name)
    
    donors = list(map(to_person_name, employer_donations(name)["NAME"].unique()))
    
    d['donor_count'] = len(donors)
    d['donor_avg'] = to_money(d['sum'] / d['donor_count'])
    
    if include_donors:
        d['donors'] = donors
    
    return d
    

In [37]:
employer_donations_summary("NETFLIX")

{'name': 'NETFLIX',
 'sum': 12411,
 'recipients': ['Raymond J McGuire $10,350',
  'Shaun Donovan $1,275',
  'Andrew Yang $331',
  'Scott M Stringer $300',
  'Dianne Morales $100',
  'Maya D Wiley $55'],
 'donor_count': 17,
 'donor_avg': '$730'}

In [38]:
employer_donations_summary("Kirkland & Ellis", include_donors=True)

{'name': 'Kirkland & Ellis',
 'sum': 29810,
 'recipients': ['Raymond J McGuire $26,600',
  'Scott M Stringer $2,400',
  'Shaun Donovan $400',
  'Andrew Yang $300',
  'Kathryn A Garcia $100',
  'Maya D Wiley $10'],
 'donor_count': 18,
 'donor_avg': '$1,656',
 'donors': ['reginald brown',
  'Lauren Friedman',
  'Daniel Fuglestad',
  'Jacqueline Haberfeld',
  'Jonathan Henes',
  'Jia Li Huang',
  'Melissa Hutson',
  'Jason Kanner',
  'Michael Kim',
  'Peter Leung',
  'Song Lin',
  'Ramiro Ocasio',
  'Jen Perkins',
  'Edward Sassower',
  'Monica Shilling',
  'Eric Wedel',
  'Erica Williams',
  'Ali Zaidi']}

In [39]:
employer_donations_summary("Paul Weiss")

{'name': 'Paul Weiss',
 'sum': 206473,
 'recipients': ['Raymond J McGuire $198,390',
  'Andrew Yang $4,325',
  'Maya D Wiley $2,170',
  'Scott M Stringer $600',
  'Shaun Donovan $350',
  'Kathryn A Garcia $315',
  'Dianne Morales $273',
  'Eric L Adams $50'],
 'donor_count': 107,
 'donor_avg': '$1,930'}

In [40]:
employer_donations_summary("Google")

{'name': 'Google',
 'sum': 53484,
 'recipients': ['Andrew Yang $18,252',
  'Raymond J McGuire $12,065',
  'Kathryn A Garcia $7,810',
  'Dianne Morales $6,012',
  'Shaun Donovan $3,335',
  'Maya D Wiley $3,004',
  'Scott M Stringer $2,605',
  'Eric L Adams $400'],
 'donor_count': 240,
 'donor_avg': '$223'}

In [41]:
employer_donations_summary("Apple")

{'name': 'Apple',
 'sum': 17463,
 'recipients': ['Shaun Donovan $5,875',
  'Raymond J McGuire $5,350',
  'Maya D Wiley $2,405',
  'Andrew Yang $2,303',
  'Dianne Morales $1,354',
  'Eric L Adams $100',
  'Kathryn A Garcia $50',
  'Scott M Stringer $25'],
 'donor_count': 57,
 'donor_avg': '$306'}

In [42]:
employer_donations_summary("A&E Real Estate")

{'name': 'A&E Real Estate',
 'sum': 15100,
 'recipients': ['Raymond J McGuire $12,700',
  'Eric L Adams $2,000',
  'Kathryn A Garcia $400'],
 'donor_count': 5,
 'donor_avg': '$3,020'}

In [43]:
employer_donations_summary("Long Pond Capital")

{'name': 'Long Pond Capital',
 'sum': 23900,
 'recipients': ['Raymond J McGuire $23,900'],
 'donor_count': 6,
 'donor_avg': '$3,983'}