Revising the Senate file to cover both chambers of the legislature (House and Senate).

This notebook links together three datasets: Texas Legislature Online (TLO), the Texas Ethics Commission (TEC),
and Open States.

Loading TLO first.

legislators_tlo.html was copied from the source of
http://www.legis.state.tx.us/Search/BillSearchLegislatorList.aspx?ID=usrLegislatorsFolder$cboAuthor&Leg=85

In [1]:
import html
import re

import numpy as np
import pandas as pd

pattern = """\t<option value="(A[0-9]{3,5})">([^,]+)(, (.+))? \((H|S)-A[0-9]{3,5}\)</option>"""

fp = open('../data/inputs/legislators_tlo.html', encoding = 'utf-8')
lines = fp.readlines()

leg = []

for line in lines:
    # Regex applied to each line 
    match = re.search(pattern, line)
    legislator = [match.group(1), match.group(2), match.group(4), match.group(5)]
    if legislator[3] == "H":
        legislator[3] = "STATEREP"
    else: 
        legislator[3] = "STATESEN"
    leg.append(legislator)
        

In [2]:
# Cleaning text in TLO dataset

def substitute(string):
    if type(string) == str:
        string = string.replace("&#233;", "é")
        string = string.replace("&quot;", '"')
        string = string.replace("&#225;", 'á')
        string = string.replace("&#237;", "í")
        string = string.replace("&#241;", "ñ")
        string = string.replace("\t", "")
    return string

for line in leg:
    line[1] = substitute(line[1])
    line[2] = substitute(line[2])
    if line[1] == "Lucio":
        line[1] = "Lucio Jr."


In [3]:
# Turning TLO dataset into a dataframe

leg = pd.DataFrame(leg, columns = ["TLO_id","last_name","first_name", "filerHoldOfficeCd"])

In [4]:
pd.options.display.max_rows = 200
pd.options.display.max_columns = 200
leg

Unnamed: 0,TLO_id,last_name,first_name,filerHoldOfficeCd
0,A2100,Allen,,STATEREP
1,A2125,Alonzo,,STATEREP
2,A2135,Alvarado,,STATEREP
3,A2150,Anchia,,STATEREP
4,A2155,Anderson,"Charles ""Doc""",STATEREP
5,A2215,Anderson,Rodney,STATEREP
6,A3555,Arévalo,,STATEREP
7,A2330,Ashby,,STATEREP
8,A3200,Bailes,,STATEREP
9,A2335,Bell,,STATEREP


In [5]:
# splitting off nicknames and suffixes in the data from TLO

def parsenickname(first):
    nicknamePattern = '[\"\(](\w+)[\"\)]$'
    try:
        match = re.search(nicknamePattern, first)
        newFirst = first.split(match.group(0))[0].strip()
        return newFirst, match.group(1)
    except:
        return first, np.nan

"""   
def parsesuffix(name):
    suffixPattern = "(Jr\.|Sr\.|III)"
    if re.match(suffixPattern, name):
        match = re.search(suffixPattern, name)
        newName = name.strip(match.group(1))
        return newName.strip(), match.group(1)
    else:
        return name, np.nan
""" 

def parsesuffix(row):
    suffixPattern = "(Jr\.|Sr\.|III)"
    answers = [row["first_name"], row["last_name"], np.nan]
    if re.search(suffixPattern, row["last_name"]):
        match = re.search(suffixPattern, row["last_name"])
        newLast = row["last_name"].strip(match.group(1))
        answers = [row["first_name"], newLast.strip(), match.group(1)]
    elif type(row["first_name"]) == str and re.search(suffixPattern, row["first_name"]):
        match = re.search(suffixPattern, row["first_name"])
        newName = row["first_name"].strip(match.group(1))
        answers = [newName.strip(), row["last_name"], match.group(1)]
    if type(answers[0]) == str and len(answers[0]) == 0:
        answers[0] = np.nan
    return answers[0], answers[1], answers[2]

leg["first_name"], leg["filerNameShort"] = zip(*leg["first_name"].map(parsenickname))
leg["first_name"], leg["last_name"], leg["suffixes"] = zip(*leg.apply(parsesuffix, axis = 1))

leg

Unnamed: 0,TLO_id,last_name,first_name,filerHoldOfficeCd,filerNameShort,suffixes
0,A2100,Allen,,STATEREP,,
1,A2125,Alonzo,,STATEREP,,
2,A2135,Alvarado,,STATEREP,,
3,A2150,Anchia,,STATEREP,,
4,A2155,Anderson,Charles,STATEREP,Doc,
5,A2215,Anderson,Rodney,STATEREP,,
6,A3555,Arévalo,,STATEREP,,
7,A2330,Ashby,,STATEREP,,
8,A3200,Bailes,,STATEREP,,
9,A2335,Bell,,STATEREP,,


In [6]:
# This loads a file from an Open States bulk download
# Not sure that {"district": object} is the right choice, but I wanted integers with NaN allowed.

openstates = pd.read_csv("../data/inputs/openstates/2017-06-02-tx-csv/tx_legislators.csv", index_col = "leg_id",
                        dtype = {"district": object})

openstates["first_name"], openstates["filerNameShort"] = zip(*openstates["first_name"].map(parsenickname))

In [7]:
# some of the openstates rows are easy to link because they include the TLO id as part of the URL 
# to the legislator's photo.

import re

def parseTLO(url):
    urlPattern = "small\/(A[0-9]{3,5})\.jpg$"
    try:
        match = re.search(urlPattern, url)
        return match.group(1)
    except:
        return np.nan
    
openstates["TLO_id"] = (openstates["photo_url"].map(parseTLO))

In [8]:
# Finding the senators whose photo file names didn't include their TLO id.

openstates[openstates["TLO_id"].isnull()][openstates["chamber"] == "upper"]

  app.launch_new_instance()


Unnamed: 0_level_0,full_name,first_name,middle_name,last_name,suffixes,nickname,active,state,chamber,district,party,photo_url,created_at,updated_at,filerNameShort,TLO_id
leg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TXL000198,"Eddie Lucio, Jr.",Eduardo A.,,Lucio,Jr.,,True,tx,upper,27,Democrat,http://www.house.state.tx.us/photos/members/36...,2010-06-19 03:51:42.083000,2017-06-01 10:38:26.283000,Eddie,
TXL000312,José Menéndez,José,,Menéndez,,,True,tx,upper,26,Democrat,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.467000,2017-06-01 10:38:26.641000,,
TXL000705,Dawn Buckingham,Dawn,,Buckingham,,,True,tx,upper,24,Republican,,2017-01-18 03:03:18.301000,2017-06-01 10:38:26.639000,,
TXL000707,Borris L. Miles,Borris L.,,Miles,,,True,tx,upper,13,Democrat,,2017-01-18 03:03:18.403000,2017-06-01 10:38:26.414000,,
TXL000716,Bryan Hughes,Bryan,,Hughes,,,True,tx,upper,1,Republican,,2017-01-27 05:42:08.204000,2017-06-01 10:38:26.497000,,


In [9]:
# just setting the remaining 5 senators' TLO_id directly.

tloID = {"TXL000198": "A1300", 
        "TXL000312": "A1110",
        "TXL000705": "A1125",
        "TXL000707": "A1115",
        "TXL000716": "A1135"}

for k in tloID.keys():
    openstates.set_value(k, "TLO_id", tloID[k])


In [10]:
openstates[openstates["chamber"] == "upper"]

Unnamed: 0_level_0,full_name,first_name,middle_name,last_name,suffixes,nickname,active,state,chamber,district,party,photo_url,created_at,updated_at,filerNameShort,TLO_id
leg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TXL000190,Craig Estes,Craig,,Estes,,,True,tx,upper,30,Republican,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.063000,2017-06-01 10:38:26.268000,,A1180
TXL000195,Juan Hinojosa,Juan,,Hinojosa,,,True,tx,upper,20,Democrat,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.076000,2017-06-01 10:38:26.808000,Chuy,A1250
TXL000196,Joan Huffman,Joan,,Huffman,,,True,tx,upper,17,Republican,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.078000,2017-06-01 10:38:26.752000,,A1260
TXL000198,"Eddie Lucio, Jr.",Eduardo A.,,Lucio,Jr.,,True,tx,upper,27,Democrat,http://www.house.state.tx.us/photos/members/36...,2010-06-19 03:51:42.083000,2017-06-01 10:38:26.283000,Eddie,A1300
TXL000199,Jane Nelson,Jane,,Nelson,,,True,tx,upper,12,Republican,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.086000,2017-06-01 10:38:26.740000,,A1450
TXL000200,Robert Nichols,Robert,,Nichols,,,True,tx,upper,3,Republican,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.088000,2017-06-01 10:38:26.798000,,A1400
TXL000203,Kel Seliger,Kel,,Seliger,,,True,tx,upper,31,Republican,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.095000,2017-06-01 10:38:26.762000,,A1460
TXL000206,Carlos Uresti,Carlos I.,,Uresti,,,True,tx,upper,19,Democrat,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.103000,2017-06-01 10:38:26.767000,,A1605
TXL000208,Kirk Watson,Kirk,,Watson,,,True,tx,upper,14,Democrat,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.107000,2017-06-01 10:38:26.769000,,A1610
TXL000210,Royce West,Royce,,West,,,True,tx,upper,23,Democrat,http://www.legdir.legis.state.tx.us/FlashCardD...,2010-06-19 03:51:42.112000,2017-06-01 10:38:26.772000,,A1625


In [11]:
# Trying to move Jr. to the suffix field when it occurs at the beginning of the first_name field.
# And not to overwrite suffix fields when there's no suffix in the name field.
# Probably need to use map command to take in the contents of both column and then write the contents of both.

# AFAIK, the only row affected by this is TXL000509

def parsefirstsuffix(row):
    suffixPattern = "^(Jr\.|Sr\.|III),"
    if re.match(suffixPattern, row['first_name']):
        match = re.search(suffixPattern, row['first_name'])
        newFirst = row['first_name'].split(match.group(0))[1]
        return newFirst, match.group(1)
    else:
        return row['first_name'], row['suffixes']

openstates["first_name"], openstates["suffixes"] = zip(*openstates.apply(parsefirstsuffix, axis=1))

In [12]:
# This will overwrite any existing middle name if the first name contains a space

def parsefirstmiddle(row):
    try: middle = row['middle_name']
    except: middle = np.nan
    if type(row['first_name']) == str and len(row['first_name'].split()) > 1:
        return row['first_name'].split()[0], row['first_name'].split()[1]
    else:
        return row['first_name'], middle

openstates["first_name"], openstates["middle_name"] = zip(*openstates.apply(parsefirstmiddle, axis=1))
leg["first_name"], leg["middle_name"] = zip(*leg.apply(parsefirstmiddle, axis=1))    

In [13]:
leg

Unnamed: 0,TLO_id,last_name,first_name,filerHoldOfficeCd,filerNameShort,suffixes,middle_name
0,A2100,Allen,,STATEREP,,,
1,A2125,Alonzo,,STATEREP,,,
2,A2135,Alvarado,,STATEREP,,,
3,A2150,Anchia,,STATEREP,,,
4,A2155,Anderson,Charles,STATEREP,Doc,,
5,A2215,Anderson,Rodney,STATEREP,,,
6,A3555,Arévalo,,STATEREP,,,
7,A2330,Ashby,,STATEREP,,,
8,A3200,Bailes,,STATEREP,,,
9,A2335,Bell,,STATEREP,,,


In [14]:
# affects TXL000414

openstates["last_name"] = openstates["last_name"].apply(substitute)
openstates["full_name"] = openstates["full_name"].apply(substitute)

In [15]:
openSenate = openstates[openstates["chamber"] == "upper"]
legSenate = leg[leg["filerHoldOfficeCd"] == "STATESEN"]

In [16]:
senate = pd.merge(legSenate, openSenate.reset_index(), how='left', on='TLO_id')

In [17]:
openHouse = openstates[openstates["chamber"] == "lower"]
legHouse = leg[leg["filerHoldOfficeCd"] == "STATEREP"]

In [18]:
legHouse = legHouse.fillna(np.nan)

In [19]:
legLastOnly = leg[leg["first_name"].isnull()]

In [20]:
openLastOnly = pd.merge(legLastOnly, openHouse.reset_index(), how='left', on=['last_name'])

In [21]:
indexed_df = openLastOnly.set_index('TLO_id_x')

In [22]:
indexed_df[:5]

Unnamed: 0_level_0,last_name,first_name_x,filerHoldOfficeCd,filerNameShort_x,suffixes_x,middle_name_x,leg_id,full_name,first_name_y,middle_name_y,suffixes_y,nickname,active,state,chamber,district,party,photo_url,created_at,updated_at,filerNameShort_y,TLO_id_y
TLO_id_x,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
A2100,Allen,,STATEREP,,,,TXL000214,Alma Allen,Alma,A.,,,True,tx,lower,131,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.133000,2017-06-01 10:38:26.476000,,
A2125,Alonzo,,STATEREP,,,,TXL000215,Roberto Alonzo,Roberto,R.,,,True,tx,lower,104,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.135000,2017-06-01 10:38:26.825000,,
A2135,Alvarado,,STATEREP,,,,TXL000216,Carol Alvarado,Carol,,,,True,tx,lower,145,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.137000,2017-06-01 10:38:25.930000,,
A2150,Anchia,,STATEREP,,,,TXL000217,Rafael Anchia,Rafael,,,,True,tx,lower,103,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.140000,2017-06-01 10:38:26.780000,,
A3555,Arévalo,,STATEREP,,,,TXL000683,"Arévalo, Diana",Diana,,,,True,tx,lower,116,Democratic,http://www.house.state.tx.us/photos/members/35...,2017-01-07 05:53:03.970000,2017-06-01 10:38:26.361000,,


In [23]:
legHouse = legHouse.set_index("TLO_id")

In [24]:
legHouse["first_name"] = legHouse["first_name"].fillna(indexed_df["first_name_y"])

In [25]:
legHouse

Unnamed: 0_level_0,last_name,first_name,filerHoldOfficeCd,filerNameShort,suffixes,middle_name
TLO_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A2100,Allen,Alma,STATEREP,,,
A2125,Alonzo,Roberto,STATEREP,,,
A2135,Alvarado,Carol,STATEREP,,,
A2150,Anchia,Rafael,STATEREP,,,
A2155,Anderson,Charles,STATEREP,Doc,,
A2215,Anderson,Rodney,STATEREP,,,
A3555,Arévalo,Diana,STATEREP,,,
A2330,Ashby,Trent,STATEREP,,,
A3200,Bailes,Ernest,STATEREP,,,
A2335,Bell,Cecil,STATEREP,,,


In [26]:
openHouse

Unnamed: 0_level_0,full_name,first_name,middle_name,last_name,suffixes,nickname,active,state,chamber,district,party,photo_url,created_at,updated_at,filerNameShort,TLO_id
leg_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
TXL000214,Alma Allen,Alma,A.,Allen,,,True,tx,lower,131,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.133000,2017-06-01 10:38:26.476000,,
TXL000215,Roberto Alonzo,Roberto,R.,Alonzo,,,True,tx,lower,104,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.135000,2017-06-01 10:38:26.825000,,
TXL000216,Carol Alvarado,Carol,,Alvarado,,,True,tx,lower,145,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.137000,2017-06-01 10:38:25.930000,,
TXL000217,Rafael Anchia,Rafael,,Anchia,,,True,tx,lower,103,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.140000,2017-06-01 10:38:26.780000,,
TXL000218,Charles Anderson,Charles,,Anderson,,Doc,True,tx,lower,56,Republican,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.142000,2017-06-01 10:38:26.510000,Doc,
TXL000221,Dwayne Bohac,Dwayne,,Bohac,,,True,tx,lower,138,Republican,http://www.house.state.tx.us/photos/members/22...,2010-06-19 03:51:42.149000,2017-06-01 10:38:26.452000,,
TXL000223,Dennis Bonnen,Dennis,,Bonnen,,,True,tx,lower,25,Republican,http://www.house.state.tx.us/photos/members/22...,2010-06-19 03:51:42.154000,2017-06-01 10:38:26.551000,,
TXL000228,Angie Chen Button,Angie,Chen,Button,,,True,tx,lower,112,Republican,http://www.house.state.tx.us/photos/members/25...,2010-06-19 03:51:42.181000,2017-06-01 10:38:25.932000,,
TXL000235,Garnet Coleman,Garnet,F.,Coleman,,,True,tx,lower,147,Democratic,http://www.house.state.tx.us/photos/members/24...,2010-06-19 03:51:42.198000,2017-06-01 10:38:25.974000,,
TXL000236,Byron Cook,Byron,,Cook,,,True,tx,lower,8,Republican,http://www.house.state.tx.us/photos/members/24...,2010-06-19 03:51:42.201000,2017-06-01 10:38:26.829000,,


In [27]:
openTLO = pd.merge(legHouse.reset_index(), openHouse.reset_index(), how='left', on=['first_name', 'last_name'])

In [28]:
openTLO

Unnamed: 0,TLO_id_x,last_name,first_name,filerHoldOfficeCd,filerNameShort_x,suffixes_x,middle_name_x,leg_id,full_name,middle_name_y,suffixes_y,nickname,active,state,chamber,district,party,photo_url,created_at,updated_at,filerNameShort_y,TLO_id_y
0,A2100,Allen,Alma,STATEREP,,,,TXL000214,Alma Allen,A.,,,True,tx,lower,131,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.133000,2017-06-01 10:38:26.476000,,
1,A2125,Alonzo,Roberto,STATEREP,,,,TXL000215,Roberto Alonzo,R.,,,True,tx,lower,104,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.135000,2017-06-01 10:38:26.825000,,
2,A2135,Alvarado,Carol,STATEREP,,,,TXL000216,Carol Alvarado,,,,True,tx,lower,145,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.137000,2017-06-01 10:38:25.930000,,
3,A2150,Anchia,Rafael,STATEREP,,,,TXL000217,Rafael Anchia,,,,True,tx,lower,103,Democratic,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.140000,2017-06-01 10:38:26.780000,,
4,A2155,Anderson,Charles,STATEREP,Doc,,,TXL000218,Charles Anderson,,,Doc,True,tx,lower,56,Republican,http://www.house.state.tx.us/photos/members/21...,2010-06-19 03:51:42.142000,2017-06-01 10:38:26.510000,Doc,
5,A2215,Anderson,Rodney,STATEREP,,,,TXL000370,Rodney Anderson,,,,True,tx,lower,105,Republican,http://www.house.state.tx.us/photos/members/22...,2011-01-14 22:24:08.381000,2017-06-01 10:38:26.177000,,
6,A3555,Arévalo,Diana,STATEREP,,,,TXL000683,"Arévalo, Diana",,,,True,tx,lower,116,Democratic,http://www.house.state.tx.us/photos/members/35...,2017-01-07 05:53:03.970000,2017-06-01 10:38:26.361000,,
7,A2330,Ashby,Trent,STATEREP,,,,TXL000424,"Ashby, Trent",,,,True,tx,lower,57,Republican,http://www.house.state.tx.us/photos/members/23...,2013-01-09 00:29:35.404000,2017-06-01 10:38:26.827000,,
8,A3200,Bailes,Ernest,STATEREP,,,,TXL000693,"Bailes, Ernest",,,,True,tx,lower,18,Republican,http://www.house.state.tx.us/photos/members/32...,2017-01-07 05:53:04.179000,2017-06-01 10:38:26.363000,,
9,A2335,Bell,Cecil,STATEREP,,,,TXL000425,"Bell, Cecil",,,,True,tx,lower,3,Republican,http://www.house.state.tx.us/photos/members/23...,2013-01-09 00:29:35.419000,2017-06-01 10:38:26.329000,,


In [29]:
openTLOouter = pd.merge(legHouse.reset_index(), openHouse.reset_index(), how='outer', on=['first_name', 'last_name'])

In [31]:
new = legHouse.merge(indexed_df, left_on='lkey', right_on='rkey', how='outer')

KeyError: 'rkey'

In [None]:
# Trying to fill the np.nan values with strings from the other dataframe. Not working!

def getFirst(row):
    if type(row['first_name']) == str:
        print("yes")
        return row['first_name']
    else:
        print("no")
        return indexed_df["first_name_y"][row["TLO_id"]]

legHouse["new_first_name"] = legHouse.apply(getFirst, axis = 1)

In [None]:
legHouse

In [None]:
indexed_df["first_name_y"]["A2100"]

In [None]:
legHouse

In [None]:
indexed_df['last_name']['A3555']

In [None]:
leg["first_name"][3]

In [None]:
legHouse

In [None]:
# oops, forgot there aren't many first names in the TLO data.

house = pd.merge(legHouse, openHouse.reset_index(), how='left', on=['first_name', 'last_name'])

In [None]:
house

In [None]:
senate = tloOpen[tloOpen["ctaSeekOfficeCd"] == "STATESEN"]

In [None]:
# This loads a file from the Texas Election Commission

tx = pd.read_csv("../data/inputs/TEC_CF_CSV/filers.csv")

In [None]:
tx = tx[tx["filerTypeCd"] == 'COH']

In [None]:
# removed 'STATEREP',  to focus on Senate
# removed [tx['ctaSeekOfficeCd'].isin(['STATESEN'], changed to filerHoldOfficeCd 

txRep = tx[tx['filerHoldOfficeCd'].isin(['STATESEN'])][tx['filerFilerpersStatusCd'] != "NOT_OFFICEHOLDER"]

In [None]:
senate[:5]

In [None]:
txRep[:5]

In [None]:
txRep = txRep.rename(columns={"filerHoldOfficeDistrict": "district"})

# txRep["district"].describe()



In [None]:
allThree = pd.merge(senate, txRep, how='left', on = 'district')

In [None]:
allThree[["first_name_x", "last_name_x", "full_name", "first_name_y", "last_name_y", 
          "filerName"]]

In [None]:
def s (string):
    return string.strip()

allThree["full_name"] = allThree["full_name"].apply(s)

In [None]:
answer = allThree[pd.notnull(allThree['filerName'])]

In [None]:
answer[:1]

In [None]:
answerShort = answer[["TLO_id", "leg_id", "filerIdent", "full_name", "first_name_y", "filerNameShort", "last_name_x", "suffixes_x", "party", "district"]]

In [None]:
answerShort = answerShort.rename(columns={"first_name_y": "first_name", "last_name_x": "last_name", 
                                "filerNameShort": "nickname", "suffixes_x": "suffixes"})

In [None]:
answerShort["filerIdent"] = answerShort["filerIdent"].astype(int)

In [None]:
answerShort = answerShort.set_index("TLO_id")

In [None]:
answerShort.to_csv("../data/senateCrosswalk.csv")