## Scraping US Judicial Data (Vacancies, Confirmations, Nominations) 

### Goal: A map of all current vacancies, recent confirmations, and recent nominations to the District Courts in the United States.

### Importing all the necessary packages to do the work

In [347]:
###Import your scraping libraries
# Standard library
import glob
import json
import math
import os
import re
import time
from io import StringIO

# Third-party
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pandas import json_normalize


from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select

from webdriver_manager.chrome import ChromeDriverManager

import tika
tika.initVM()
from tika import parser

### Step 1: Get a table of all the current vacancies in the federal courts

In [44]:
# Download the current-vacancies page and load its first HTML table into a DataFrame.
my_url_vacancies = "http://www.uscourts.gov/judges-judgeships/judicial-vacancies/current-judicial-vacancies"
# A timeout keeps the cell from hanging forever on a stalled connection.
response_vacancies = requests.get(my_url_vacancies, timeout=30)
# Fail here on an HTTP error instead of trying to parse an error page further down.
response_vacancies.raise_for_status()
raw_html_vacancies = response_vacancies.content
soup_doc_vacancies = BeautifulSoup(raw_html_vacancies, "html.parser")

table_vacancies = soup_doc_vacancies.find('table')
if table_vacancies is None:
    # Without this check str(None) would be handed to pandas and fail with an unrelated message.
    raise ValueError("No <table> element found at " + my_url_vacancies)
# Shortcut: pandas parses the whole table at once, so no manual walk over the tds and trs.
# The markup is wrapped in StringIO because passing literal HTML strings to read_html is deprecated.
df_vacancies = pd.read_html(StringIO(str(table_vacancies)))[0]
df_vacancies['Vacancy Date']=pd.to_datetime(df_vacancies['Vacancy Date'])


In [45]:
df_vacancies

Unnamed: 0,Court,Incumbent,Vacancy Reason,Vacancy Date,Nominee,Nomination Date
0,01 - MA,"Young,William G.",Senior,2021-07-01,,
1,01 - MA,"O'Toole Jr.,George A.",Senior,2018-01-01,,
2,01 - NH,"Barbadoro,Paul J.",Senior,2021-03-01,"Elliott,Samantha D.",09/30/2021
3,01 - PR,"Cerezo,Carmen Consuelo",Retired,2021-02-28,,
4,01 - PR,"Gelpi Jr.,Gustavo A.",Elevated,2021-10-18,,
...,...,...,...,...,...,...
74,CL,"Braden,Susan G.",Senior,2018-07-13,,
75,CL,"Damich,Edward J.",Senior,2013-10-21,"Bonilla,Armando O.",10/05/2021
76,CL,"Sweeney,Margaret M.",Senior,2020-10-23,"Lerner,Carolyn N.",07/13/2021
77,IT,"Gordon,Leo M.",Senior,2019-03-22,,


In [71]:
# A function to classify a vacancy row by whether a nominee has been named
def classify(row):
    """Return 'Vacant' when the row has no nominee, otherwise 'Nominee'.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide a 'Nominee' entry.

    Returns
    -------
    str
        'Vacant' if row['Nominee'] is missing (NaN, None or pd.NA),
        'Nominee' for any other value.
    """
    # pd.isna handles strings and None directly, so no exception is needed
    # to tell a name apart from a missing value.
    if pd.isna(row['Nominee']):
        return 'Vacant'
    return 'Nominee'
    
def districtify (row):
    """Return the court code of a row.

    The code is whatever follows the last " - " in row['Court'] (the whole
    value when that separator is absent), with every hyphen removed.
    """
    *_, court_code = row['Court'].split(" - ")
    return court_code.replace("-","")
    
    
# Add the status label and the court code to every vacancy row (row-wise apply).
df_vacancies['Category'] = df_vacancies.apply(classify, axis=1)
df_vacancies['District'] = df_vacancies.apply(districtify, axis=1)


In [72]:
df_vacancies

Unnamed: 0,Court,Incumbent,Vacancy Reason,Vacancy Date,Nominee,Nomination Date,Category,District
0,01 - MA,"Young,William G.",Senior,2021-07-01,,,Vacant,MA
1,01 - MA,"O'Toole Jr.,George A.",Senior,2018-01-01,,,Vacant,MA
2,01 - NH,"Barbadoro,Paul J.",Senior,2021-03-01,"Elliott,Samantha D.",09/30/2021,Nominee,NH
3,01 - PR,"Cerezo,Carmen Consuelo",Retired,2021-02-28,,,Vacant,PR
4,01 - PR,"Gelpi Jr.,Gustavo A.",Elevated,2021-10-18,,,Vacant,PR
...,...,...,...,...,...,...,...,...
74,CL,"Braden,Susan G.",Senior,2018-07-13,,,Vacant,CL
75,CL,"Damich,Edward J.",Senior,2013-10-21,"Bonilla,Armando O.",10/05/2021,Nominee,CL
76,CL,"Sweeney,Margaret M.",Senior,2020-10-23,"Lerner,Carolyn N.",07/13/2021,Nominee,CL
77,IT,"Gordon,Leo M.",Senior,2019-03-22,,,Vacant,IT


In [68]:
districtify(df_vacancies.iloc[10]).replace("-","")

'NYS'

### Step 2: Get a table of all the recent confirmations to the federal courts

In [74]:
# Download the confirmation-listing page and load its first HTML table into a DataFrame.
my_url_confirmations = "http://www.uscourts.gov/judges-judgeships/judicial-vacancies/confirmation-listing"
# A timeout keeps the cell from hanging forever on a stalled connection.
response_confirmations = requests.get(my_url_confirmations, timeout=30)
# Fail here on an HTTP error instead of trying to parse an error page further down.
response_confirmations.raise_for_status()
raw_html_confirmations = response_confirmations.content
soup_doc_confirmations = BeautifulSoup(raw_html_confirmations, "html.parser")

table_confirmations = soup_doc_confirmations.find('table')
if table_confirmations is None:
    # Without this check str(None) would be handed to pandas and fail with an unrelated message.
    raise ValueError("No <table> element found at " + my_url_confirmations)
# The markup is wrapped in StringIO because passing literal HTML strings to read_html is deprecated.
df_confirmations = pd.read_html(StringIO(str(table_confirmations)))[0]
# Parse the three date columns so they are real timestamps rather than text.
for date_column in ('Nomination Date', 'Confirmation Date', 'Vacancy Date'):
    df_confirmations[date_column]=pd.to_datetime(df_confirmations[date_column])


In [80]:
# A function to derive the court code (same definition as the one in Step 1)
def districtify (row):
    """Return the text after the last " - " in row['Court'], with hyphens removed."""
    return row['Court'].split(" - ")[-1].replace("-","")
    
    
# Every row of this table is, by construction, a recent confirmation.
df_confirmations['Category'] = 'Recent Confirmation'
# Derive the court code with districtify, exactly as for the vacancies table, so hyphens
# are stripped the same way and the two tables produce identical District keys.
df_confirmations['District'] = df_confirmations.apply(districtify, axis=1)

In [81]:
df_confirmations

Unnamed: 0,Nominee,Nomination Date,Confirmation Date,Court,Incumbent,Vacancy Reason,Vacancy Date,Category,District
0,"Gelpi,Gustavo A.",2021-05-12,2021-10-18,01 - CCA,"Torruella,Juan R.",Deceased,2020-10-26,Recent Confirmation,CCA
1,"Kelley,Angel",2021-05-12,2021-09-14,01 - MA,"Woodlock,Douglas P.",Senior,2015-06-01,Recent Confirmation,MA
2,"Lee,Eunice C.",2021-05-12,2021-08-07,02 - CCA,"Katzmann,Robert A.",Senior,2021-01-21,Recent Confirmation,CCA
3,"Perez,Myrna",2021-06-15,2021-10-25,02 - CCA,"Chin,Denny",Senior,2021-06-01,Recent Confirmation,CCA
4,"Robinson,Beth",2021-08-05,2021-11-01,02 - CCA,"Hall,Peter W.",Senior,2021-03-04,Recent Confirmation,CCA
5,"Merriam,Sarah A.L.",2021-06-15,2021-10-06,02 - CT,"Hall,Janet C.",Senior,2021-01-21,Recent Confirmation,CT
6,"Nagala,Sarala Vidya",2021-06-15,2021-10-27,02 - CT,"Bryant,Vanessa Lynne",Senior,2021-02-01,Recent Confirmation,CT
7,"Williams,Omar Antonio",2021-06-15,2021-10-28,02 - CT,"Thompson,Alvin W.",Senior,2018-08-31,Recent Confirmation,CT
8,"Neals,Julien Xavier",2021-04-19,2021-06-08,03 - NJ,"Martini,William J.",Senior,2015-02-10,Recent Confirmation,NJ
9,"O'Hearn,Christine P.",2021-04-29,2021-10-19,03 - NJ,"Kugler,Robert B.",Senior,2018-11-02,Recent Confirmation,NJ


### Step 3: Combine both dataframes into one large dataframe

In [84]:
# Stack confirmations on top of vacancies and keep only the four columns used later.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0. Like append,
# it keeps each table's original row labels, so index values repeat across the two halves.
df_all=pd.concat([df_confirmations, df_vacancies])[["Nominee","Incumbent","Category","District"]]

In [228]:
df_all['Category'].value_counts()

Vacant                 53
Recent Confirmation    28
Nominee                26
Name: Category, dtype: int64

In [201]:
df_all['Nominee'].unique()

array(['Gelpi,Gustavo A.', 'Kelley,Angel', 'Lee,Eunice C.', 'Perez,Myrna',
       'Robinson,Beth', 'Merriam,Sarah A.L.', 'Nagala,Sarala Vidya',
       'Williams,Omar Antonio', 'Neals,Julien Xavier',
       "O'Hearn,Christine P.", 'Quraishi,Zahid N.',
       'Williams,Karen McGlashan', 'Heytens,Toby J.',
       'Boardman,Deborah L.', 'Griggsby,Lydia Kay',
       'Giles,Patricia Tolliver', 'Nachmanoff,Michael S.',
       'Jackson-Akiwumi,Candace', 'Estudillo,David G.', 'King,Lauren J.',
       'Lin,Tana', 'Rossman,Veronica S.', 'Rodriguez,Regina M.',
       'Strickland,Margaret Irene', 'Jackson,Ketanji Brown',
       'Cobb,Jia M.', 'Pan,Florence Y.', 'Cunningham,Tiffany P.', nan,
       'Elliott,Samantha D.', 'Ho,Dale E.', 'Castner,Georgette',
       'Kumar,Shalina D.', 'Beckering,Jane M.', 'Brennan,Bridget Meehan',
       'Fleming,Charles Esque', 'Ruiz,David Augustin',
       'Menendez,Katherine Marie', 'Frimpong,Maame Ewusi-Mensah',
       'Vera,Hernan D.', 'Thurston,Jennifer L.',
    

In [211]:
pd.options.display.max_rows = 200
df_all.to_csv("data.csv")

### Step 4 : Get PDFs concerning these folks

In [220]:
# Start Chrome with the driver binary that webdriver_manager downloads or finds in its cache.
# Selenium 4 takes the driver path through a Service object; passing it as the first
# positional argument is no longer supported in current releases.
driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()))
# Open the Senate Judiciary Committee hearing list, filtered to nomination hearings.
driver.get("https://www.judiciary.senate.gov/hearings?PageNum_rs=1&type=Nomination&month=0&year=0")



Current google-chrome version is 96.0.4664
Get LATEST chromedriver version for 96.0.4664 google-chrome
Driver [C:\Users\sriha\.wdm\drivers\chromedriver\win32\96.0.4664.45\chromedriver.exe] found in cache


In [221]:
# Collect the link inside every table cell whose text is exactly "Nominations".
# find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name, removed in Selenium 4.3.
rows=driver.find_elements(By.TAG_NAME, "td")
get_urls=[]
for row in rows:
    if row.text=="Nominations":
        a_tag=row.find_element(By.TAG_NAME, "a")
        get_urls.append(a_tag.get_attribute("href"))

In [222]:
get_urls

['https://www.judiciary.senate.gov/meetings/11/24/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/11/10/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/10/27/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/10/13/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/09/29/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/09/07/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/08/04/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/06/16/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/06/02/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/05/19/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/05/05/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/04/21/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/04/07/2021/nominations',
 'https://www.judiciary.senate.gov/meetings/02/25/2021/nominations',
 'https://www.judiciary.senate.gov

In [229]:
# Visit each hearing page and keep every link whose address contains "-sjq".
judge_urls=[]
for url in get_urls:
    driver.get(url)
    # Fixed pause after each navigation (presumably to let the page finish loading).
    time.sleep(5)
    # find_elements(By.TAG_NAME, ...) replaces find_elements_by_tag_name, removed in Selenium 4.3.
    a_tags = driver.find_elements(By.TAG_NAME, "a")
    for a in a_tags:
        try:
            href = a.get_attribute("href")
        except WebDriverException:
            # The element could not be read (for example it went stale); skip just this link.
            continue
        # Anchors without an href yield None, which must not reach the substring test.
        if href and "-sjq" in href:
            judge_urls.append(href)

KeyboardInterrupt: 

In [225]:
judge_urls

['https://www.judiciary.senate.gov/stark-sjq',
 'https://www.judiciary.senate.gov/calvert-sjq',
 'https://www.judiciary.senate.gov/corley-sjq',
 'https://www.judiciary.senate.gov/geraghty-sjq',
 'https://www.judiciary.senate.gov/ho-sjq',
 'https://www.judiciary.senate.gov/vidal-sjq',
 'https://www.judiciary.senate.gov/brennan-sjq',
 'https://www.judiciary.senate.gov/chun-sjq',
 'https://www.judiciary.senate.gov/fleming-sjq',
 'https://www.judiciary.senate.gov/ruiz-sjq',
 'https://www.judiciary.senate.gov/sanchez-sjq',
 'https://www.judiciary.senate.gov/elliott-sjq',
 'https://www.judiciary.senate.gov/lopez-sjq',
 'https://www.judiciary.senate.gov/menendez-sjq',
 'https://www.judiciary.senate.gov/ohta-sjq',
 'https://www.judiciary.senate.gov/urias-sjq',
 'https://www.judiciary.senate.gov/thomas-sjq',
 'https://www.judiciary.senate.gov/dimke-sjq',
 'https://www.judiciary.senate.gov/frimpong-sjq',
 'https://www.judiciary.senate.gov/sweeney-sjq',
 'https://www.judiciary.senate.gov/thurston

We notice that there are 49 URLs, of which 4 are repeats. That leaves 45 unique URLs, while we have 54 nominees plus recent confirmations. I manually gathered the locations of the PDFs using Google search, put them in a list file (`list_of_files.txt`), and fed that file to wget, which downloaded the PDFs for me.


### Step 5 : Download all the PDFs using wget

In [None]:
#wget --content-disposition --trust-server-names -i list_of_files.txt

### Step 6 : Read through the PDFs and extract information necessary

In [234]:
# Folder holding the downloaded questionnaire PDFs (machine-specific absolute path).
path = r'C:\Users\sriha\Desktop\Columbia\fall2021-2\databases\files'
# Every PDF directly inside that folder.
pdf_pattern = path + '/*.pdf'
files = glob.glob(pdf_pattern)

In [316]:
# Regexes that cut the name, birth and education answers out of the extracted questionnaire text.
# They are kept exactly as first written, so the hand-cleaning done afterwards still applies.
# Note: the dots in "2.", "5." and "6." are unescaped, so each one matches any character.
NAME_PATTERN = r'(?s)(?<=former names used\))(.*?)(?=\n\n2.)'
BIRTH_PATTERN = r'(?s)(?<=birth. \n\n)(.*?)(?= \n\n5.)'
EDUCATION_PATTERN = r'(?s)(?<=and the date each degree was received. \n\n)(.*?)(?=\n6. )'


def _extract_person(file_name, content):
    """Build one record from the text extracted from a questionnaire PDF.

    Parameters
    ----------
    file_name : str
        Base name of the PDF, stored under 'file'.
    content : str or None
        Extracted text. Anything that is not a string (for example None when
        no text was extracted) yields a record whose other fields are all ''.

    Returns
    -------
    dict
        Keys 'file', 'name', 'birthyear', 'place_of_birth', 'education'.
        'education' is the list of regex matches (possibly empty); the other
        fields are strings and stay '' when the expected text is not found.
    """
    person = {'file': file_name, 'name': '', 'birthyear': '', 'place_of_birth': '', 'education': ''}
    if not isinstance(content, str):
        return person

    name_sections = re.findall(NAME_PATTERN, content)
    if name_sections:
        name_lines = name_sections[0].split("\n")
        # The name is taken from the third line of the matched section.
        if len(name_lines) > 2:
            person['name'] = name_lines[2]

    birth_sections = re.findall(BIRTH_PATTERN, content)
    if birth_sections:
        birth_parts = birth_sections[0].split(";")
        # Both birth fields are filled only when the "year; place" split succeeds.
        if len(birth_parts) > 1:
            person['birthyear'] = birth_parts[0].replace(" ","")
            person['place_of_birth'] = birth_parts[1]

    person['education'] = re.findall(EDUCATION_PATTERN, content)
    return person


# Parse every downloaded PDF with tika and collect one record per file.
pdf_data=[]
for file in files:
    parsed_pdf=parser.from_file(file)
    # os.path.basename works with either path separator, unlike splitting on a backslash.
    pdf_data.append(_extract_person(os.path.basename(file), parsed_pdf.get('content')))

In [318]:
pd.DataFrame(pdf_data).to_csv("info.csv")


### Step 7: The data from the regexes is not super clean, so we manually fix it in good ol' Excel (the cleaned version is read back from `info2.csv` below)

In [319]:
df_clean=pd.read_csv("info2.csv")

### Step 8 : We merge the dataframes from before with the information I got from the PDFs

In [320]:
df_clean.head()

Unnamed: 0,short,file,name,birthyear,place_of_birth,education,match
0,Beckering,Beckering SJQ Public Final.pdf,Jane Marie Beckering,1965,"Grand Rapids, Michigan",['1987 - 1990: University of Wisconsin Law Sch...,"Beckering,Jane M."
1,Boardman,Boardman SJQ Public Final for Box.pdf,Deborah Lynn Boardman,1974,"Silver Spring, Maryland","['1997 -2000, University of Virginia School of...","Boardman,Deborah L."
2,Bonilla,Bonilla SJQ Public Final.pdf,Armando Omar Bonilla,1967,"New York, New York","['1989-1992, Seton Hall University School of L...","Bonilla,Armando O."
3,Brennan,Brennan SJQ Public Final.pdf,Bridget Meehan Brennan,1974,"Camp Hill, Pennsylvania","['1998 -2000, Case Western Reserve University ...","Brennan,Bridget Meehan"
4,Calvert,Calvert SJQ Public Final.pdf,Victoria Marie Calvert,1981,"Bronx, New York","['2003 - 2006, New York University School of L...","Calvert,Victoria Marie"


In [321]:
df_all.head()

Unnamed: 0,Nominee,Incumbent,Category,District
0,"Gelpi,Gustavo A.","Torruella,Juan R.",Recent Confirmation,CCA
1,"Kelley,Angel","Woodlock,Douglas P.",Recent Confirmation,MA
2,"Lee,Eunice C.","Katzmann,Robert A.",Recent Confirmation,CCA
3,"Perez,Myrna","Chin,Denny",Recent Confirmation,CCA
4,"Robinson,Beth","Hall,Peter W.",Recent Confirmation,CCA


In [323]:
# Left-join the PDF details onto the nominee list: every row of df_all is kept, and rows
# whose Nominee has no counterpart in df_clean['match'] get NaN in the added columns.
final_df = df_all.merge(df_clean, how="left", left_on="Nominee", right_on="match")

In [324]:
final_df

Unnamed: 0,Nominee,Incumbent,Category,District,short,file,name,birthyear,place_of_birth,education,match
0,"Gelpi,Gustavo A.","Torruella,Juan R.",Recent Confirmation,CCA,Gelpi,Gelpi SJQ Public Final1.pdf,Gustavo Antonio Gelpi,1965.0,"San Juan, Puerto Rico","['1988 - 1991, Suffolk University Law School; ...","Gelpi,Gustavo A."
1,"Kelley,Angel","Woodlock,Douglas P.",Recent Confirmation,MA,Kelley,Kelley SJQ Public Final.pdf,Angel Kelley,1967.0,"New Rochelle, New York","['2002 - 2003, Temple University School of Law...","Kelley,Angel"
2,"Lee,Eunice C.","Katzmann,Robert A.",Recent Confirmation,CCA,Lee,Lee SJQ Public Final.pdf,Eunice Cheryl Lee,1970.0,"Wiesbaden, Germany (U.S. Air Force base)","['1993 -1996, Yale Law School; J.D., 1996 \nSu...","Lee,Eunice C."
3,"Perez,Myrna","Chin,Denny",Recent Confirmation,CCA,Perez,Perez SJQ Public Final.pdf,Myrna Perez,1974.0,"San Antonio, Texas","['2000 - 2003, Columbia Law School; J.D., 2003...","Perez,Myrna"
4,"Robinson,Beth","Hall,Peter W.",Recent Confirmation,CCA,Robinson,Robinson SJQ Public FINAL.pdf,Beth Robinson,1965.0,"Karachi, Pakistan","['Fall semester 1991, Akron University. I took...","Robinson,Beth"
5,"Merriam,Sarah A.L.","Hall,Janet C.",Recent Confirmation,CT,Merriam,Merriam SJQ Public Final.pdf,Sarah Ann Leilani Merriam,1971.0,"Honolulu, Hawaii","['2016-2018, Duke Law School; L.L.M. in Judici...","Merriam,Sarah A.L."
6,"Nagala,Sarala Vidya","Bryant,Vanessa Lynne",Recent Confirmation,CT,Nagala,Nagala SJQ Public Final.pdf,Sarala Vidya Nagala,1983.0,"Oakes, North Dakota","['2005 -2008, University of California, Berkel...","Nagala,Sarala Vidya"
7,"Williams,Omar Antonio","Thompson,Alvin W.",Recent Confirmation,CT,O. Williams,O. Williams SJQ Public Final.pdf,Omar Antonio Williams,1977.0,"Rochester, New York","['1999 -2002, University of Connecticut School...","Williams,Omar Antonio"
8,"Neals,Julien Xavier","Martini,William J.",Recent Confirmation,NJ,Neals,Neals Senate Questionnaire Final.pdf,Julien Xavier Neals,1965.0,"Newark, New Jersey","['1988-1991, Emory University School of Law, J...","Neals,Julien Xavier"
9,"O'Hearn,Christine P.","Kugler,Robert B.",Recent Confirmation,NJ,,,,,,,


In [340]:
# Swap NaN for '' throughout final_df and rebind the name to the result.
# The values are later concatenated into HTML strings by nice_text.
# NOTE(review): regex=True looks unnecessary for a NaN target; fillna('') is presumably
# equivalent here - confirm before changing.
final_df=final_df.replace(np. nan,'',regex=True)

In [341]:
# Distinct court codes and distinct categories, in order of first appearance.
list_of_districts = final_df['District'].unique()
list_of_categories = final_df['Category'].unique()

In [342]:
# Build one dict per district: {'name': <code>, <category>: [person dicts, ...], ...}.
# Every category gets a key, holding an empty list when the district has no such rows.
final_object=[]
for district in list_of_districts:
    in_district = final_df[final_df['District']==district]
    district_object = {'name': district}
    for category in list_of_categories:
        in_category = in_district[in_district['Category']==category]
        district_object[category] = [
            {
                'Nominee': record['Nominee'],
                'Incumbent': record['Incumbent'],
                'ProperName': record['name'],
                'BirthYear': record['birthyear'],
                'PlaceOfBirth': record['place_of_birth'],
                'Education': record['education'],
            }
            for _, record in in_category.iterrows()
        ]
    final_object.append(district_object)
        

In [343]:
dummy=pd.DataFrame(final_object)

In [344]:
#df_all.groupby(['District','Category'])['Nominee'].value_counts(dropna=False)

In [359]:
dummy

Unnamed: 0,name,Recent Confirmation,Vacant,Nominee
0,CCA,"[{'Nominee': 'Gelpi,Gustavo A.', 'Incumbent': ...","[{'Nominee': '', 'Incumbent': 'Smith,D. Brooks...",[]
1,MA,"[{'Nominee': 'Kelley,Angel', 'Incumbent': 'Woo...","[{'Nominee': '', 'Incumbent': 'Young,William G...",[]
2,CT,"[{'Nominee': 'Merriam,Sarah A.L.', 'Incumbent'...",[],[]
3,NJ,"[{'Nominee': 'Neals,Julien Xavier', 'Incumbent...","[{'Nominee': '', 'Incumbent': 'Hochberg,Faith ...","[{'Nominee': 'Castner,Georgette', 'Incumbent':..."
4,MD,"[{'Nominee': 'Boardman,Deborah L.', 'Incumbent...",[],[]
5,VAE,"[{'Nominee': 'Giles,Patricia Tolliver', 'Incum...","[{'Nominee': '', 'Incumbent': 'Gibney Jr.,John...",[]
6,WAW,"[{'Nominee': 'Estudillo,David G.', 'Incumbent'...","[{'Nominee': '', 'Incumbent': 'Settle,Benjamin...","[{'Nominee': 'Chun,John H.', 'Incumbent': 'Rob..."
7,CO,"[{'Nominee': 'Rodriguez,Regina M.', 'Incumbent...",[],"[{'Nominee': 'Sweeney,Charlotte N.', 'Incumben..."
8,NM,"[{'Nominee': 'Strickland,Margaret Irene', 'Inc...","[{'Nominee': '', 'Incumbent': 'Herrera,Judith ...",[]
9,DC,"[{'Nominee': 'Cobb,Jia M.', 'Incumbent': 'Sull...",[],[]


In [374]:
# Count the people listed in each category, one new column per category.
for list_column, count_column in (
    ('Recent Confirmation', 'ConfirmLength'),
    ('Vacant', 'VacantLength'),
    ('Nominee', 'NomineeLength'),
):
    dummy[count_column] = dummy[list_column].apply(len)

In [376]:
dummy.to_csv("dummy.csv")

### Step 9 : Merging the data we have with the map template

In [574]:
# Load the district-court boundaries. The encoding is given explicitly so the read does not
# depend on the platform default (GeoJSON files are UTF-8 by specification).
with open('US_District_Court_Jurisdictions.geojson', encoding='utf-8') as json_data:
    geometry_data = json.load(json_data)

In [575]:
# One row per GeoJSON feature; nested keys become dotted columns such as "properties.ABBR".
features_flat = json_normalize(geometry_data['features'])
df = pd.DataFrame(features_flat)


In [576]:
df.head()

Unnamed: 0,type,properties.FID,properties.STATEFP,properties.NAME,properties.ALAND,properties.AWATER,properties.STATE,properties.CHIEF_JUDG,properties.NOMINATING,properties.TERM_AS_CH,properties.Shape_Leng,properties.ABBR,properties.DISTRICT_N,properties.SHAPE_Length,properties.SHAPE_Area,geometry.type,geometry.coordinates
0,Feature,1,21,Western District of Kentucky,49705550000.0,1651516000.0,Kentucky,Greg N. Stivers,Barack Obama (D),2018,16.200585,KYW,6,16.200585,5.216899,MultiPolygon,"[[[[-89.48247982199996, 36.502137781000044], [..."
1,Feature,2,21,Eastern District of Kentucky,52573940000.0,723821300.0,Kentucky,Danny Reeves,George W. Bush (R),2019,13.514251,KYE,6,13.514251,5.451047,MultiPolygon,"[[[[-84.62011661499997, 39.073464501000046], [..."
2,Feature,3,18,Southern District of Indiana,58245170000.0,594117600.0,Indiana,Jane Magnus-Stinson,Barack Obama (D),2016,14.956126,INS,7,14.956126,6.137433,MultiPolygon,"[[[[-85.86280909599998, 40.464758698000026], [..."
3,Feature,4,1,Middle District of Alabama,34126730000.0,547242300.0,Alabama,Emily Coody Marks,Donald Trump (R),2019,10.235799,ALM,11,10.235799,3.858442,MultiPolygon,"[[[[-85.33828446299998, 33.494706345000054], [..."
4,Feature,5,1,Southern District of Alabama,62358820000.0,3052681000.0,Alabama,Kristi DuBose,George W. Bush (R),2017,12.976906,ALS,11,12.976906,3.278871,MultiPolygon,"[[[[-88.08681708099994, 30.259869545000072], [..."


In [577]:
# Attach the per-district summary to the map features by court abbreviation.
# It is a left join, so districts absent from `dummy` keep their geometry and get NaN.
map_df = df.merge(dummy, how="left", left_on="properties.ABBR", right_on="name")

In [579]:
pd.set_option('display.max_columns', None)
map_df

Unnamed: 0,type,properties.FID,properties.STATEFP,properties.NAME,properties.ALAND,properties.AWATER,properties.STATE,properties.CHIEF_JUDG,properties.NOMINATING,properties.TERM_AS_CH,properties.Shape_Leng,properties.ABBR,properties.DISTRICT_N,properties.SHAPE_Length,properties.SHAPE_Area,geometry.type,geometry.coordinates,name,Recent Confirmation,Vacant,Nominee,ConfirmLength,VacantLength,NomineeLength
0,Feature,1,21,Western District of Kentucky,49705550000.0,1651516000.0,Kentucky,Greg N. Stivers,Barack Obama (D),2018,16.200585,KYW,6,16.200585,5.216899,MultiPolygon,"[[[[-89.48247982199996, 36.502137781000044], [...",,,,,,,
1,Feature,2,21,Eastern District of Kentucky,52573940000.0,723821300.0,Kentucky,Danny Reeves,George W. Bush (R),2019,13.514251,KYE,6,13.514251,5.451047,MultiPolygon,"[[[[-84.62011661499997, 39.073464501000046], [...",,,,,,,
2,Feature,3,18,Southern District of Indiana,58245170000.0,594117600.0,Indiana,Jane Magnus-Stinson,Barack Obama (D),2016,14.956126,INS,7,14.956126,6.137433,MultiPolygon,"[[[[-85.86280909599998, 40.464758698000026], [...",,,,,,,
3,Feature,4,1,Middle District of Alabama,34126730000.0,547242300.0,Alabama,Emily Coody Marks,Donald Trump (R),2019,10.235799,ALM,11,10.235799,3.858442,MultiPolygon,"[[[[-85.33828446299998, 33.494706345000054], [...",ALM,[],"[{'Nominee': '', 'Incumbent': 'Brasher,Andrew ...",[],0.0,1.0,0.0
4,Feature,5,1,Southern District of Alabama,62358820000.0,3052681000.0,Alabama,Kristi DuBose,George W. Bush (R),2017,12.976906,ALS,11,12.976906,3.278871,MultiPolygon,"[[[[-88.08681708099994, 30.259869545000072], [...",,,,,,,
5,Feature,6,5,Western District of Arkansas,61636170000.0,1313839000.0,Arkansas,Susan Hickey,Barack Obama (D),2019,17.746755,ARW,8,17.746755,6.194531,MultiPolygon,"[[[[-94.26928693099995, 36.49923735100003], [-...",ARW,[],"[{'Nominee': '', 'Incumbent': 'Holmes III,Paul...",[],0.0,1.0,0.0
6,Feature,7,5,Eastern District of Arkansas,73132700000.0,1649021000.0,Arkansas,D.P. Marshall Jr.,Barack Obama (D),2019,20.659817,ARE,8,20.659817,7.390924,MultiPolygon,"[[[[-91.67234942699997, 36.49946967600005], [-...",,,,,,,
7,Feature,8,6,Northern District of California,53194160000.0,7787389000.0,California,Richard Seeborg,Barack Obama (D),2021,26.865847,CAN,9,26.865847,5.590307,MultiPolygon,"[[[[-123.00090629299996, 37.70101589600006], [...",CAN,[],"[{'Nominee': '', 'Incumbent': 'White,Jeffrey S...","[{'Nominee': 'Corley,Jacqueline Scott', 'Incum...",0.0,1.0,2.0
8,Feature,9,6,Eastern District of California,225003500000.0,4008883000.0,California,Kimberly Mueller,Barack Obama (D),2020,31.166453,CAE,9,31.166453,23.651767,MultiPolygon,"[[[[-122.03457673799994, 42.00470130000008], [...",CAE,[],"[{'Nominee': '', 'Incumbent': 'England Jr.,Mor...","[{'Nominee': 'Thurston,Jennifer L.', 'Incumben...",0.0,1.0,1.0
9,Feature,10,6,Central District of California,103588300000.0,7056692000.0,California,Philip Gutierrez,George W. Bush (R),2020,24.76322,CAC,9,24.76322,10.240974,MultiPolygon,"[[[[-118.59577516699994, 33.03434072400006], [...",CAC,[],"[{'Nominee': '', 'Incumbent': 'Selna,James V.'...","[{'Nominee': 'Frimpong,Maame Ewusi-Mensah', 'I...",0.0,4.0,2.0


In [591]:
import math
def add_text(cell):
    """Format a vacancy count as text, e.g. "0 vacancies", "1 vacancy", "4 vacancies".

    Parameters
    ----------
    cell : float, int or None
        Number of vacancies; NaN or None means the district has no data and
        is reported as zero.

    Returns
    -------
    str
        The whole-number count followed by "vacancy" (exactly one) or "vacancies".
    """
    count = 0 if cell is None or math.isnan(cell) else int(cell)
    # Use the singular for exactly one, so the label does not read "1 vacancies".
    noun = "vacancy" if count == 1 else "vacancies"
    return str(count) + " " + noun

In [592]:
# Headline shown for each district: its name, a line break, then the vacancy count text.
vacancy_labels = map_df['VacantLength'].apply(add_text)
map_df['properties.headline'] = map_df['properties.NAME'] + "<br>" + vacancy_labels

In [750]:
map_df

Unnamed: 0,type,properties.FID,properties.STATEFP,properties.NAME,properties.ALAND,properties.AWATER,properties.STATE,properties.CHIEF_JUDG,properties.NOMINATING,properties.TERM_AS_CH,properties.Shape_Leng,properties.ABBR,properties.DISTRICT_N,properties.SHAPE_Length,properties.SHAPE_Area,geometry.type,geometry.coordinates,name,Recent Confirmation,Vacant,Nominee,ConfirmLength,VacantLength,NomineeLength,properties.headline,properties.article,properties.color,properties.group_id,properties.group_name
0,Feature,1,21,Western District of Kentucky,49705550000.0,1651516000.0,Kentucky,Greg N. Stivers,Barack Obama (D),2018,16.200585,KYW,6,16.200585,5.216899,MultiPolygon,"[[[[-89.48247982199996, 36.502137781000044], [...",,,,,,,,Western District of Kentucky<br>0 vacancies,<b>Western District of Kentucky </b><br> There...,f7f7f7,6,District 6
1,Feature,2,21,Eastern District of Kentucky,52573940000.0,723821300.0,Kentucky,Danny Reeves,George W. Bush (R),2019,13.514251,KYE,6,13.514251,5.451047,MultiPolygon,"[[[[-84.62011661499997, 39.073464501000046], [...",,,,,,,,Eastern District of Kentucky<br>0 vacancies,<b>Eastern District of Kentucky </b><br> There...,f7f7f7,6,District 6
2,Feature,3,18,Southern District of Indiana,58245170000.0,594117600.0,Indiana,Jane Magnus-Stinson,Barack Obama (D),2016,14.956126,INS,7,14.956126,6.137433,MultiPolygon,"[[[[-85.86280909599998, 40.464758698000026], [...",,,,,,,,Southern District of Indiana<br>0 vacancies,<b>Southern District of Indiana </b><br> There...,f7f7f7,7,District 7
3,Feature,4,1,Middle District of Alabama,34126730000.0,547242300.0,Alabama,Emily Coody Marks,Donald Trump (R),2019,10.235799,ALM,11,10.235799,3.858442,MultiPolygon,"[[[[-85.33828446299998, 33.494706345000054], [...",ALM,[],"[{'Nominee': '', 'Incumbent': 'Brasher,Andrew ...",[],0.0,1.0,0.0,Middle District of Alabama<br>1 vacancies,<b>Middle District of Alabama </b><br> There a...,#fdbb84,11,District 11
4,Feature,5,1,Southern District of Alabama,62358820000.0,3052681000.0,Alabama,Kristi DuBose,George W. Bush (R),2017,12.976906,ALS,11,12.976906,3.278871,MultiPolygon,"[[[[-88.08681708099994, 30.259869545000072], [...",,,,,,,,Southern District of Alabama<br>0 vacancies,<b>Southern District of Alabama </b><br> There...,f7f7f7,11,District 11
5,Feature,6,5,Western District of Arkansas,61636170000.0,1313839000.0,Arkansas,Susan Hickey,Barack Obama (D),2019,17.746755,ARW,8,17.746755,6.194531,MultiPolygon,"[[[[-94.26928693099995, 36.49923735100003], [-...",ARW,[],"[{'Nominee': '', 'Incumbent': 'Holmes III,Paul...",[],0.0,1.0,0.0,Western District of Arkansas<br>1 vacancies,<b>Western District of Arkansas </b><br> There...,#fdbb84,8,District 8
6,Feature,7,5,Eastern District of Arkansas,73132700000.0,1649021000.0,Arkansas,D.P. Marshall Jr.,Barack Obama (D),2019,20.659817,ARE,8,20.659817,7.390924,MultiPolygon,"[[[[-91.67234942699997, 36.49946967600005], [-...",,,,,,,,Eastern District of Arkansas<br>0 vacancies,<b>Eastern District of Arkansas </b><br> There...,f7f7f7,8,District 8
7,Feature,8,6,Northern District of California,53194160000.0,7787389000.0,California,Richard Seeborg,Barack Obama (D),2021,26.865847,CAN,9,26.865847,5.590307,MultiPolygon,"[[[[-123.00090629299996, 37.70101589600006], [...",CAN,[],"[{'Nominee': '', 'Incumbent': 'White,Jeffrey S...","[{'Nominee': 'Corley,Jacqueline Scott', 'Incum...",0.0,1.0,2.0,Northern District of California<br>1 vacancies,<b>Northern District of California </b><br> Th...,#fdbb84,9,District 9
8,Feature,9,6,Eastern District of California,225003500000.0,4008883000.0,California,Kimberly Mueller,Barack Obama (D),2020,31.166453,CAE,9,31.166453,23.651767,MultiPolygon,"[[[[-122.03457673799994, 42.00470130000008], [...",CAE,[],"[{'Nominee': '', 'Incumbent': 'England Jr.,Mor...","[{'Nominee': 'Thurston,Jennifer L.', 'Incumben...",0.0,1.0,1.0,Eastern District of California<br>1 vacancies,<b>Eastern District of California </b><br> The...,#fdbb84,9,District 9
9,Feature,10,6,Central District of California,103588300000.0,7056692000.0,California,Philip Gutierrez,George W. Bush (R),2020,24.76322,CAC,9,24.76322,10.240974,MultiPolygon,"[[[[-118.59577516699994, 33.03434072400006], [...",CAC,[],"[{'Nominee': '', 'Incumbent': 'Selna,James V.'...","[{'Nominee': 'Frimpong,Maame Ewusi-Mensah', 'I...",0.0,4.0,2.0,Central District of California<br>4 vacancies,<b>Central District of California </b><br> The...,#d7301f,9,District 9


In [856]:
def _person_html(item):
    """Render one person record (a dict built for the map) as HTML lines."""
    # Fall back to the scraped "Last,First" name when no questionnaire name is available.
    proper_name = item.get('ProperName') or item.get('Nominee') or ''
    try:
        birth_year = str(int(float(item.get('BirthYear'))))
    except (TypeError, ValueError):
        # Blank or missing birth year (no questionnaire data for this person).
        birth_year = 'unknown'
    birth_place = item.get('PlaceOfBirth') or 'unknown'
    # Education arrives as the text form of a list; strip the list punctuation and
    # turn the literal "\n" sequences into HTML line breaks.
    education = str(item.get('Education') or '')
    education = education.replace("[","").replace("]","").replace("\\n","<br>").replace("'","")
    return (
        "Name: <b>" + str(proper_name) + "</b><br>"
        + "Birth Year: <b> " + birth_year + "</b><br>"
        + "Place of Birth: <b>" + str(birth_place) + "</b><br>"
        + "Education: " + education + "<br><br>"
    )


def _people_section(people, heading, empty_message):
    """Return `heading` followed by every person, or `empty_message` when there is nobody."""
    # Districts without scraped data carry NaN here instead of a list.
    if not isinstance(people, (list, tuple)) or len(people) == 0:
        return empty_message
    return heading + "".join(_person_html(item) for item in people)


def nice_text(name, vacancies, confirmation, nominee):
    """Build the HTML article shown for one court district on the map.

    Parameters
    ----------
    name : str
        Display name of the district.
    vacancies : float, int or None
        Number of vacancies; NaN or None is treated as zero.
    confirmation : list of dict, or NaN
        Recently confirmed judges. Each dict provides 'ProperName', 'Nominee',
        'BirthYear', 'PlaceOfBirth' and 'Education'.
    nominee : list of dict, or NaN
        Pending nominees, in the same format.

    Returns
    -------
    str
        The headline with the vacancy count, then the confirmations section,
        then the nominations section. Each section is either its heading
        followed by its people or a "no recent ..." sentence.
    """
    count = 0 if vacancies is None or math.isnan(vacancies) else int(vacancies)
    if count == 1:
        vacancy_sentence = "There is 1 vacancy"
    else:
        vacancy_sentence = "There are " + str(count) + " vacancies"
    headline = "<b>"+ name + " </b><br><br> " + vacancy_sentence + " in this court district. <br><br> "

    confirmed_section = _people_section(
        confirmation,
        '<b>Recent Confirmations to the District: </b> <br><br>',
        "There have been no recent confirmations to this district. <br><br> ",
    )
    nominee_section = _people_section(
        nominee,
        '<b>Recent Nominations to the District: </b> <br><br>',
        "There have been no recent nominations to this district. <br><br>",
    )

    # Each heading is immediately followed by its own people.
    return headline + confirmed_section + nominee_section

In [858]:
# Build the pop-up article for every district from its name, vacancy count and people lists.
map_df['properties.article'] = map_df.apply(
    lambda district_row: nice_text(
        district_row['properties.NAME'],
        district_row['VacantLength'],
        district_row['Recent Confirmation'],
        district_row['Nominee'],
    ),
    axis=1,
)


In [859]:
map_df.head()

Unnamed: 0,type,properties.FID,properties.STATEFP,properties.NAME,properties.ALAND,properties.AWATER,properties.STATE,properties.CHIEF_JUDG,properties.NOMINATING,properties.TERM_AS_CH,properties.Shape_Leng,properties.ABBR,properties.DISTRICT_N,properties.SHAPE_Length,properties.SHAPE_Area,geometry.type,geometry.coordinates,name,Recent Confirmation,Vacant,Nominee,ConfirmLength,VacantLength,NomineeLength,properties.headline,properties.article,properties.color,properties.group_id,properties.group_name
0,Feature,1,21,Western District of Kentucky,49705550000.0,1651516000.0,Kentucky,Greg N. Stivers,Barack Obama (D),2018,16.200585,KYW,6,16.200585,5.216899,MultiPolygon,"[[[[-89.48247982199996, 36.502137781000044], [...",,,,,,,,Western District of Kentucky<br>0 vacancies,<b>Western District of Kentucky </b><br><br> T...,#d3d3d3,6,District 6
1,Feature,2,21,Eastern District of Kentucky,52573940000.0,723821300.0,Kentucky,Danny Reeves,George W. Bush (R),2019,13.514251,KYE,6,13.514251,5.451047,MultiPolygon,"[[[[-84.62011661499997, 39.073464501000046], [...",,,,,,,,Eastern District of Kentucky<br>0 vacancies,<b>Eastern District of Kentucky </b><br><br> T...,#d3d3d3,6,District 6
2,Feature,3,18,Southern District of Indiana,58245170000.0,594117600.0,Indiana,Jane Magnus-Stinson,Barack Obama (D),2016,14.956126,INS,7,14.956126,6.137433,MultiPolygon,"[[[[-85.86280909599998, 40.464758698000026], [...",,,,,,,,Southern District of Indiana<br>0 vacancies,<b>Southern District of Indiana </b><br><br> T...,#d3d3d3,7,District 7
3,Feature,4,1,Middle District of Alabama,34126730000.0,547242300.0,Alabama,Emily Coody Marks,Donald Trump (R),2019,10.235799,ALM,11,10.235799,3.858442,MultiPolygon,"[[[[-85.33828446299998, 33.494706345000054], [...",ALM,[],"[{'Nominee': '', 'Incumbent': 'Brasher,Andrew ...",[],0.0,1.0,0.0,Middle District of Alabama<br>1 vacancies,<b>Middle District of Alabama </b><br><br> The...,#fdbb84,11,District 11
4,Feature,5,1,Southern District of Alabama,62358820000.0,3052681000.0,Alabama,Kristi DuBose,George W. Bush (R),2017,12.976906,ALS,11,12.976906,3.278871,MultiPolygon,"[[[[-88.08681708099994, 30.259869545000072], [...",,,,,,,,Southern District of Alabama<br>0 vacancies,<b>Southern District of Alabama </b><br><br> T...,#d3d3d3,11,District 11


In [886]:
def rand_color(value):
    """Map a vacancy count to the hex fill colour used for a district.

    Despite the name, the mapping is deterministic.

    Parameters
    ----------
    value : object
        Anything ``float()`` accepts (int, float, numeric string). Values
        that cannot be converted are treated as "no data".

    Returns
    -------
    str
        '#ffeda0' when 1 <= value <= 2,
        '#feb24c' when 2 < value <= 3,
        '#f03b20' when value > 3,
        '#d3d3d3' otherwise (0, NaN, None, non-numeric input, negative
        values, and values strictly between 0 and 1).
    """
    default = '#d3d3d3'
    try:
        value = float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no data"; anything else
        # (e.g. KeyboardInterrupt) must propagate rather than be swallowed.
        return default

    # NaN fails every comparison below and therefore falls through to default.
    if 1 <= value <= 2:
        return '#ffeda0'
    if 2 < value <= 3:
        return '#feb24c'
    if value > 3:
        return '#f03b20'
    return default

In [887]:
# Sanity check: a NaN input must fall back to the default grey '#d3d3d3'.
rand_color(np.nan)

'#d3d3d3'

In [888]:
# Colour each district by its number of vacancies (rand_color handles NaN).
map_df['properties.color'] = map_df['VacantLength'].apply(rand_color)

In [889]:
# Display the frame to eyeball the recomputed 'properties.color' column.
map_df

Unnamed: 0,type,properties.FID,properties.STATEFP,properties.NAME,properties.ALAND,properties.AWATER,properties.STATE,properties.CHIEF_JUDG,properties.NOMINATING,properties.TERM_AS_CH,properties.Shape_Leng,properties.ABBR,properties.DISTRICT_N,properties.SHAPE_Length,properties.SHAPE_Area,geometry.type,geometry.coordinates,name,Recent Confirmation,Vacant,Nominee,ConfirmLength,VacantLength,NomineeLength,properties.headline,properties.article,properties.color,properties.group_id,properties.group_name
0,Feature,1,21,Western District of Kentucky,49705550000.0,1651516000.0,Kentucky,Greg N. Stivers,Barack Obama (D),2018,16.200585,KYW,6,16.200585,5.216899,MultiPolygon,"[[[[-89.48247982199996, 36.502137781000044], [...",,,,,,,,Western District of Kentucky<br>0 vacancies,<b>Western District of Kentucky </b><br><br> T...,#d3d3d3,6,District 6
1,Feature,2,21,Eastern District of Kentucky,52573940000.0,723821300.0,Kentucky,Danny Reeves,George W. Bush (R),2019,13.514251,KYE,6,13.514251,5.451047,MultiPolygon,"[[[[-84.62011661499997, 39.073464501000046], [...",,,,,,,,Eastern District of Kentucky<br>0 vacancies,<b>Eastern District of Kentucky </b><br><br> T...,#d3d3d3,6,District 6
2,Feature,3,18,Southern District of Indiana,58245170000.0,594117600.0,Indiana,Jane Magnus-Stinson,Barack Obama (D),2016,14.956126,INS,7,14.956126,6.137433,MultiPolygon,"[[[[-85.86280909599998, 40.464758698000026], [...",,,,,,,,Southern District of Indiana<br>0 vacancies,<b>Southern District of Indiana </b><br><br> T...,#d3d3d3,7,District 7
3,Feature,4,1,Middle District of Alabama,34126730000.0,547242300.0,Alabama,Emily Coody Marks,Donald Trump (R),2019,10.235799,ALM,11,10.235799,3.858442,MultiPolygon,"[[[[-85.33828446299998, 33.494706345000054], [...",ALM,[],"[{'Nominee': '', 'Incumbent': 'Brasher,Andrew ...",[],0.0,1.0,0.0,Middle District of Alabama<br>1 vacancies,<b>Middle District of Alabama </b><br><br> The...,#ffeda0,11,District 11
4,Feature,5,1,Southern District of Alabama,62358820000.0,3052681000.0,Alabama,Kristi DuBose,George W. Bush (R),2017,12.976906,ALS,11,12.976906,3.278871,MultiPolygon,"[[[[-88.08681708099994, 30.259869545000072], [...",,,,,,,,Southern District of Alabama<br>0 vacancies,<b>Southern District of Alabama </b><br><br> T...,#d3d3d3,11,District 11
5,Feature,6,5,Western District of Arkansas,61636170000.0,1313839000.0,Arkansas,Susan Hickey,Barack Obama (D),2019,17.746755,ARW,8,17.746755,6.194531,MultiPolygon,"[[[[-94.26928693099995, 36.49923735100003], [-...",ARW,[],"[{'Nominee': '', 'Incumbent': 'Holmes III,Paul...",[],0.0,1.0,0.0,Western District of Arkansas<br>1 vacancies,<b>Western District of Arkansas </b><br><br> T...,#ffeda0,8,District 8
6,Feature,7,5,Eastern District of Arkansas,73132700000.0,1649021000.0,Arkansas,D.P. Marshall Jr.,Barack Obama (D),2019,20.659817,ARE,8,20.659817,7.390924,MultiPolygon,"[[[[-91.67234942699997, 36.49946967600005], [-...",,,,,,,,Eastern District of Arkansas<br>0 vacancies,<b>Eastern District of Arkansas </b><br><br> T...,#d3d3d3,8,District 8
7,Feature,8,6,Northern District of California,53194160000.0,7787389000.0,California,Richard Seeborg,Barack Obama (D),2021,26.865847,CAN,9,26.865847,5.590307,MultiPolygon,"[[[[-123.00090629299996, 37.70101589600006], [...",CAN,[],"[{'Nominee': '', 'Incumbent': 'White,Jeffrey S...","[{'Nominee': 'Corley,Jacqueline Scott', 'Incum...",0.0,1.0,2.0,Northern District of California<br>1 vacancies,<b>Northern District of California </b><br><br...,#ffeda0,9,District 9
8,Feature,9,6,Eastern District of California,225003500000.0,4008883000.0,California,Kimberly Mueller,Barack Obama (D),2020,31.166453,CAE,9,31.166453,23.651767,MultiPolygon,"[[[[-122.03457673799994, 42.00470130000008], [...",CAE,[],"[{'Nominee': '', 'Incumbent': 'England Jr.,Mor...","[{'Nominee': 'Thurston,Jennifer L.', 'Incumben...",0.0,1.0,1.0,Eastern District of California<br>1 vacancies,<b>Eastern District of California </b><br><br>...,#ffeda0,9,District 9
9,Feature,10,6,Central District of California,103588300000.0,7056692000.0,California,Philip Gutierrez,George W. Bush (R),2020,24.76322,CAC,9,24.76322,10.240974,MultiPolygon,"[[[[-118.59577516699994, 33.03434072400006], [...",CAC,[],"[{'Nominee': '', 'Incumbent': 'Selna,James V.'...","[{'Nominee': 'Frimpong,Maame Ewusi-Mensah', 'I...",0.0,4.0,2.0,Central District of California<br>4 vacancies,<b>Central District of California </b><br><br>...,#f03b20,9,District 9


In [891]:
def dis_num(district):
    """Return the group id for a district label.

    'District of Columbia' is mapped to '12'; any other value is handed
    back unchanged.
    """
    return '12' if district == 'District of Columbia' else district

def dis_name(district):
    """Return the display name of the group a district belongs to.

    Parameters
    ----------
    district : str or int
        The district identifier, or the literal 'District of Columbia'.

    Returns
    -------
    str
        'District of Columbia' unchanged; otherwise 'District <id>'.
    """
    if district == 'District of Columbia':
        return district
    # str() so an integer identifier does not raise TypeError on concatenation.
    return "District " + str(district)


In [892]:
# Group id: the DISTRICT_N value as-is, except 'District of Columbia' which becomes '12'.
map_df['properties.group_id'] = map_df['properties.DISTRICT_N'].apply(dis_num)

In [893]:
# Group label: "District <n>", or 'District of Columbia' left unchanged.
map_df['properties.group_name'] = map_df['properties.DISTRICT_N'].apply(dis_name)

In [895]:
# Round-trip through JSON text to get plain Python records:
# a list with one dict per row, keyed by column name.
ok_json = json.loads(map_df.to_json(orient='records'))


In [896]:
def process_to_geojson(file):
    """Rebuild a GeoJSON FeatureCollection from flattened row records.

    Parameters
    ----------
    file : iterable of dict
        One mapping per feature, keyed by dotted column names such as
        'properties.NAME' or 'geometry.coordinates'. Despite the parameter
        name, this is a sequence of records, not a file handle.

    Returns
    -------
    dict
        ``{"type": "FeatureCollection", "features": [...]}`` where each
        feature has "type": "Feature" plus "properties" and "geometry" dicts
        filled from the matching dotted columns. Columns with any other
        prefix, or without a dotted sub-key, are ignored.
    """
    geo_data = {"type": "FeatureCollection", "features": []}
    for row in file:
        feature = {"type": "Feature", "properties": {}, "geometry": {}}
        for key, value in row.items():
            key_names = key.split('.')
            # A bare 'geometry' / 'properties' column has no sub-key to file
            # the value under; skip it instead of raising IndexError.
            if len(key_names) < 2:
                continue
            section = key_names[0]
            if section in ('geometry', 'properties'):
                feature[section][key_names[1]] = value
        geo_data['features'].append(feature)
    return geo_data


In [897]:
# Regroup the flat 'properties.*' / 'geometry.*' columns into nested GeoJSON features.
geo_format = process_to_geojson(ok_json)

In [898]:
# Write geo-data.js as a JavaScript assignment: `var infoData = <geojson>`.
# One 'w' handle writes both the prefix and the payload, instead of
# truncating the file and then reopening it in append mode.
with open('geo-data.js', 'w') as outfile:
    #Variable name
    outfile.write("var infoData = ")
    #geojson output
    json.dump(geo_format, outfile)
