# Diversity Analysis of America's Federal Courts

This Python project analyzes each sitting judge that serves in the Courts of Appeals and the District Courts. After scraping the names of each sitting judge in every district using Selenium, I will work to analyze who nominated each judge (political party of president), and the gender makeup of the courts.

## Scraping Current Judicial Vacancies
To pull a listing of current judicial vacancies by court. The name of the incumbent, the reason for the vacancy, the vacancy date, the nominee, and the nomination date.

http://www.uscourts.gov/judges-judgeships/judicial-vacancies/current-judicial-vacancies

In [1]:
#Importing scraping libraries, using Selenium
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

In [2]:
###Directing Selenium to the Judicial Vacancies website
driver = webdriver.Chrome()
driver.get('http://www.uscourts.gov/judges-judgeships/judicial-vacancies/current-judicial-vacancies')

In [3]:
vacancies = []

rows = driver.find_elements_by_tag_name('tr')

for row in rows:
    
    cells = row.find_elements_by_css_selector('td')
    
    if not cells:
        print("null")
    
    else:
    
        data = {
            'court':  cells[0].text,
            'incumbent': cells[1].text,
            'vacancy_reason': cells[2].text,
            'nom_date': cells[3].text,
            'nominee': cells[4].text,
    }
        
        vacancies.append(data)

import pandas as pd

df = pd.DataFrame(vacancies)
df.to_csv("judge_vacancies.csv", index=False)
df.head(10)

null


Unnamed: 0,court,incumbent,nom_date,nominee,vacancy_reason
0,01 - MA,"O'Toole Jr.,George A.",01/01/2018,,Senior
1,01 - MA,"Woodlock,Douglas P.",06/01/2015,,Senior
2,01 - ME,"Woodcock Jr.,John A.",06/27/2017,"Walker,Lance E.",Senior
3,01 - PR,"Fuste,Jose Antonio",06/01/2016,"Arias-Marxuach,Raul M.",Retired
4,01 - RI,"Lisi,Mary M.",10/01/2015,"McElroy,Mary S.",Senior
5,02 - CCA,"Lynch,Gerard E.",09/05/2016,,Senior
6,02 - CCA,"Wesley,Richard C.",08/01/2016,"Sullivan,Richard J.",Senior
7,02 - CT,"Chatigny,Robert N.",01/01/2017,"Dooley,Kari A.",Senior
8,02 - NY-E,"Feuerstein,Sandra J.",01/21/2015,"Brown,Gary Richard",Senior
9,02 - NY-E,"Gleeson,John",03/09/2016,"Gujarati,Diane",Resigned


## Scraping Current Judicial Confirmations
Scrape the first page of judicial confirmations:

http://www.uscourts.gov/judges-judgeships/judicial-vacancies/confirmation-listing


In [4]:
driver = webdriver.Chrome()
driver.get('http://www.uscourts.gov/judges-judgeships/judicial-vacancies/confirmation-listing')

In [13]:
confirmations = []

rows = driver.find_elements_by_tag_name('tr')

for row in rows:
    
    cells = row.find_elements_by_css_selector('td')
    
    if not cells:
        print("null")
    
    else:   
        data = {
            'nominee': cells[0].text,
            'nom_date': cells[1].text,
            'conf_date': cells[2].text,
            'court':  cells[3].text,
            'incumbent': cells[4].text,
            'vacancy_reason': cells[5].text,
            'vacancy_date': cells[-1].text
        }
        
        confirmations.append(data)

import pandas as pd

df = pd.DataFrame(confirmations)
df.to_csv("judge_confirmations.csv", index=False)
df.head(10)

null


Unnamed: 0,conf_date,court,incumbent,nom_date,nominee,vacancy_date,vacancy_reason
0,11/02/2017,03 - CCA,"Rendell,Marjorie O.",06/19/2017,"Bibas,Stephanos",07/01/2015,Senior
1,11/16/2017,04 - SC,"Anderson Jr.,Joseph F.",08/03/2017,"Coggins Jr.,Donald C.",11/16/2014,Senior
2,03/01/2018,04 - SC,"Currie,Cameron McGowan",08/03/2017,"Quattlebaum Jr.,A. Marvin",10/03/2013,Senior
3,04/24/2018,05 - CCA,"Davis,W. Eugene",01/08/2018,"Duncan,Stuart Kyle",12/31/2016,Senior
4,05/09/2018,05 - CCA,"Clement,Edith Brown",01/08/2018,"Engelhardt,Kurt D.",05/09/2018,Senior
5,12/14/2017,05 - CCA,"King,Carolyn Dineen",10/16/2017,"Ho,James C.",12/31/2013,Senior
6,12/13/2017,05 - CCA,"Garza,Emilio M.",10/03/2017,"Willett,Don R.",08/01/2012,Senior
7,03/06/2018,05 - LAW,"James,Robert G.",08/03/2017,"Doughty,Terry A.",05/31/2016,Senior
8,03/05/2018,05 - TXN,"Solis,Jorge A.",09/07/2017,"Scholer,Karen Gren",05/01/2016,Retired
9,06/05/2018,05 - TXS,"Costa,Gregg Jeffrey",09/07/2017,"Rodriguez Jr.,Fernando",05/20/2014,Elevated


## A list of the U.S. Courts of Appeals Judges (13 Courts)
Based on an Advanced Search in the Federal Judiciary Center, the research and education agency of the judicial branch. https://www.fjc.gov/history/judges/search/advanced-search

In [91]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

In [92]:
#Pull names for Court of Appeals and Sitting Judge
driver = webdriver.Chrome()
driver.get('https://www.fjc.gov/history/judges/search/advanced-search')

In [93]:
#Click the Court button
button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[1]/legend/span/a")
button.click()
time.sleep(1)

In [94]:
#Click the U.S. Court of Appeals button
dis_button = driver.find_element_by_xpath("//*[@id='edit-court-appeals-wrapper']/div/div/fieldset/legend/span/a")
dis_button.click()
time.sleep(1)

In [95]:
#Click the Select All button
select_all = driver.find_element_by_xpath("//*[@id='edit-court-appeals-wrapper']/div/div/fieldset/div/div/a")
select_all.click()

In [96]:
#Click 'Limit to Sitting Judges'
current_button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[6]/legend/span/a")
current_button.click()
time.sleep(1)

In [97]:
#Click "All Sitting Judges"
sit_button = driver.find_element_by_id("edit-limit-sitting-judges-1")
sit_button.click()
time.sleep(1)

In [98]:
#Click Search Button
search = driver.find_element_by_id("edit-submit-history-judges-advanced-search")
search.click()
time.sleep(5)

In [100]:
#Click on the Window for Judges
judge = driver.find_element_by_class_name("colorbox-node")
judge.click()

In [None]:
#Scrape the Search Results Names with the intention of using RegEx to search
time.sleep(2)

sitting_judges = []
count = 0

while (count < 273):
    rows = driver.find_elements_by_id('cboxContent')
    for row in rows:
        data = {
            'name': row.find_element_by_css_selector("h1").text,
            'bio': row.find_element_by_class_name("field").text
        }
    
    sitting_judges.append(data)
    count = count + 1
    driver.find_element_by_id("cboxNext").click()
    time.sleep(4)
                
import pandas as pd
sitting_df = pd.DataFrame(sitting_judges)
sitting_df.to_csv("sitting_judge_names.csv", index=False)

In [19]:
import pandas as pd
df = pd.read_csv("appel_court_sitting_judge_names.csv")
df.head(5)

Unnamed: 0,bio,name
0,"Born 1954 in Robersonville, NC\n\nFederal Judicial Service:\nJudge, U.S. Court of Appeals for the Fourth Circuit\nNominated by Barack Obama on November 4, 2009, to a seat vacated by James Dickson Phillips, Jr. Confirmed by the Senate on August 5, 2010, and received commission on August 10, 2010.\n\nEducation:\nUniversity of North Carolina, B.A., 1975\nMarquette University Law School, J.D., 1979\nUniversity of Virginia School of Law, LL.M., 1995\n\nProfessional Career:\nU.S. Navy captain, JAG Corps, 1979-1983\nU.S. Naval Reserve captain, JAG Corps, 1983-2009\nAssistant appellate defender, State of North Carolina, 1983-1984\nPrivate practice, Wilson and Greenville, North Carolina, 1984-1990\nAssociate judge, North Carolina Court of Appeals, 1990-1998, 1999-2010\nAssociate justice, Supreme Court of North Carolina, 1998\nOther Nominations/Recess Appointments:\nNominated to U.S. Court of Appeals for the Fourth Circuit, August 5, 1999; no Senate vote\nNominated to U.S. Court of Appeals for the Fourth Circuit, January 3, 2001; nomination withdrawn by president, March 19, 2001","Wynn, James Andrew, Jr."
1,"Born 1952 in Roanoke, VA\n\nFederal Judicial Service:\nJudge, U.S. Court of Appeals for the Fourth Circuit\nNominated by George W. Bush on March 13, 2008, to a seat vacated by J. Michael Luttig. Confirmed by the Senate on May 20, 2008, and received commission on July 1, 2008.\n\nEducation:\nBridgewater College, B.A., 1974\nUniversity of Virginia School of Law, J.D., 1977\nNew York University School of Law, LL.M., 1978\n\nProfessional Career:\nU.S. Army Reserve, 1986-1997\nPrivate practice, Roanoke, Virginia, 1977-2000\nState delegate, Virginia, 1982-1994\nJudge, Court of Appeals of Virginia, 2001-2003\nJustice, Supreme Court of Virginia, 2003-2008","Agee, G. Steven"
2,"Born 1949 in Cambridge, OH\n\nFederal Judicial Service:\nJudge, U.S. Court of Appeals for the Third Circuit\nNominated by William J. Clinton on September 28, 1999, to a seat vacated by Walter K. Stapleton. Confirmed by the Senate on February 10, 2000, and received commission on February 16, 2000.\n\nEducation:\nGeorgetown University, B.A., 1971\nGeorgetown University Law Center, J.D., 1975\n\nProfessional Career:\nLaw clerk, Hon. Daniel L. Herrmann, Delaware Supreme Court, 1975-1976\nPrivate practice, Wilmington, Delaware, 1976-2000","Ambro, Thomas L."
3,"Born 1936 in Macon, GA\n\nFederal Judicial Service:\nJudge, U.S. Court of Appeals for the Fifth Circuit\nNominated by Jimmy Carter on April 18, 1979, to a new seat authorized by 92 Stat. 1629. Confirmed by the Senate on July 12, 1979, and received commission on July 13, 1979. Service terminated on October 1, 1981, due to reassignment.\n\nJudge, U.S. Court of Appeals for the Eleventh Circuit\nReassigned on October 1, 1981, to a new seat authorized by 94 Stat. 1994. Served as chief judge, 1999-2002. Assumed senior status on January 31, 2009.\n\nEducation:\nYale College, A.B., 1958\nHarvard Law School, LL.B., 1961\n\nProfessional Career:\nPrivate practice, Macon, Georgia, 1961, 1963-1979\nU.S. Army captain, 1961-1963\nMember, Judicial Conference of the United States, 1999-2002","Anderson, R[obert] Lanier III"
4,"Born 1932 in Salt Lake City, UT\n\nFederal Judicial Service:\nJudge, U.S. Court of Appeals for the Tenth Circuit\nNominated by Ronald Reagan on July 23, 1985, to a new seat authorized by 98 Stat. 333. Confirmed by the Senate on October 16, 1985, and received commission on October 16, 1985. Assumed senior status on January 1, 2000.\n\nEducation:\nUniversity of Utah College of Law, LL.B., 1960\n\nProfessional Career:\nU.S. Army 44th Infantry Division, 1953-1955\nTrial attorney, Tax Division, U.S. Department of Justice, 1960-1964\nPrivate practice, Salt Lake City, Utah, 1964-1985","Anderson, Stephen Hale"


## A list of the U.S. District Court judges (94 Courts)
Based on an Advanced Search in the Federal Judiciary Center, the research and education agency of the judicial branch. https://www.fjc.gov/history/courts/u.s.-district-courts-and-federal-judiciary

In [1]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

In [2]:
#Pull names for Court of Appeals and Sitting Judge
driver = webdriver.Chrome()
driver.get('https://www.fjc.gov/history/judges/search/advanced-search')

In [3]:
#Click the Court button
button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[1]/legend/span/a")
button.click()
time.sleep(1)

In [4]:
#Click the U.S. District Courts button
dis_button = driver.find_element_by_xpath("//*[@id='edit-court-district-wrapper']/div/div/fieldset/legend/span/a")
dis_button.click()
time.sleep(1)

In [5]:
#Click the Select All button
select_all = driver.find_element_by_xpath("//*[@id='edit-court-district-wrapper']/div/div/fieldset/div/div/a")
select_all.click()

In [6]:
#Click 'Limit to Sitting Judges'
current_button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[6]/legend/span/a")
current_button.click()
time.sleep(1)

In [7]:
#Click "All Sitting Judges"
sit_button = driver.find_element_by_id("edit-limit-sitting-judges-1")
sit_button.click()
time.sleep(1)

In [8]:
#Click Search Button
search = driver.find_element_by_id("edit-submit-history-judges-advanced-search")
search.click()
time.sleep(5)

In [9]:
#Click on the Window for Judges
judge = driver.find_element_by_class_name("colorbox-node")
judge.click()

In [11]:
#Scrape the Search Results Names with the intention of using RegEx to search

district_judges = []
count = 0

while (count < 1007):
    pages = driver.find_elements_by_id('cboxContent')
    for page in pages:
        data = {
            'name': page.find_element_by_css_selector("h1").text,
            'bio': page.find_element_by_class_name("field").text
        }
    
    district_judges.append(data)
    count = count + 1
    driver.find_element_by_id("cboxNext").click()
    time.sleep(2)
                
import pandas as pd
district_df = pd.DataFrame(district_judges)
district_df.to_csv("district_judge_names.csv", index=False)
district_df.head()

Unnamed: 0,bio,name
0,"Born 1974 in Madison, WI\n\nFederal Judicial S...","Abrams, Leslie Joyce"
1,"Born 1968 in New York, NY\n\nFederal Judicial ...","Abrams, Ronnie"
2,"Born 1945 in Jacksonville, FL\n\nFederal Judic...","Adams, Henry Lee, Jr."
3,"Born 1955 in Orrville, OH\n\nFederal Judicial ...","Adams, John R."
4,"Born 1939 in Milwaukee, WI\n\nFederal Judicial...","Adelman, Lynn S."


## RegEx to clean the Court of Appeals dataframe

In [2]:
import pandas as pd
df = pd.read_csv("appel_court_sitting_judge_names.csv")
cols = list(df.columns.values)
cols

['bio', 'name']

In [3]:
df = df[['name', 'bio']]
df.head(3)

Unnamed: 0,name,bio
0,"Wynn, James Andrew, Jr.","Born 1954 in Robersonville, NC\n\nFederal Judi..."
1,"Agee, G. Steven","Born 1952 in Roanoke, VA\n\nFederal Judicial S..."
2,"Ambro, Thomas L.","Born 1949 in Cambridge, OH\n\nFederal Judicial..."


In [4]:
#Grab the last name of the judge
import re

df['last_name'] = df['name'].str.extract(r'\b(\w+)\b')
df.head()

Unnamed: 0,name,bio,last_name
0,"Wynn, James Andrew, Jr.","Born 1954 in Robersonville, NC\n\nFederal Judi...",Wynn
1,"Agee, G. Steven","Born 1952 in Roanoke, VA\n\nFederal Judicial S...",Agee
2,"Ambro, Thomas L.","Born 1949 in Cambridge, OH\n\nFederal Judicial...",Ambro
3,"Anderson, R[obert] Lanier III","Born 1936 in Macon, GA\n\nFederal Judicial Ser...",Anderson
4,"Anderson, Stephen Hale","Born 1932 in Salt Lake City, UT\n\nFederal Jud...",Anderson


In [5]:
#Code to extract the first name
df['first_name'] = df['name'].str.replace(', Jr.', ' Jr.')
df['first_name'].head(30)

0            Wynn, James Andrew Jr.
1                   Agee, G. Steven
2                  Ambro, Thomas L.
3     Anderson, R[obert] Lanier III
4            Anderson, Stephen Hale
5           Arnold, Morris Sheppard
6           Bacharach, Robert Edwin
7                Baldock, Bobby Ray
8          Barksdale, Rhesa Hawkins
9                Barrett, Amy Coney
10           Barron, David Jeremiah
11            Barry, Maryanne Trump
12          Batchelder, Alice Moore
13            Bauer, William Joseph
14                   Bea, Carlos T.
15           Beam, C[larence] Arlen
16       Benavides, Fortunato Pedro
17            Benton, William Duane
18            Berzon, Marsha Siegel
19                 Bibas, Stephanos
20             Black, Susan Harrell
21              Boggs, Danny Julian
22                  Boudin, Michael
23       Bowman, Pasco Middleton II
24            Branch, Elizabeth Lee
25           Brennan, Michael Brian
26               Briscoe, Mary Beck
27                     Brorb

In [6]:
df['first_name'] = df['name'].str.extract(r", (.+)$")

In [7]:
df.head(10)

Unnamed: 0,name,bio,last_name,first_name
0,"Wynn, James Andrew, Jr.","Born 1954 in Robersonville, NC\n\nFederal Judi...",Wynn,"James Andrew, Jr."
1,"Agee, G. Steven","Born 1952 in Roanoke, VA\n\nFederal Judicial S...",Agee,G. Steven
2,"Ambro, Thomas L.","Born 1949 in Cambridge, OH\n\nFederal Judicial...",Ambro,Thomas L.
3,"Anderson, R[obert] Lanier III","Born 1936 in Macon, GA\n\nFederal Judicial Ser...",Anderson,R[obert] Lanier III
4,"Anderson, Stephen Hale","Born 1932 in Salt Lake City, UT\n\nFederal Jud...",Anderson,Stephen Hale
5,"Arnold, Morris Sheppard","Born 1941 in Texarkana, TX\n\nFederal Judicial...",Arnold,Morris Sheppard
6,"Bacharach, Robert Edwin","Born 1959 in Clarksdale, MS\n\nFederal Judicia...",Bacharach,Robert Edwin
7,"Baldock, Bobby Ray","Born 1936 in Rocky, OK\n\nFederal Judicial Ser...",Baldock,Bobby Ray
8,"Barksdale, Rhesa Hawkins","Born 1944 in Jackson, MS\n\nFederal Judicial S...",Barksdale,Rhesa Hawkins
9,"Barrett, Amy Coney","Born 1972 in New Orleans, LA\n\nFederal Judici...",Barrett,Amy Coney


In [17]:
df['bio'] = df['bio'].str.strip('\n').str.replace('\n',' ')
df['bio'].head(10)

0    Born 1954 in Robersonville, NC  Federal Judici...
1    Born 1952 in Roanoke, VA  Federal Judicial Ser...
2    Born 1949 in Cambridge, OH  Federal Judicial S...
3    Born 1936 in Macon, GA  Federal Judicial Servi...
4    Born 1932 in Salt Lake City, UT  Federal Judic...
5    Born 1941 in Texarkana, TX  Federal Judicial S...
6    Born 1959 in Clarksdale, MS  Federal Judicial ...
7    Born 1936 in Rocky, OK  Federal Judicial Servi...
8    Born 1944 in Jackson, MS  Federal Judicial Ser...
9    Born 1972 in New Orleans, LA  Federal Judicial...
Name: bio, dtype: object

## RegEx to extract the name of the Court where each judge serves

In [99]:
df['court'] = df['bio'].str.extract(r'(\bU.S. Court\b(.*?)\bCircuit\b\s)')[0]

In [100]:
#Duplicate the Court column in order to create a merge column for the map
df['court_code'] = df['court']

## Extracting the President who nominated each judge and running through a function to determine each president's political party

In [14]:
#RegEx to extract the president's name
df['president'] = df['bio'].str.extract(r'(\bNominated by [\w]*......[\w]*)')
df['president'].head(10)

0          Nominated by Barack Obama
1        Nominated by George W. Bush
2    Nominated by William J. Clinton
3          Nominated by Jimmy Carter
4         Nominated by Ronald Reagan
5         Nominated by Ronald Reagan
6          Nominated by Barack Obama
7         Nominated by Ronald Reagan
8      Nominated by George H.W. Bush
9       Nominated by Donald J. Trump
Name: president, dtype: object

In [None]:
# Function to determine the presidential party of each nominator
def presidential_party(RepDem):
    if RepDem == "Nominated by Barack Obama":
        return "Democrat"
    elif RepDem == "Nominated by George W. Bush":
        return "Republican"
    elif RepDem == "Nominated by William J. Clinton":
        return "Democrat"
    elif RepDem == "Nominated by Jimmy Carter":
        return "Democrat"
    elif RepDem == "Nominated by Ronald Reagan":
        return "Republican"
    elif RepDem == "Nominated by George H.W. Bush":
        return "Republican"
    elif RepDem == "Nominated by Donald J. Trump":
        return "Republican"
    elif RepDem == "Nominated by Richard M. Nixon":
        return "Republican"
    elif RepDem == "Nominated by Lyndon B. Johnson":
        return "Democrat"
    elif RepDem == "Nominated by Gerald Ford on":
        return "Republican"
    else:
        return "President Unknown"

In [132]:
df['Presidential_Party'] = df.president.apply(presidential_party)

## A List of the Genders of Judges: U.S. Court of Appeals (COA 13)
https://www.fjc.gov/history/judges/search/women

*Code Thought Process*: Once I have a list of the sitting judges in the district courts and court of appeals, I'd write a for loop where if the "Gender" Judge matched a name in the "Court of Appeals" or "District Judge" dictionaries, I'd have it save into a "Women" dictionary.

In [213]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common import action_chains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

In [214]:
#Pull gender for Court of Appeals 
driver = webdriver.Chrome()
driver.get('https://www.fjc.gov/history/judges/search/advanced-search')

#Click the Court button
button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[1]/legend/span/a")
button.click()
time.sleep(1)

#Click the U.S. Court of Appeals button
dis_button = driver.find_element_by_xpath("//*[@id='edit-court-appeals-wrapper']/div/div/fieldset/legend/span/a")
dis_button.click()
time.sleep(1)

In [215]:
#Click the Select All button
select_all = driver.find_element_by_xpath("//*[@id='edit-court-appeals-wrapper']/div/div/fieldset/div/div/a")
select_all.click()
time.sleep(2)
button.click()

In [216]:
#Select Gender
gender = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[4]/legend/span/a")
gender.click()

In [217]:
#Edit Gender
edit_gender = driver.find_element_by_id("edit-gender")
edit_gender.click()

In [218]:
#Click female
female = driver.find_element_by_xpath("//*[@id='edit-gender']/option[2]")
female.click()
time.sleep(2)
gender.click()

In [219]:
#Click 'Limit to Sitting Judges'
current_button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[6]/legend/span/a")
current_button.click()
time.sleep(1)

In [220]:
#Click "All Sitting Judges"
sit_button = driver.find_element_by_id("edit-limit-sitting-judges-1")
sit_button.click()
time.sleep(1)

#Click Search Button
search = driver.find_element_by_id("edit-submit-history-judges-advanced-search")
search.click()
time.sleep(5)

In [221]:
#Click on the Window for Judges
judge = driver.find_element_by_class_name("colorbox-node")
judge.click()

In [224]:
#Scrape the Search Results Names with the intention of using RegEx to search
time.sleep(2)

female_COA = []
count = 0

while (count < 73):
    rows = driver.find_elements_by_id('cboxContent')
    for row in rows:
        data = {
            'name': row.find_element_by_css_selector("h1").text
        }
    
    female_COA.append(data)
    count = count + 1
    driver.find_element_by_id("cboxNext").click()
    time.sleep(5)
                
import pandas as pd
female_COA = pd.DataFrame(female_COA)
female_COA.to_csv("female_court_of_appeals.csv", index=False)
female_COA.head()

Unnamed: 0,name
0,"Barrett, Amy Coney"
1,"Barry, Maryanne Trump"
2,"Batchelder, Alice Moore"
3,"Berzon, Marsha Siegel"
4,"Black, Susan Harrell"


## Add  Genders of Judges to U.S. Court of Appeals Dataframe

In [26]:
female_COA = pd.read_csv("female_court_of_appeals.csv", na_values=['nan'])
female_COA = list(female_COA['name'])
female_COA

['Barrett, Amy Coney',
 'Barry, Maryanne Trump',
 'Batchelder, Alice Moore',
 'Berzon, Marsha Siegel',
 'Black, Susan Harrell',
 'Branch, Elizabeth Lee',
 'Briscoe, Mary Beck',
 'Callahan, Consuelo Maria',
 'Carnes, Julie E.',
 'Carney, Susan Laura',
 'Christen, Morgan',
 'Clement, Edith Brown',
 'Cook, Deborah L.',
 'Daughtrey, Martha Craig',
 'Donald, Bernice Bouie',
 'Duncan, Allyson Kay',
 'Eid, Allison Hartwell',
 'Elrod, Jennifer Walker',
 'Friedland, Michelle Taryn',
 'Gibbons, Julia Smith',
 'Graber, Susan',
 'Harris, Pamela Ann',
 'Haynes, Catharina',
 'Henderson, Karen LeCraft',
 'Hull, Frank M.',
 'Ikuta, Sandra Segal',
 'Jones, Edith Hollan',
 'Kearse, Amalya Lyle',
 'Keenan, Barbara Milano',
 'Kelly, Jane Louise',
 'King, Carolyn Dineen',
 'Krause, Cheryl Ann',
 'Larsen, Joan Louise',
 'Livingston, Debra Ann',
 'Lynch, Sandra Lea',
 'Martin, Beverly Baldwin',
 'McHugh, Carolyn Baldwin',
 'McKeown, M. Margaret',
 'Millett, Patricia Ann',
 'Moore, Karen Nelson',
 'Moore, Kim

In [64]:
#Add a Gender column. if name in FemaleCOA is also in df['name'], 
#then return "Female" to a third column/list, called 'gender'

def get_gender(row):
    gender = "Male"
    
    for nm in female_COA:
        if nm == row:
            gender = 'Female'
    return gender

df['gender'] = df['name'].apply(lambda x: get_gender(x))

In [65]:
df['gender'].head(10)

0      Male
1      Male
2      Male
3      Male
4      Male
5      Male
6      Male
7      Male
8      Male
9    Female
Name: gender, dtype: object

In [60]:
df.keys()

Index(['name', 'first_name', 'last_name', 'gender', 'president',
       'Presidential_Party', 'bio'],
      dtype='object')

In [133]:
df = df[['court', 'name', 'first_name', 'last_name', 'gender', 'president', 'Presidential_Party', 'bio']]

In [134]:
df.to_csv("final_court_of_appeals.csv", index=False)

## Using RegEx to clean the District Judge dataframe

In [3]:
import pandas as pd
dfd = pd.read_csv("district_judge_names.csv")
d_cols = list(dfd.columns.values)
d_cols

['bio', 'name']

In [4]:
dfd = dfd[['name', 'bio']]
dfd.head()

Unnamed: 0,name,bio
0,"Abrams, Leslie Joyce","Born 1974 in Madison, WI\n\nFederal Judicial S..."
1,"Abrams, Ronnie","Born 1968 in New York, NY\n\nFederal Judicial ..."
2,"Adams, Henry Lee, Jr.","Born 1945 in Jacksonville, FL\n\nFederal Judic..."
3,"Adams, John R.","Born 1955 in Orrville, OH\n\nFederal Judicial ..."
4,"Adelman, Lynn S.","Born 1939 in Milwaukee, WI\n\nFederal Judicial..."


In [5]:
#Grab the last name of the district judge
import re

dfd['last_name'] = dfd['name'].str.extract(r'\b(\w+)\b')
dfd.head(3)

Unnamed: 0,name,bio,last_name
0,"Abrams, Leslie Joyce","Born 1974 in Madison, WI\n\nFederal Judicial S...",Abrams
1,"Abrams, Ronnie","Born 1968 in New York, NY\n\nFederal Judicial ...",Abrams
2,"Adams, Henry Lee, Jr.","Born 1945 in Jacksonville, FL\n\nFederal Judic...",Adams


In [6]:
#Grab the first name of the district judge

dfd['first_name'] = dfd['name'].str.replace(', Jr.', ' Jr.')
dfd['first_name'].head()

0    Abrams, Leslie Joyce
1          Abrams, Ronnie
2    Adams, Henry Lee Jr.
3          Adams, John R.
4        Adelman, Lynn S.
Name: first_name, dtype: object

In [7]:
dfd['first_name'] = dfd['name'].str.extract(r", (.+)$")

In [8]:
dfd['bio'] = dfd['bio'].str.strip('\n').str.replace('\n',' ')
dfd['bio'].head(3)

0    Born 1974 in Madison, WI  Federal Judicial Ser...
1    Born 1968 in New York, NY  Federal Judicial Se...
2    Born 1945 in Jacksonville, FL  Federal Judicia...
Name: bio, dtype: object

In [9]:
dfd['court'] = dfd['bio'].str.extract(r'(\bU\.S.\ District Court for the([ \w]*)? [\w]* of\s?(South)?(North)?(New)?(West)?(Rhode)?(District of)? [\w]*)')[0]
dfd['court'].head(10)

0    U.S. District Court for the Middle District of...
1    U.S. District Court for the Southern District ...
2    U.S. District Court for the Middle District of...
3    U.S. District Court for the Northern District ...
4    U.S. District Court for the Eastern District o...
5    U.S. District Court for the Eastern District o...
6       U.S. District Court for the District of Oregon
7    U.S. District Court for the Middle District of...
8    U.S. District Court for the Eastern District o...
9    U.S. District Court for the Western District o...
Name: court, dtype: object

In [45]:
#Duplicate the Court column in order to create a merge column for the map
dfd['court_code'] = dfd['court']

In [26]:
#RegEx to extract the president's name
dfd['president'] = dfd['bio'].str.extract(r'(\bNominated by [\w]*......[\w]*)')[0]
dfd['president'].head(10)

0          Nominated by Barack Obama
1          Nominated by Barack Obama
2    Nominated by William J. Clinton
3        Nominated by George W. Bush
4    Nominated by William J. Clinton
5        Nominated by George W. Bush
6    Nominated by William J. Clinton
7      Nominated by George H.W. Bush
8          Nominated by Barack Obama
9         Nominated by Ronald Reagan
Name: president, dtype: object

In [25]:
#Applies the above function written to determine the party of the president.
dfd['Presidential_Party'] = dfd.president.apply(presidential_party)
dfd['Presidential_Party'].head(10)

0      Democrat
1      Democrat
2      Democrat
3    Republican
4      Democrat
5    Republican
6      Democrat
7    Republican
8      Democrat
9    Republican
Name: Presidential_Party, dtype: object

## Scraping a List of the Genders of Judges: District Judges
https://www.fjc.gov/history/judges/search/women

*Code Thought Process*: Once I have a list of the sitting judges in the district courts and court of appeals, I'd write a for loop where if the "Gender" Judge matched a name in the "Court of Appeals" or "District Judge" dictionaries, I'd have it save into a "Women" dictionary.

In [225]:
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common import action_chains
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import NoSuchElementException

In [230]:
#Pull gender for District Judges
driver = webdriver.Chrome()
driver.get('https://www.fjc.gov/history/judges/search/advanced-search')

#Click the Court button
button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[1]/legend/span/a")
button.click()
time.sleep(1)

#Click the District button
dis_button = driver.find_element_by_xpath("//*[@id='edit-court-district-wrapper']/div/div/fieldset/legend/span/a")
dis_button.click()
time.sleep(1)

In [231]:
#Click the Select All button
select_all = driver.find_element_by_xpath("//*[@id='edit-court-district-wrapper']/div/div/fieldset/div/div/a")
select_all.click()
time.sleep(2)
button.click()

In [232]:
#Select Gender
gender = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[4]/legend/span/a")
gender.click()

In [233]:
#Edit Gender
edit_gender = driver.find_element_by_id("edit-gender")
edit_gender.click()

In [234]:
#Click female
female = driver.find_element_by_xpath("//*[@id='edit-gender']/option[2]")
female.click()
time.sleep(2)
gender.click()

In [235]:
#Click 'Limit to Sitting Judges'
current_button = driver.find_element_by_xpath("//*[@id='views_ef_fieldset-wrapper']/div/div/fieldset/div/fieldset[6]/legend/span/a")
current_button.click()
time.sleep(1)

In [236]:
#Click "All Sitting Judges"
sit_button = driver.find_element_by_id("edit-limit-sitting-judges-1")
sit_button.click()
time.sleep(1)

#Click Search Button
search = driver.find_element_by_id("edit-submit-history-judges-advanced-search")
search.click()
time.sleep(5)

In [237]:
#Click on the Window for Judges
judge = driver.find_element_by_class_name("colorbox-node")
judge.click()

In [239]:
#Scrape the Search Results Names with the intention of using RegEx to search
time.sleep(2)

female_DJ = []
count = 0

while (count < 269):
    rows = driver.find_elements_by_id('cboxContent')
    for row in rows:
        data = {
            'name': row.find_element_by_css_selector("h1").text
        }
    
    female_DJ.append(data)
    count = count + 1
    driver.find_element_by_id("cboxNext").click()
    time.sleep(5)
                
import pandas as pd
female_DJ = pd.DataFrame(female_DJ)
female_DJ.to_csv("female_district_judges.csv", index=False)
female_DJ.head()

Unnamed: 0,name
0,"Abrams, Leslie Joyce"
1,"Abrams, Ronnie"
2,"Aiken, Ann L."
3,"Allen, Arenda Lauretta Wright"
4,"Altonaga, Cecilia M."


In [18]:
female_DJ = pd.read_csv("female_district_judges.csv", na_values=['nan'])
female_DJ = list(female_DJ['name'])
female_DJ

['Abrams, Leslie Joyce',
 'Abrams, Ronnie',
 'Aiken, Ann L.',
 'Allen, Arenda Lauretta Wright',
 'Altonaga, Cecilia M.',
 'Alvarez, Micaela',
 'Ambrose, Donetta W.',
 'Amon, Carol Bagley',
 'Arguello, Christine M.',
 'Arleo, Madeline Cox',
 'Armijo, M. Christina',
 'Armstrong, Saundra Brown',
 'Arterton, Janet Bond',
 'Atlas, Nancy Friedman',
 'Axon, Annemarie Carney',
 'Aycock, Sharion',
 'Azrack, Joan Marie',
 'Baker, Kristine Gerhard',
 'Barker, Sarah Evans',
 'Bashant, Cynthia Ann',
 'Battani, Marianne O.',
 'Batts, Deborah A.',
 'Beckwith, Sandra Shank',
 'Beetlestone, Wendy',
 'Bencivengo, Cathy Ann',
 'Berger, Irene Cornelia',
 'Berrigan, Helen Ginger',
 'Biggs, Loretta Copeland',
 'Bissoon, Cathy',
 'Blackburn, Sharon Lovelace',
 'Blake, Catherine C.',
 'Bloom, Beth Francine',
 'Bolton, Susan Ritchie',
 'Boom, Claria Horn',
 'Bowdre, Karon O.',
 'Boyle, Jane J.',
 'Brinkema, Leonie M.',
 'Brodie, Margo Kitsy',
 'Brody, Anita Blumstein',
 'Brown, Anna J.',
 'Brown, Debra Marie',

In [19]:
#Apply Function written to determine whether female list matches judge names

def get_gender(row):
    gender = "Male"
    
    for nm in female_DJ:
        if nm == row:
            gender = 'Female'
    return gender

dfd['gender'] = dfd['name'].apply(lambda x: get_gender(x))

In [20]:
dfd['gender'].head(10)

0    Female
1    Female
2      Male
3      Male
4      Male
5      Male
6    Female
7      Male
8    Female
9      Male
Name: gender, dtype: object

In [21]:
dfd.keys()

Index(['name', 'bio', 'last_name', 'first_name', 'court', 'president',
       'Presidential_Party', 'gender'],
      dtype='object')

In [22]:
dfd = dfd[['court', 'name', 'first_name', 'last_name', 'gender', 'president', 'Presidential_Party', 'bio']]

In [23]:
dfd.to_csv("final_district_courts.csv", index=False)