# Preparing the data for the map template

## Editorial decisions:
- whether or not to include circuit courts?
- whether or not to include federal courts in areas outside of the U.S. states, such as Puerto Rico, the Marshall Islands, etc.
- whether or not to include only the Judges
- whether or not to remove the male nominees

In [1]:
import pandas as pd
from glob import glob
import re
import os
import requests
from bs4 import BeautifulSoup

In [2]:
# Load the nominee list from the CSV in the working directory and preview it
nominees_csv = 'biden_nominees.csv'
df = pd.read_csv(nominees_csv)
df

Unnamed: 0,name,district,court
0,Bridget Meehan Brennan,Northern District of Ohio,District Court
1,Victoria Marie Calvert,Northern District of Georgia,District Court
2,John H. Chun,Western District of Washington,District Court
3,Samantha D. Elliott,District of New Hampshire,District Court
4,Charles Esque Fleming,Northern District of Ohio,District Court
...,...,...,...
226,Judge Ana Isabel de Alba,Eastern District of California,District Court
227,Robert Steven Huie,Southern District of California,District Court
228,Natasha C. Merle,Eastern District of New York,District Court
229,Jennifer H. Rearden,Southern District of New York,District Court


In [3]:
# Cast 'district' to plain strings so the str.replace() call applied to it later works on every row.
# NOTE(review): astype(str) turns a missing value into the literal text 'nan' - confirm no district is missing.
df = df.assign(district=df['district'].astype(str))

In [4]:
# Summarise column dtypes and non-null counts to spot missing values
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231 entries, 0 to 230
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   name      231 non-null    object
 1   district  231 non-null    object
 2   court     227 non-null    object
dtypes: object(3)
memory usage: 5.5+ KB


In [5]:
def transform_district(district, directional_words=("Northern", "Southern", "Western", "Eastern", "Middle")):
    """Convert a district title into "<State> <Direction>" form.

    The phrase "District of" is removed and any directional qualifier is
    moved behind the remaining words, which keep their original order, e.g.
    "Eastern District of New York" -> "New York Eastern" and
    "District of New Hampshire" -> "New Hampshire".

    Parameters
    ----------
    district : str
        Full district title. Non-string values are returned unchanged.
    directional_words : sequence of str, optional
        Words treated as directional qualifiers and moved to the end.

    Returns
    -------
    str
        The rearranged district label (or the input itself if it is not a
        string).
    """
    # Pass non-string values (e.g. NaN) through instead of raising.
    if not isinstance(district, str):
        return district

    # Remove "District of" if present; split() also discards the double
    # space that the removal leaves between the remaining words.
    words = district.replace("District of", "").split()

    # Keep every remaining word (so multi-word states stay intact) and move
    # only the directional qualifiers to the end.
    state_words = [word for word in words if word not in directional_words]
    direction_words = [word for word in words if word in directional_words]
    return " ".join(state_words + direction_words)

In [6]:
# Preview the frame before the district labels are derived
df

Unnamed: 0,name,district,court
0,Bridget Meehan Brennan,Northern District of Ohio,District Court
1,Victoria Marie Calvert,Northern District of Georgia,District Court
2,John H. Chun,Western District of Washington,District Court
3,Samantha D. Elliott,District of New Hampshire,District Court
4,Charles Esque Fleming,Northern District of Ohio,District Court
...,...,...,...
226,Judge Ana Isabel de Alba,Eastern District of California,District Court
227,Robert Steven Huie,Southern District of California,District Court
228,Natasha C. Merle,Eastern District of New York,District Court
229,Jennifer H. Rearden,Southern District of New York,District Court


In [7]:
# Strip the "District of" phrase to get the short district label.
# The vectorised .str accessor replaces the per-row lambda, and the
# split/join trims the ends and collapses the double space that removing
# the phrase leaves behind (e.g. "Northern  Ohio" -> "Northern Ohio").
df['properties.name'] = (
    df['district']
    .str.replace("District of", "", regex=False)
    .str.split()
    .str.join(" ")
)

In [8]:
# Check the new 'properties.name' column
df

Unnamed: 0,name,district,court,properties.name
0,Bridget Meehan Brennan,Northern District of Ohio,District Court,Northern Ohio
1,Victoria Marie Calvert,Northern District of Georgia,District Court,Northern Georgia
2,John H. Chun,Western District of Washington,District Court,Western Washington
3,Samantha D. Elliott,District of New Hampshire,District Court,New Hampshire
4,Charles Esque Fleming,Northern District of Ohio,District Court,Northern Ohio
...,...,...,...,...
226,Judge Ana Isabel de Alba,Eastern District of California,District Court,Eastern California
227,Robert Steven Huie,Southern District of California,District Court,Southern California
228,Natasha C. Merle,Eastern District of New York,District Court,Eastern New York
229,Jennifer H. Rearden,Southern District of New York,District Court,Southern New York


In [9]:
# Directional qualifiers that belong after the state name
words_to_move_to_end = ['Northern', 'Southern', 'Western', 'Eastern', 'Middle']


def _direction_last(value):
    """Return `value` with the directional words moved behind all other words.

    Non-string values are passed through unchanged.
    """
    if not isinstance(value, str):
        return value
    leading, trailing = [], []
    for token in value.split():
        if token in words_to_move_to_end:
            trailing.append(token)
        else:
            leading.append(token)
    return ' '.join(leading + trailing).strip()


# Rewrite 'properties.name', e.g. "Northern Ohio" -> "Ohio Northern"
df['properties.name'] = df['properties.name'].apply(_direction_last)

In [10]:
# Confirm the directional word now follows the state name
df

Unnamed: 0,name,district,court,properties.name
0,Bridget Meehan Brennan,Northern District of Ohio,District Court,Ohio Northern
1,Victoria Marie Calvert,Northern District of Georgia,District Court,Georgia Northern
2,John H. Chun,Western District of Washington,District Court,Washington Western
3,Samantha D. Elliott,District of New Hampshire,District Court,New Hampshire
4,Charles Esque Fleming,Northern District of Ohio,District Court,Ohio Northern
...,...,...,...,...
226,Judge Ana Isabel de Alba,Eastern District of California,District Court,California Eastern
227,Robert Steven Huie,Southern District of California,District Court,California Southern
228,Natasha C. Merle,Eastern District of New York,District Court,New York Eastern
229,Jennifer H. Rearden,Southern District of New York,District Court,New York Southern


In [11]:
# Drop the original 'district' column now that 'properties.name' carries the label.
# Dropping by label rather than by position (df.columns[1]) keeps the cell safe
# to re-run: a second run raises KeyError instead of silently removing whichever
# column has moved into position 1.
df = df.drop(columns=['district'])

In [12]:
# Frame after removing the 'district' column
df

Unnamed: 0,name,court,properties.name
0,Bridget Meehan Brennan,District Court,Ohio Northern
1,Victoria Marie Calvert,District Court,Georgia Northern
2,John H. Chun,District Court,Washington Western
3,Samantha D. Elliott,District Court,New Hampshire
4,Charles Esque Fleming,District Court,Ohio Northern
...,...,...,...
226,Judge Ana Isabel de Alba,District Court,California Eastern
227,Robert Steven Huie,District Court,California Southern
228,Natasha C. Merle,District Court,New York Eastern
229,Jennifer H. Rearden,District Court,New York Southern


## Groupby nominees

To show the number of nominees per district on the map, the DataFrame is grouped by district (`properties.name`). The `headline` column (properties.headline in the map template) holds the number of nominees in each district, and the `article` column (properties.article) holds the names of those nominees.

In [13]:
# Number of distinct nominees in each district, repeated on every row of that district.
# 'nunique' (rather than 'count', which counts rows) keeps this figure in step with
# the 'article' column, which lists each name only once per district.
df['headline'] = df.groupby('properties.name')['name'].transform('nunique')

In [14]:
# Check the per-district 'headline' values
df

Unnamed: 0,name,court,properties.name,headline
0,Bridget Meehan Brennan,District Court,Ohio Northern,4
1,Victoria Marie Calvert,District Court,Georgia Northern,3
2,John H. Chun,District Court,Washington Western,4
3,Samantha D. Elliott,District Court,New Hampshire,1
4,Charles Esque Fleming,District Court,Ohio Northern,4
...,...,...,...,...
226,Judge Ana Isabel de Alba,District Court,California Eastern,4
227,Robert Steven Huie,District Court,California Southern,9
228,Natasha C. Merle,District Court,New York Eastern,6
229,Jennifer H. Rearden,District Court,New York Southern,6


In [15]:
def create_article(names):
    """Join an iterable of name strings into one comma-separated string."""
    return str.join(', ', names)

# Build one comma-separated string of the distinct nominee names per district,
# then look that string up for every row through its district label.
article_by_district = (
    df.groupby('properties.name')['name']
    .agg(lambda names: create_article(names.unique()))
)
df['article'] = df['properties.name'].map(article_by_district)
df

Unnamed: 0,name,court,properties.name,headline,article
0,Bridget Meehan Brennan,District Court,Ohio Northern,4,"Bridget Meehan Brennan, Charles Esque Fleming,..."
1,Victoria Marie Calvert,District Court,Georgia Northern,3,"Victoria Marie Calvert, Sarah Elisabeth Geragh..."
2,John H. Chun,District Court,Washington Western,4,"John H. Chun, Kymberly Evanson, Jamal Whitehea..."
3,Samantha D. Elliott,District Court,New Hampshire,1,Samantha D. Elliott
4,Charles Esque Fleming,District Court,Ohio Northern,4,"Bridget Meehan Brennan, Charles Esque Fleming,..."
...,...,...,...,...,...
226,Judge Ana Isabel de Alba,District Court,California Eastern,4,"Kirk E. Sherriff, Judge Jennifer L. Thurston, ..."
227,Robert Steven Huie,District Court,California Southern,9,"Linda Lopez, Jinsook Ohta, Judge Ruth Bermudez..."
228,Natasha C. Merle,District Court,New York Eastern,6,"Hector Gonzalez, Nina Morrison, Orelia Merchan..."
229,Jennifer H. Rearden,District Court,New York Southern,6,"Dale E. Ho, Jessica G. L. Clarke, Jennifer L. ..."


### Removing the extra columns

In [16]:
# The individual nominee name is no longer needed; the names are kept in 'article'
df = df.drop('name', axis='columns')

In [17]:
# Remove the court type, leaving only the district label and its aggregates.
# NOTE(review): rows are still one per nominee, so a district's row repeats once per
# nominee - confirm whether the map template needs drop_duplicates() before export.
df = df.drop('court', axis='columns')

In [18]:
# Final frame: 'properties.name', 'headline' and 'article'
df

Unnamed: 0,properties.name,headline,article
0,Ohio Northern,4,"Bridget Meehan Brennan, Charles Esque Fleming,..."
1,Georgia Northern,3,"Victoria Marie Calvert, Sarah Elisabeth Geragh..."
2,Washington Western,4,"John H. Chun, Kymberly Evanson, Jamal Whitehea..."
3,New Hampshire,1,Samantha D. Elliott
4,Ohio Northern,4,"Bridget Meehan Brennan, Charles Esque Fleming,..."
...,...,...,...
226,California Eastern,4,"Kirk E. Sherriff, Judge Jennifer L. Thurston, ..."
227,California Southern,9,"Linda Lopez, Jinsook Ohta, Judge Ruth Bermudez..."
228,New York Eastern,6,"Hector Gonzalez, Nina Morrison, Orelia Merchan..."
229,New York Southern,6,"Dale E. Ho, Jessica G. L. Clarke, Jennifer L. ..."


In [19]:
# Save the file as .csv (the empty path '' is a placeholder - set an output path before uncommenting)
#df.to_csv('', index=False)