In [1]:
import pandas as pd

# Script assumes maps/simplified has the geojson files for creating court shapes

# Some possibly helpful comparisons....
# https://www.acslaw.org/judicial-nominations/change-in-court-composition/
# https://www.reddit.com/r/MapPorn/comments/18uxvdk/partisan_composition_of_every_district_courts/
# https://en.wikipedia.org/wiki/Judicial_appointment_history_for_United_States_federal_courts#:~:text=As%20of%20January%202%2C%202025%2C%20of%20the%20679%20district%20court,a%20majority%20in%204%20circuits.
# https://www.brookings.edu/articles/how-much-will-trumps-second-term-judicial-appointments-shift-court-balance/

# Location of the federal-judicial-service CSV published on fjc.gov
judges_url = (
    'https://www.fjc.gov/sites/default/files/history/'
    'federal-judicial-service.csv'
)

# Download once; the cleaning cell works on a copy so the request is not repeated
judges_df_download = pd.read_csv(filepath_or_buffer=judges_url)

In [2]:
def fix_nominating_party(row):
    '''
    Resolve a nominating_party value that starts with "None".

    Such values usually come from a recess appointment that did not result in
    a commission. In that case the party is borrowed from the same judge's
    (same ``nid``) most recent earlier row in the module-level ``judges_df``
    whose own nominating_party does not start with "None".

    Parameters: row -- a record exposing 'nominating_party', 'nid' and 'start_date'.
    Returns: the borrowed party, or the row's own value when it does not start
    with "None" or when no usable earlier row exists.
    '''
    own_party = row['nominating_party']
    if not own_party.startswith("None"):
        return own_party

    # Earlier service records of the same judge
    same_judge = judges_df['nid'] == row['nid']
    started_earlier = judges_df['start_date'] < row['start_date']
    earlier_rows = judges_df[same_judge & started_earlier]
    if earlier_rows.empty:
        return own_party

    # Drop earlier rows that are themselves unresolved
    unresolved = earlier_rows['nominating_party'].str.startswith("None")
    resolved_rows = earlier_rows[~unresolved]
    if resolved_rows.empty:
        return own_party

    # The latest resolved appointment wins
    latest = resolved_rows.sort_values('start_date', ascending=False).head(1)
    return latest['nominating_party'].values[0]

# Cutoff for the analysis: the start of Ronald Reagan's presidency,
# shortly before the 5th Circuit was subdivided, creating the 11th.
cutoff = pd.Timestamp('1981-01-20')

# Work on a copy so the downloaded frame can be reused without another request
judges_df = judges_df_download.copy()

# Normalize headers: lower-case, with spaces, slashes and commas turned into underscores
normalized_columns = judges_df.columns.str.lower()
for separator in (' ', '/', ','):
    normalized_columns = normalized_columns.str.replace(separator, '_')
judges_df.columns = normalized_columns

# The District Court of the District of Columbia was known as the Supreme Court of the
# District of Columbia until 1936-ish; use the modern name throughout the data.
dc_historic_name = r"U.S. District Court for the District of Columbia (Supreme Court of the District of Columbia)"
dc_current_name = "U.S. District Court for the District of Columbia"
judges_df['court_name'] = judges_df['court_name'].replace(dc_historic_name, dc_current_name)

# Start date: the recess appointment date when present, otherwise the commission date
judges_df['start_date'] = judges_df['recess_appointment_date'].fillna(judges_df['commission_date'])

# Note: most recent start date for judge without DEM/GOP nominating party is in 1886, Whig party
# Nominating party: the reappointing president's party, else the appointing president's
# party, else the literal "George Washington" when both are null.
appointing_party = judges_df['party_of_appointing_president']
judges_df['nominating_party'] = (
    judges_df['party_of_reappointing_president']
    .fillna(appointing_party)
    .fillna("George Washington")
)
# Some recess appointments carry a "None..." party; borrow the party of the judge's last appointment.
judges_df['nominating_party'] = judges_df.apply(fix_nominating_party, axis=1)

# Keep only district courts and courts of appeals, excluding the Federal Circuit.
# Other court types (e.g. the old circuit courts, which ended in 1911, and the
# Supreme Court) are dropped here.
wanted_court_types = ["U.S. District Court", "U.S. Court of Appeals"]
is_wanted_type = judges_df['court_type'].isin(wanted_court_types)
is_federal_circuit = judges_df['court_name'] == "U.S. Court of Appeals for the Federal Circuit"
judges_df = judges_df[is_wanted_type & ~is_federal_circuit]

# Make sure date columns are datetime
for date_column in ('start_date', 'termination_date', 'senior_status_date'):
    judges_df[date_column] = pd.to_datetime(judges_df[date_column])

# Keep judges serving on the cutoff date or appointed after it
started_by_cutoff = judges_df['start_date'] <= cutoff
started_after_cutoff = judges_df['start_date'] > cutoff
not_gone_by_cutoff = judges_df['termination_date'].isna() | (judges_df['termination_date'] >= cutoff)
judges_df = judges_df[(started_by_cutoff & not_gone_by_cutoff) | started_after_cutoff]


In [3]:
from collections import defaultdict

# Now, we identify all the events, 
# including appointments, senior status changes, and terminations.

# Step 1: Work on a copy of judges_df
df = judges_df.copy()

# Step 2: Collect all relevant dates per judge row.
# Each spec is (date column, change label, active flag). They are listed in the
# order the events must be replayed for a single row: appointment first, then
# senior status, then termination.
event_specs = [
    ('start_date', 'add', True),
    ('senior_status_date', 'senior', False),
    ('termination_date', 'remove', False),
]
event_columns = ['date', 'court_name', 'court_type', 'party', 'judge_name', 'change', 'active']

events = []
for _, row in df.iterrows():
    for date_column, change, active in event_specs:
        event_date = row[date_column]
        # The appointment is always recorded; the other events only when dated
        if change != 'add' and pd.isnull(event_date):
            continue
        events.append({
            'date': event_date,
            'court_name': row['court_name'],
            'court_type': row['court_type'],
            'party': row['nominating_party'],
            'judge_name': row['judge_name'],
            'change': change,
            'active': active
        })

# Passing the column list keeps the frame well-formed even if no events were collected
events_df = pd.DataFrame(events, columns=event_columns)

# A stable sort keeps same-day events in insertion order (add -> senior -> remove
# for each row). The default quicksort may reorder ties, which could replay a
# judge's removal or senior status before the matching appointment.
events_df = events_df.sort_values('date', kind='stable')

# Step 3: Replay the events date by date, keeping one roster per court, and
# record the roster's totals for every court touched on that date.
records = []
judge_state = defaultdict(list)  # (court_name, court_type) -> list of judge dicts

for date, day_events in events_df.groupby('date'):
    touched_courts = day_events[['court_name', 'court_type']].drop_duplicates()

    for court in touched_courts.itertuples():
        key = (court.court_name, court.court_type)
        roster = judge_state[key]

        # Apply this court's events for the day (events are selected by court_name)
        court_events = day_events[day_events['court_name'] == court.court_name]
        for _, event in court_events.iterrows():
            change = event['change']
            if change == 'add':
                # New appointment: the judge joins the roster as active
                roster.append({
                    'party': event['party'],
                    'active': True,
                    'name': event['judge_name']
                })
            elif change == 'senior':
                # First still-active entry with the same party and name goes inactive
                senior_judge = next(
                    (j for j in roster
                     if j['party'] == event['party'] and j['active'] and j['name'] == event['judge_name']),
                    None,
                )
                if senior_judge is not None:
                    senior_judge['active'] = False
            elif change == 'remove':
                # First entry with the same party and name leaves the roster
                position = next(
                    (i for i, j in enumerate(roster)
                     if j['party'] == event['party'] and j['name'] == event['judge_name']),
                    None,
                )
                if position is not None:
                    roster.pop(position)

        # Note: most recent start date for judge without DEM/GOP nominating party is in 1886, Whig party
        democrats = [j for j in roster if j['party'] == 'Democratic']
        republicans = [j for j in roster if j['party'] == 'Republican']

        records.append({
            'date': date,
            'court_name': court.court_name,
            'court_type': court.court_type,
            'total_judges': len(roster),
            'total_active_judges': len([j for j in roster if j['active']]),
            'total_dem': len(democrats),
            'total_rep': len(republicans),
            'total_active_dem': len([j for j in democrats if j['active']]),
            'total_active_rep': len([j for j in republicans if j['active']]),
            'judge_names': [j['name'] for j in roster]
        })

result_df = pd.DataFrame(records)

# Every court's timeline should open with a snapshot dated exactly at the cutoff
first_event_date = cutoff

# For each court, take its latest snapshot on or before the cutoff and re-date
# that snapshot to the cutoff itself. Courts with no such snapshot are skipped.
adjusted_records = []
on_or_before_cutoff = result_df[result_df['date'] <= first_event_date]
for _, court_history in on_or_before_cutoff.groupby('court_name'):
    opening_snapshot = court_history.iloc[-1].copy()
    opening_snapshot['date'] = first_event_date
    adjusted_records.append(opening_snapshot)

# Only snapshots strictly after the cutoff survive from the original timeline
result_df = result_df[result_df['date'] > first_event_date]

# Put the re-dated opening snapshots back in
if adjusted_records:
    adjusted_df = pd.DataFrame(adjusted_records)
    result_df = pd.concat([result_df, adjusted_df], ignore_index=True)

# Restore chronological order with a clean index
result_df = result_df.sort_values('date').reset_index(drop=True)



In [4]:
# Compiling a unique list of court iterations

# Only three district courts have ceased to exist (since at least 1912):
# the Eastern District of Illinois, and the Western and Eastern Districts of South Carolina.
# The Eastern District of Illinois ended on 1979-03-31.
# The SC districts started 1912-01-01 and ended 1965-11-01, when the state went back to one district.
# Note that some senior judges remained assigned to the defunct districts for several more years,
# until their deaths.

# States/territories whose features make up each court of appeals' map shape
appeals_court_states = {
    'U.S. Court of Appeals for the District of Columbia Circuit': ['District of Columbia'],
    'U.S. Court of Appeals for the First Circuit': [
        'Rhode Island', 'New Hampshire', 'Massachusetts', 'Maine', 'Puerto Rico'],
    'U.S. Court of Appeals for the Second Circuit': ['Vermont', 'Connecticut', 'New York'],
    'U.S. Court of Appeals for the Third Circuit': [
        'US Virgin Islands', 'Pennsylvania', 'New Jersey', 'Delaware'],
    'U.S. Court of Appeals for the Fourth Circuit': [
        'Maryland', 'South Carolina', 'West Virginia', 'North Carolina', 'Virginia'],
    'U.S. Court of Appeals for the Fifth Circuit': ['Louisiana', 'Texas', 'Mississippi'],
    'U.S. Court of Appeals for the Sixth Circuit': ['Tennessee', 'Ohio', 'Kentucky', 'Michigan'],
    'U.S. Court of Appeals for the Seventh Circuit': ['Wisconsin', 'Illinois', 'Indiana'],
    'U.S. Court of Appeals for the Eighth Circuit': [
        'Nebraska', 'Minnesota', 'Iowa', 'Arkansas', 'South Dakota', 'Missouri', 'North Dakota'],
    'U.S. Court of Appeals for the Ninth Circuit': [
        'Montana', 'Washington', 'Idaho', 'Arizona', 'Nevada', 'Alaska', 'California',
        'Oregon', 'Hawaii', 'Guam', 'Northern Marianas Islands'],
    'U.S. Court of Appeals for the Tenth Circuit': [
        'Kansas', 'Colorado', 'New Mexico', 'Utah', 'Wyoming', 'Oklahoma'],
    'U.S. Court of Appeals for the Eleventh Circuit': ['Alabama', 'Georgia', 'Florida'],
}

# One row per (court_type, court_name), spanning its first and last snapshot dates
unique_courts = (
    result_df
    .groupby(['court_type', 'court_name'])
    .agg(start_date=('date', 'min'), end_date=('date', 'max'))
    .reset_index()
)

# Assign the correct states to the circuit courts; every other court starts with an empty list.
# Note that New Mexico, Arizona, Alaska, Hawaii and Puerto Rico became states/got districts
# as long ago as 1910, but we largely ignore this due to a much later cutoff date, for now (possibly).
unique_courts['states'] = [
    appeals_court_states.get(court_name, [])
    for court_name in unique_courts['court_name']
]

# Snapshot taken before the manual adjustments that follow
good_courts = unique_courts.copy()

# Manual adjustments for courts that were created, split or abolished.
# Start/end dates are sometimes one day before/after the legislative start/end dates
# to distinguish them from other affected courts.
# However, remember that the start/end dates are also driven by when judges were actually
# seated in a new court.
# The below is largely ignored (and incomplete) due to the
# later cutoff date, but we will keep it for now.

def _set_court_end(courts, court_name, end_date):
    """Set end_date on every row of `courts` named court_name (no-op when absent)."""
    courts.loc[courts['court_name'] == court_name, 'end_date'] = pd.to_datetime(end_date)


def _set_first_row_states(courts, court_name, states):
    """Replace the 'states' list on the first row of `courts` named court_name."""
    first_label = courts.index[courts['court_name'] == court_name][0]
    courts.at[first_label, 'states'] = states


def _append_later_iteration(courts, court_name, start_date, states=None):
    """Return `courts` plus a copy of court_name's rows restarted at start_date.

    The copy ends on 2262-01-01 (treated as open-ended) and takes `states` when given.
    """
    later = courts[courts['court_name'] == court_name].copy()
    later['start_date'] = pd.to_datetime(start_date)
    later['end_date'] = pd.to_datetime('2262-01-01')
    if states is not None:
        later['states'] = [states]
    return pd.concat([courts, later], ignore_index=True)


# Indiana was subdivided into two districts; end a day before the legislative end date
_set_court_end(unique_courts, 'U.S. District Court for the District of Indiana', '1928-04-20')

# Replaced with another district; end a day before the legislative end date
_set_court_end(unique_courts, 'U.S. District Court for the Eastern District of Illinois', '1979-03-30')

# District of South Carolina: the first iteration ends the day before the state was
# subdivided (1912-01-01); a second iteration starts when it became one district again.
south_carolina = 'U.S. District Court for the District of South Carolina'
_set_court_end(unique_courts, south_carolina, '1911-12-31')
unique_courts = _append_later_iteration(unique_courts, south_carolina, '1965-11-01')

# The now-defunct Eastern/Western SC districts end the day before the single district resumes
_set_court_end(unique_courts, 'U.S. District Court for the Eastern District of South Carolina', '1965-10-31')
_set_court_end(unique_courts, 'U.S. District Court for the Western District of South Carolina', '1965-10-31')

# Fifth Circuit: the first iteration (six states) ends the day before the second
# iteration (three states) starts on 1981-10-01.
fifth_circuit = 'U.S. Court of Appeals for the Fifth Circuit'
_set_court_end(unique_courts, fifth_circuit, '1981-09-30')
_set_first_row_states(unique_courts, fifth_circuit,
                      ['Louisiana', 'Texas', 'Mississippi','Alabama','Georgia','Florida'])
unique_courts = _append_later_iteration(unique_courts, fifth_circuit, '1981-10-01',
                                        ['Louisiana', 'Texas', 'Mississippi'])

# Eighth Circuit: the first iteration also covers the Tenth Circuit's states and ends
# the day before the second iteration starts on 1929-03-28.
eighth_states = ['Nebraska', 'Minnesota', 'Iowa', 'Arkansas', 'South Dakota', 'Missouri', 'North Dakota']
tenth_states = ['Kansas', 'Colorado', 'New Mexico', 'Utah', 'Wyoming', 'Oklahoma']
eighth_circuit = 'U.S. Court of Appeals for the Eighth Circuit'
_set_court_end(unique_courts, eighth_circuit, '1929-03-27')
_set_first_row_states(unique_courts, eighth_circuit, eighth_states + tenth_states)
unique_courts = _append_later_iteration(unique_courts, eighth_circuit, '1929-03-28', eighth_states)

# Order rows so the oldest iteration of each court comes first
unique_courts = unique_courts.sort_values(by=['court_type', 'court_name', 'start_date'])

# Number the rows 1..N in that order; this id links the map and judge outputs
unique_courts['id'] = list(range(1, len(unique_courts) + 1))

# Any end date after 1982-01-01 is treated as open-ended and pushed WAY into the
# future (2262-01-01, close to the upper bound of pandas datetimes)
open_ended = unique_courts['end_date'] > '1982-01-01'
unique_courts.loc[open_ended, 'end_date'] = pd.to_datetime('2262-01-01')

# Reorder columns
column_order = ['id', 'court_type', 'court_name', 'start_date', 'end_date', 'states']
unique_courts = unique_courts[column_order]

# Extract district_name and state_name for "U.S. District Court"
# This is all necessary for joining to the geojson files
def extract_district_and_state(court_name):
    """Split a district court name into the pieces used to join to the geojson files.

    The text after "U.S. District Court for the " must have the form "<district> of <place>".

    Returns a tuple (partial_district, full_district, states):
      * place == 'Columbia': the full "<district> of <place>" text is used for
        all three pieces (the last wrapped in a one-item list);
      * district == 'District': partial and full are both the full text, states is [place];
      * otherwise: partial is <district>, full is the full text, states is [place].

    Raises ValueError when the name does not fit that pattern.
    """
    if "U.S. District Court for the" in court_name:
        remainder = court_name.replace("U.S. District Court for the ", "")
        pieces = remainder.split(" of ")
        if len(pieces) == 2:
            district, place = pieces
            full_name = f"{district} of {place}"
            if place == 'Columbia':
                return full_name, full_name, [full_name]
            if district == 'District':
                return full_name, full_name, [place]
            return district, full_name, [place]
    raise ValueError(f"Unexpected court name format: {court_name}")

# Placeholder district columns; only district courts receive real values below
unique_courts = unique_courts.assign(partial_district='', full_district='')

# Parse every district court name into (partial_district, full_district, states)
is_district_court = unique_courts['court_type'] == 'U.S. District Court'
parsed_names = unique_courts.loc[is_district_court, 'court_name'].apply(extract_district_and_state)

# Write the parsed pieces back onto the district rows, aligned on the row index
district_columns = ['partial_district', 'full_district', 'states']
unique_courts.loc[is_district_court, district_columns] = pd.DataFrame(
    parsed_names.tolist(),
    index=parsed_names.index,
    columns=district_columns,
)

# Boolean called in_box indicating whether the court will be in the inset map
unique_courts['in_box'] = False


In [5]:
import os
import json
import geopandas as gpd

# All spatial data is in SRID 4326
# A lot of this is a start to having shapes change as the courts changed,
# but little is needed due to a more recent cutoff date.
# Among other things, we have to account for each time districts within a state changed
# as the creation/deletion would have led to changes in the shape of the state's other districts.
# This occurred in several states since 1912, including IL, LA, CA, FL, IN, NC, GA, OK
# The circuit shapes also should have changed when AK, PR and HI became states.

# If the start date of the court is prior to the end date of the shapefile, add it to geojson.
# The court's map start date would be the later of the court's start date or the last start date in the geojson for that court.
# The court's map end date would be the earlier of the court's end date or the map's end date.

def change_last_end_date(court_id, courts_geojson, new_end_date):
    """Overwrite the end date of the latest-ending row stored for a court.

    Parameters:
        court_id: value to look up in the 'court_id' column.
        courts_geojson: frame with 'court_id' and 'end_date' columns; modified in place.
        new_end_date: replacement end date (a 'YYYY-MM-DD' string or a date-like value).

    Raises:
        ValueError: when no row carries court_id.
    """
    mask = courts_geojson['court_id'] == court_id
    if not mask.any():
        raise ValueError(f"No court found with ID {court_id} for new end date.")

    # Rows are appended with end_date formatted as 'YYYY-MM-DD' text. Parse the
    # values before taking idxmax, since that reduction is rejected on
    # object-dtype columns.
    idx = pd.to_datetime(courts_geojson.loc[mask, 'end_date']).idxmax()

    # Keep the column homogeneous: when it holds text, store the new date as
    # text in the same format instead of dropping a Timestamp among strings.
    current_value = courts_geojson.at[idx, 'end_date']
    if isinstance(current_value, str) and not isinstance(new_end_date, str):
        new_end_date = pd.Timestamp(new_end_date).strftime('%Y-%m-%d')
    courts_geojson.at[idx, 'end_date'] = new_end_date

# Same cutoff as the judge data
cutoff_date = cutoff

# Empty frame that the loop below fills with one row per court shape
courts_geojson = gpd.GeoDataFrame(
    columns=['court_id', 'court_type', 'court_name', 'start_date', 'end_date',
             'in_box', 'map_info', 'geometry'],
    geometry='geometry',
    crs='EPSG:4326',
)

# Build the folder path with os.path.join so it resolves on POSIX as well as
# Windows (a literal backslash is only a path separator on Windows).
src_folder = os.path.join('maps', 'simplified')

# Sorted so the files are processed in name order
gj_files = sorted(f for f in os.listdir(src_folder) if f.endswith(".json"))

# For every map file that is still relevant after the cutoff, add one row per
# court iteration whose lifetime overlaps the file's period.
for file_idx, file in enumerate(gj_files):
    gj_filename = os.path.join(src_folder, file)
    #print(gj_filename)

    # The file name starts with a year (text before the first '_' / '.'); the
    # file is treated as covering that year through the end of year + 9.
    map_year = int(file.split('.')[0].split('_')[0])
    map_start_date = pd.to_datetime(f'{map_year}-01-01')
    map_end_date = pd.to_datetime(f'{map_year+9}-12-31')

    # Files whose whole period lies before the cutoff are never read
    if map_end_date < cutoff_date:
        #print(f"Skipping {file} because {map_end_date} is less than cutoff date.")
        continue

    gj_file = gpd.read_file(gj_filename)

    # Accounting for differences in field names in 2010 file:
    # pick whichever state/district field this file has, and the matching
    # unique_courts column to compare the district field against.
    state_field_key = 'state_terr' if 'state_terr' in gj_file.columns else 'state'
    district_field_key = 'judicial_2' if 'judicial_2' in gj_file.columns else 'name'
    df_district_key = 'partial_district' if 'judicial_2' in gj_file.columns else 'full_district'

    for idx, row in unique_courts.iterrows():
        # Skip court iterations that do not overlap this file's period
        if row['end_date'] < cutoff_date: # South carolina before it was subdivided on the cutoff date
            #print(f"Skipping {row['court_name']} because {row['end_date']} is less than cutoff date.")
            continue
        elif row['start_date'] > map_end_date:
            #print(f"Skipping {row['court_name']} because {row['start_date']} is greater than {map_end_date}.")
            continue
        elif row['end_date'] < map_start_date:
            #print(f"Skipping {row['court_name']} because {row['end_date']} is less than {map_start_date}.")
            continue

        if row['court_type'] == 'U.S. Court of Appeals':
            # Circuit shape: merge every feature belonging to the circuit's states
            # into a single geometry
            states_list = row['states']
            match = gj_file[gj_file[state_field_key].isin(states_list)]
            geometry = match.dissolve(by=state_field_key,
                                    aggfunc={state_field_key: lambda x: list(set(x))}
                                    ).union_all(method='unary') if not match.empty else None
        else:
            # District shape: the first feature matching both state and district name
            match = gj_file[
                (gj_file[state_field_key] == row['states'][0]) &
                (gj_file[district_field_key] == row[df_district_key])
            ]
            geometry = match.iloc[0].geometry if not match.empty else None

        if match.empty and row['end_date'] < map_end_date: # the court ended during this map period
            # so, we need to update the shape that was last added to courts_geojson for this court
            # to insert the court's end date and ensure that prior shape will display
            # NOTE(review): there is no `continue` here, so execution falls through and a
            # row with geometry None is still appended below -- confirm this is intended.
            change_last_end_date(row['id'], courts_geojson, row['end_date'])
        elif match.empty:
            #print(f"No match found for {row['court_name']} with start date of {row['start_date']} in shapefile {file}.")
            continue
        
        # The last file's shapes stay valid indefinitely; otherwise a shape ends
        # with the court or with the file's period, whichever comes first
        if file_idx == len(gj_files) - 1:
            end_date = pd.to_datetime('2262-01-01')
        else:
            end_date = min(row['end_date'], map_end_date)

        # For the first file, we will set the start date to the later of the cutoff date or the court's start date
        # if it is the first time this court is being added, we will set the start date to the earlier of the court start date or the map start date
        # otherwise, the start date is the map start date
        # NOTE(review): file_idx counts every entry of gj_files, including files skipped
        # above for ending before the cutoff, so `file_idx == 0` is the first file
        # listed, not necessarily the first file processed -- confirm.
        # last_start_date is null (checked with pd.isna) when this court has no row yet;
        # presumably max() over the empty selection yields NaN -- TODO confirm.
        last_start_date = courts_geojson.loc[courts_geojson['court_id'] == row['id'], 'start_date'].max() or None
        if file_idx == 0 or (row['court_type'] == 'U.S. Court of Appeals' and pd.isna(last_start_date)):
            start_date = max(row['start_date'], cutoff_date)
        elif row['start_date'] > map_start_date:
            start_date = max(row['start_date'], map_start_date) if pd.isna(last_start_date) else map_start_date
        else:
            start_date = min(row['start_date'], map_start_date) if pd.isna(last_start_date) else map_start_date

        # get some info about the geometry added to the geojson:
        # the state values of the matched features plus the source file name
        map_info = f'{match[state_field_key].tolist()} - {file}'

        # Append row to courts_geojson (dates are stored as 'YYYY-MM-DD' text)
        courts_geojson.loc[len(courts_geojson)] = {
            'court_id': row['id'],
            'court_type': row['court_type'],
            'court_name': row['court_name'],
            'start_date': start_date.strftime('%Y-%m-%d'),
            'end_date': end_date.strftime('%Y-%m-%d'),
            'in_box': row['in_box'],
            'map_info': map_info,
            'geometry': geometry
        }

# Now, we go through and fix things for those 
# that will be displayed or not
# Populate those columns correctly
# Remove geometry for any courts not being displayed

# Courts shown only in the inset box: flagged, labelled, and stripped of geometry
only_in_box = {
    'U.S. District Court for the District of Alaska': 'AK',
    'U.S. District Court for the District of Hawaii': 'HI',
    'U.S. District Court for the District of Puerto Rico': 'PR',
}
# Courts shown in the inset box while keeping their geometry
also_in_box = {
    'U.S. Court of Appeals for the District of Columbia Circuit': 'DC',
    'U.S. District Court for the District of Columbia': 'DC',
    'U.S. District Court for the District of Rhode Island': 'RI',
}

# Single lookup of abbreviations; only_in_box takes precedence on any shared name
box_abbreviations = {**also_in_box, **only_in_box}

for idx, court_name in courts_geojson['court_name'].items():
    abbreviation = box_abbreviations.get(court_name)
    if abbreviation is None:
        continue
    courts_geojson.at[idx, 'in_box'] = True
    if court_name in only_in_box:
        courts_geojson.at[idx, 'geometry'] = None
    courts_geojson.at[idx, 'abbr'] = abbreviation

# Need to remove any keys with null values from each feature, to save space.
# Serialize the GeoDataFrame to a GeoJSON string, then parse it back into plain dicts
geojson_str = courts_geojson.to_json()
data = json.loads(geojson_str)

# Strip out null-valued keys from properties
for feature in data['features']:
    feature['properties'] = {
        k: v for k, v in feature['properties'].items() if v is not None
    }

# Save to file with compact formatting. The path is built with os.path.join so
# the file lands inside docs/ on POSIX as well as on Windows.
with open(os.path.join('docs', 'courts.geojson'), 'w') as f:
    json.dump(data, f, separators=(',', ':'))



In [6]:
from collections import defaultdict
import json

# Now, create the slimmed down judges data json:
# {court id: {'YYYY-MM-DD': [active judges, active Dem, active Rep, active Rep share,
#                            all judges, all Dem, all Rep, Rep share]}}
# A share is -1 when its denominator is zero.
import os  # imported here so this cell does not depend on another cell's imports

# Filter to dates on or after cutoff
df = result_df[result_df['date'] >= cutoff].copy()

# Create the nested dictionary
result = defaultdict(dict)

for _, row in df.iterrows():
    # Iterations of this court that had started by the snapshot date
    started = unique_courts[(unique_courts['court_name'] == row['court_name']) &
                            (unique_courts['start_date'] <= row['date'])]
    # Prefer the iteration actually in force on that date. A closed iteration can carry
    # a later start_date than its open successor (its start comes from the post-cutoff
    # data while its end was set by hand), and choosing it would key the counts to an
    # id that the map-building cell skips.
    in_force = started[started['end_date'] >= row['date']]
    if in_force.empty:
        # No iteration covers the date: fall back to the latest one that had started
        in_force = started
    court = in_force.sort_values('start_date', ascending=False).iloc[0]

    court_id = int(court['id'])
    date_str = row['date'].strftime('%Y-%m-%d')
    rep_act_percentage = row['total_active_rep'] / row['total_active_judges'] if row['total_active_judges'] > 0 else -1
    rep_percentage = row['total_rep'] / row['total_judges'] if row['total_judges'] > 0 else -1
    result[court_id][date_str] = [row['total_active_judges'],row['total_active_dem'],row['total_active_rep'],rep_act_percentage,
                                  row['total_judges'],row['total_dem'],row['total_rep'],rep_percentage]

# Compact JSON output (no indentation or padding)
json_output = json.dumps(result, separators=(',', ':'))

# Save to file; os.path.join keeps the path valid on POSIX as well as Windows
with open(os.path.join('docs', 'judges.json'), 'w') as f:
    f.write(json_output)