In [7]:
import pandas as pd

# Source of the raw data: the "federal-judicial-service" CSV hosted on fjc.gov.
judges_url = 'https://www.fjc.gov/sites/default/files/history/federal-judicial-service.csv'

# Download once and keep this frame untouched; the cleaning cell works on a copy of it.
judges_df_download = pd.read_csv(judges_url)

# Design notes
# ------------
# We are going to display polygons shaded by the percentage red or blue.
# District and circuit courts use different polygons, and these can change over time:
#   - the districts changed each decade, which also affects the circuit shapes;
#   - the Fifth Circuit became two circuits (the Eleventh and the Fifth) in 1981.
# We also want the user to be able to include or exclude senior judges.
# So, if we want to go back to the start of the courts of appeals in 1911,
# we will have ten sets of polygons for the circuits (12-13) and the districts (94),
# i.e. roughly 1,100 polygons in the GeoJSON file.
# An AI suggestion is to keep one GeoJSON file for the shapes and another file for the data.
# The data would have an entry for each of the roughly 46,000 days.
# Each day would have a district_id, a circuit_id, the number of active judges
# for each party, and the number of active + senior judges for each party.

# Use this map for 2004, when the Northern Mariana Islands were added to the 9th Circuit:
# https://data.ojp.usdoj.gov/Shapefile/US-Attorney-Districts-Shapefile/5fdt-n5ne/about_data
# We may need to let the data dictate when new districts, etc. were added.


# We may need a separate box that displays things like the Supreme Court, territories, DC, etc.

In [8]:
# Fix nominating_party values that start with "None"
# If there is no nominating party (usually because of a recess appointment that did not result in a commission),
# use the last nominating party
def fix_nominating_party(row, judges=None):
    """Return the nominating party for ``row``, back-filling "None..." values.

    When the row's ``nominating_party`` starts with "None", the party of the
    same judge's most recent earlier appointment (same ``nid``, smaller
    ``start_date``) whose party does not start with "None" is returned.
    If no such appointment exists, the row's own value is returned unchanged.

    Parameters
    ----------
    row : pandas.Series
        One appointment; must provide ``nid``, ``start_date`` and
        ``nominating_party``.
    judges : pandas.DataFrame, optional
        Frame searched for the judge's earlier appointments. Defaults to the
        notebook-level ``judges_df`` so ``judges_df.apply(fix_nominating_party,
        axis=1)`` keeps working.

    Returns
    -------
    The resolved party (or the original value when nothing better is found).
    """
    party = row['nominating_party']
    # A missing / non-string party cannot start with "None"; hand it back as-is
    # instead of failing on ``.startswith``.
    if not isinstance(party, str) or not party.startswith("None"):
        return party

    if judges is None:
        judges = judges_df

    earlier = judges[(judges['nid'] == row['nid']) & (judges['start_date'] < row['start_date'])]
    if earlier.empty:
        return party

    # na=True treats a missing party like "None": it is not a usable donor.
    unusable = earlier['nominating_party'].str.startswith("None", na=True).astype(bool)
    usable = earlier[~unusable]
    if usable.empty:
        return party

    return usable.sort_values('start_date', ascending=False)['nominating_party'].iloc[0]

# Preserve the original to avoid requesting the data again
judges_df = judges_df_download.copy()

# Fixing column headers: lower-case, with spaces, slashes and commas replaced by underscores
judges_df.columns = judges_df.columns.str.lower().str.replace(' ', '_').str.replace('/', '_').str.replace(',', '_')

# The District Court of the District of Columbia was known as the Supreme Court of the District of Columbia until 1936-ish.
# We will just call it the District Court of the District of Columbia throughout the data.
judges_df['court_name'] = judges_df['court_name'].replace(
    "U.S. District Court for the District of Columbia (Supreme Court of the District of Columbia)",
    "U.S. District Court for the District of Columbia",
)

# Start date is the recess appointment date or, when there is none, the commission date.
# It is parsed to datetime right away: fix_nominating_party compares start dates, and that
# comparison should be chronological rather than a comparison of the raw text.
judges_df['start_date'] = pd.to_datetime(
    judges_df['recess_appointment_date'].fillna(judges_df['commission_date'])
)

# Nominating party is the reappointing president's party, else the appointing president's party,
# else "George Washington" if both are null.
judges_df['nominating_party'] = (
    judges_df['party_of_reappointing_president']
    .fillna(judges_df['party_of_appointing_president'])
    .fillna("George Washington")
)
# Some recess appointments have a "None..." nominating party; apply the party of the judge's last appointment.
# This runs before the court filter so that appointments to any court can supply the party.
judges_df['nominating_party'] = judges_df.apply(fix_nominating_party, axis=1)

# Eliminate the courts that we do not want.
# Courts of Appeals started in 1911. So, our start date will be Jan. 1, 1912.
# .copy() gives an independent frame, so the column assignments below do not act on a slice.
wanted_court_types = ["Supreme Court", "U.S. District Court", "U.S. Court of Appeals"]
judges_df = judges_df[judges_df['court_type'].isin(wanted_court_types)].copy()

# Make sure the remaining date columns are datetime
judges_df['termination_date'] = pd.to_datetime(judges_df['termination_date'])
judges_df['senior_status_date'] = pd.to_datetime(judges_df['senior_status_date'])

# Define the cutoff date
cutoff = pd.Timestamp('1912-01-01')

# Filter to judges serving on the cutoff date or later:
# either appointed on/before the cutoff and not terminated before it, or appointed after it.
serving_on_cutoff = (judges_df['start_date'] <= cutoff) & (
    judges_df['termination_date'].isna() | (judges_df['termination_date'] >= cutoff)
)
appointed_after_cutoff = judges_df['start_date'] > cutoff
judges_df = judges_df[serving_on_cutoff | appointed_after_cutoff].copy()

# Some helpful comparisons, at least for circuit courts:
# https://www.acslaw.org/judicial-nominations/change-in-court-composition/
# https://www.reddit.com/r/MapPorn/comments/18uxvdk/partisan_composition_of_every_district_courts/
# https://en.wikipedia.org/wiki/Judicial_appointment_history_for_United_States_federal_courts#:~:text=As%20of%20January%202%2C%202025%2C%20of%20the%20679%20district%20court,a%20majority%20in%204%20circuits.



In [9]:
from collections import defaultdict

# Step 1: Parse dates
df = judges_df.copy()
df['start_date'] = pd.to_datetime(df['start_date'])
df['termination_date'] = pd.to_datetime(df['termination_date'], errors='coerce')
df['senior_status_date'] = pd.to_datetime(df['senior_status_date'], errors='coerce')

# Step 2: Collect all relevant dates per judge
# Each appointment row yields up to three events: (change, source date column, active flag).
EVENT_SOURCES = [
    ('add', 'start_date', True),
    ('senior', 'senior_status_date', False),
    ('remove', 'termination_date', False),
]
# Same-day events are replayed in this order, so an appointment is always in place
# before a senior-status change or a termination looks for it.
EVENT_ORDER = {'add': 0, 'senior': 1, 'remove': 2}

events = []

for _, row in df.iterrows():
    for change, date_column, active in EVENT_SOURCES:
        event_date = row[date_column]
        # The appointment is always recorded; the other two only when they have a date.
        if change != 'add' and pd.isnull(event_date):
            continue
        events.append({
            'date': event_date,
            'court_name': row['court_name'],
            'court_type': row['court_type'],
            'party': row['nominating_party'],
            'judge_name': row['judge_name'],
            'nid': row['nid'],  # judge identifier; names alone may not be unique
            'change': change,
            'active': active
        })

events_df = pd.DataFrame(events)
# Sort by date with an explicit tie-break on the event type (see EVENT_ORDER).
events_df = (
    events_df.assign(_event_order=events_df['change'].map(EVENT_ORDER))
    .sort_values(['date', '_event_order'], kind='mergesort')
    .drop(columns='_event_order')
)

# Step 3: Build running totals by court/date

records = []
judge_state = defaultdict(list)  # (court_name, court_type) -> list of judges (dicts) currently on that court

# One group per court per date; groupby keeps the sorted row order inside each group.
for (date, court_name, court_type), court_events in events_df.groupby(['date', 'court_name', 'court_type']):
    judges = judge_state[(court_name, court_type)]

    # Update judge state
    for event in court_events.itertuples(index=False):
        # Add appointment
        if event.change == 'add':
            judges.append({
                'nid': event.nid,
                'party': event.party,
                'active': True,
                'name': event.judge_name
            })
        # Senior status: deactivate the judge's first still-active entry
        elif event.change == 'senior':
            for j in judges:
                if j['nid'] == event.nid and j['party'] == event.party and j['active']:
                    j['active'] = False
                    break
        # Termination: drop the judge's first matching entry
        elif event.change == 'remove':
            for i, j in enumerate(judges):
                if j['nid'] == event.nid and j['party'] == event.party:
                    judges.pop(i)
                    break

    # Snapshot of the court after all of the day's events have been applied
    records.append({
        'date': date,
        'court_name': court_name,
        'court_type': court_type,
        'total_judges': len(judges),
        'total_active_judges': sum(1 for j in judges if j['active']),
        'total_dem': sum(1 for j in judges if j['party'] == 'Democratic'),
        'total_rep': sum(1 for j in judges if j['party'] == 'Republican'),
        'total_active_dem': sum(1 for j in judges if j['party'] == 'Democratic' and j['active']),
        'total_active_rep': sum(1 for j in judges if j['party'] == 'Republican' and j['active']),
        'judge_names': [j['name'] for j in judges]
    })

result_df = pd.DataFrame(records)



In [10]:
import geopandas as gpd

# Map circuit names to numbers
circuit_map = {
    'First': 1,
    'Second': 2,
    'Third': 3,
    'Fourth': 4,
    'Fifth': 5,
    'Sixth': 6,
    'Seventh': 7,
    'Eighth': 8,
    'Ninth': 9,
    'Tenth': 10,
    'Eleventh': 11,
    'District of Columbia': 12,
    'Federal': 13
}

# Load shapefile (assumes it has .shp, .shx, .dbf, etc. in the same folder).
# Forward slashes are used so the path resolves on every OS, not just Windows.
circuits_gdf = gpd.read_file(
    "districts_map/US Attorney Districts Shapefile_20250504/"
    "geo_export_955d985f-a3d6-4717-9f9a-d0c540b3e5c2.shp"
)

# Group by 'district_n' (which represents the circuit) and dissolve into one shape per group.
# Collect the unique states of each circuit; sorting makes the list order reproducible between runs.
circuits_gdf = circuits_gdf.dissolve(
    by='district_n',
    as_index=False,
    aggfunc={
        'state': lambda x: sorted(set(x.dropna()))
    }
)


circuits_gdf['court_id'] = circuits_gdf['district_n']
# .copy() so the constant columns below are added to an independent frame, not a slice.
circuits_gdf = circuits_gdf[['court_id','state','geometry']].copy()
circuits_gdf['court_type'] = 'circuit'
circuits_gdf['start_date'] = '1900-01-01'
circuits_gdf['end_date'] = '9999-01-01'

def extract_court_name(row):
    """Build the full Court of Appeals name for a row's ``court_id``.

    ``court_id`` is read as a circuit number after replacing 'DC' with '12';
    the first ``circuit_map`` entry with that number supplies the circuit's
    name. Returns an empty string when no entry matches.
    """
    matching_names = (
        f'U.S. Court of Appeals for the {label} Circuit'
        for label, number in circuit_map.items()
        if number == int(row['court_id'].replace('DC','12'))
    )
    return next(matching_names, '')

# Attach the full court name that matches the naming used in the judges data
circuits_gdf['court_name'] = circuits_gdf.apply(extract_court_name, axis=1)

# Save the circuit shapes to a GeoJSON file.
# Forward slash so the path resolves on every OS, not just Windows.
circuits_gdf.to_file("docs/circuits.geojson", driver="GeoJSON")

# Attribute-only version (no geometry) for joining onto the court crosswalk
circuits_df = circuits_gdf.drop(columns='geometry')

In [None]:

def split_court_eras(courts, court_name, first_era_end, second_era_start,
                     first_era_states=None, second_era_states=None):
    """Split a court's row into two eras and return the extended crosswalk.

    The existing row(s) named ``court_name`` become the first era: their
    ``end_date`` is set to ``first_era_end``. A copy of each row, taken before
    that change, becomes the second era: it starts on ``second_era_start`` and
    keeps the court's original ``end_date`` (and its ``id``).

    Parameters
    ----------
    courts : pandas.DataFrame
        Crosswalk with ``court_name``, ``start_date`` and ``end_date`` columns.
        Not modified.
    court_name : str
        Exact name of the court to split.
    first_era_end, second_era_start : str or datetime-like
        Converted with ``pd.Timestamp`` so the date columns stay datetime.
    first_era_states, second_era_states : list of str, optional
        When given, stored as a single list in the ``states`` cell of the
        corresponding era.

    Returns
    -------
    pandas.DataFrame
        The crosswalk with the second-era row(s) appended and a fresh index.
    """
    first_era = courts.copy()
    if 'states' not in first_era.columns:
        first_era['states'] = None  # object column, so one cell can hold a list
    is_court = (first_era['court_name'] == court_name).to_numpy()

    # Copy before truncating, otherwise the second era would inherit the first era's end date.
    second_era = first_era[is_court].copy()
    second_era['start_date'] = pd.Timestamp(second_era_start)
    first_era.loc[is_court, 'end_date'] = pd.Timestamp(first_era_end)

    # Lists are written one cell at a time: assigning a list to a column or a
    # .loc selection is interpreted as one value per row, not one list per cell.
    if first_era_states is not None:
        for row_label in first_era.index[is_court]:
            first_era.at[row_label, 'states'] = list(first_era_states)
    if second_era_states is not None:
        for row_label in second_era.index:
            second_era.at[row_label, 'states'] = list(second_era_states)

    return pd.concat([first_era, second_era], ignore_index=True)


# One row per court with the first and last dates on which it has an event
unique_courts = (
    result_df.groupby(['court_type', 'court_name'])
    .agg(start_date=('date', 'min'), end_date=('date', 'max'))
    .reset_index()
    .sort_values(['court_type', 'court_name'])
    .reset_index(drop=True)
)

# Add an ID column
unique_courts['id'] = range(1, len(unique_courts) + 1)

# Reorder columns
unique_courts = unique_courts[['id', 'court_type', 'court_name', 'start_date', 'end_date']].copy()

# Circuit composition (list of states); only filled in for the circuits split below
unique_courts['states'] = None

# Make manual changes to account for adding and removing certain courts.
# When filtering for the shapes, we need to make sure the date is on or after start date and BEFORE end date.
# Or, we may be able to just filter out zero-judge courts?
# For now, we will make the end date of removed courts one day prior to their legislative end date.
unique_courts.loc[
    unique_courts['court_name'] == 'U.S. District Court for the Eastern District of Illinois',
    'end_date'
] = pd.Timestamp('1979-03-30')  # End a day before the legislative end date

# District of South Carolina: first era ends the day before the state was split,
# second era starts when the state went back to a single district.
unique_courts = split_court_eras(
    unique_courts,
    'U.S. District Court for the District of South Carolina',
    first_era_end='1911-12-31',
    second_era_start='1965-11-01',
)

# Fix end dates for the other, now-defunct SC courts (the day before the single district returned)
for defunct_sc_court in (
    'U.S. District Court for the Eastern District of South Carolina',
    'U.S. District Court for the Western District of South Carolina',
):
    unique_courts.loc[unique_courts['court_name'] == defunct_sc_court, 'end_date'] = pd.Timestamp('1965-10-31')

# Fifth Circuit: lost Alabama, Georgia and Florida on 1981-10-01
unique_courts = split_court_eras(
    unique_courts,
    'U.S. Court of Appeals for the Fifth Circuit',
    first_era_end='1981-09-30',
    second_era_start='1981-10-01',
    first_era_states=['Louisiana', 'Texas', 'Mississippi','Alabama','Georgia','Florida'],
    second_era_states=['Louisiana', 'Texas', 'Mississippi'],
)

# Eighth Circuit: covered the Eighth and Tenth Circuit states until 1929-03-27
eighth_states = ['Nebraska', 'Minnesota', 'Iowa', 'Arkansas', 'South Dakota', 'Missouri', 'North Dakota']
tenth_states = ['Kansas', 'Colorado', 'New Mexico', 'Utah', 'Wyoming', 'Oklahoma']
unique_courts = split_court_eras(
    unique_courts,
    'U.S. Court of Appeals for the Eighth Circuit',
    first_era_end='1929-03-27',
    second_era_start='1929-03-28',
    first_era_states=eighth_states + tenth_states,
    second_era_states=eighth_states,
)



# Extract district_name and state_name for "U.S. District Court"
def extract_district_and_state(court_name):
    """Split a district court name into (partial district, full district, state).

    The "U.S. District Court for the " prefix is removed and the remainder is
    split on " of ". Only names that split into exactly two pieces are
    handled; anything else, including non-district courts, gives
    ``(None, None, None)``.

    - "... of Columbia": all three values are the full "X of Columbia" text.
    - "District of <state>": partial and full are both "District of <state>".
    - otherwise: partial is the text before " of " (e.g. "Eastern District").
    """
    nothing = (None, None, None)
    if "U.S. District Court for the" not in court_name:
        return nothing

    pieces = court_name.replace("U.S. District Court for the ", "").split(" of ")
    if len(pieces) != 2:
        return nothing

    district, state = pieces
    full_name = f"{district} of {state}"
    if state == 'Columbia':
        return full_name, full_name, full_name
    if district == 'District':
        return full_name, full_name, state
    return district, full_name, state

# Derive the three district/state columns from each court name
district_parts = unique_courts['court_name'].apply(extract_district_and_state)
partial_districts, full_districts, state_names = zip(*district_parts)
unique_courts['partial_district'] = partial_districts
unique_courts['full_district'] = full_districts
unique_courts['state_name'] = state_names

# Placeholder columns, filled with blank strings for now
for blank_column in ('displayed', 'in_box'):
    unique_courts[blank_column] = ''

# Bring in each circuit's list of states from the shapefile attributes
circuit_state_lists = circuits_df[['court_name', 'state']]
unique_courts = unique_courts.merge(
    circuit_state_lists, on='court_name', how='left'
).rename(columns={'state': 'circuit_states'})

# TODO: the original note at this point ("We need to fix") was left unfinished.


unique_courts.to_csv(r"unique_courts.csv", index=False)

# This crosswalk should indicate if the court will be displayed or not and 
# whether it will be in the little box or not.
# It also should have notes about when each court came to be and its composition.
# Start date for each court should be the date first judge is seated.

# Only three court districts no longer exist:
# the Eastern District of Illinois, and the Western and Eastern Districts of South Carolina.
# The Eastern District of Illinois ended on 1979-03-31.
# The SC districts started on 1912-01-01 and ended on 1965-11-01, when the state went back to one district.
# Note that some senior judges remained assigned to the defunct districts for several more years,
# until their deaths.

# To generate geojson, we maybe start with Appeals

In [None]:
from collections import defaultdict
import json

# Limit to just the circuit courts.
# .copy() makes this an independent frame, so adding the court_id column to it
# afterwards does not act on a slice (pandas SettingWithCopyWarning).
# Note: this overwrites result_df, so earlier cells that expect every court type
# must be re-run from the top rather than after this cell.
result_df = result_df[result_df['court_type'] == 'U.S. Court of Appeals'].copy()

# Apply mapping
def extract_court_id(row):
    """Return the circuit number of a Court of Appeals row as a string.

    The first ``circuit_map`` name found inside the row's ``court_name``
    decides the number. Rows of any other court type, and names that contain
    no known circuit name, give an empty string.
    """
    if row['court_type'] != 'U.S. Court of Appeals':
        return ''
    matching_ids = (
        f'{number}'
        for label, number in circuit_map.items()
        if label in row['court_name']
    )
    return next(matching_ids, '')

# assign() returns a new frame, so this is safe even when result_df is a filtered slice
result_df = result_df.assign(court_id=result_df.apply(extract_court_id, axis=1))

# Filter to dates on or after Jan 1, 1912
cutoff = pd.Timestamp("1912-01-01")

# 1. Get latest entry before cutoff for each court
pre_cutoff = result_df[result_df['date'] < cutoff]
latest_pre_cutoff = pre_cutoff.sort_values('date').groupby('court_name').tail(1).copy()

# 2. Set their date to the cutoff, so every court has a starting snapshot
latest_pre_cutoff['date'] = cutoff

# 3. Combine with the original DataFrame.
# The carried-forward snapshots go first: if a court also has a real record dated exactly
# on the cutoff, keep='last' below keeps the real record instead of the older snapshot.
trimmed_df = pd.concat([latest_pre_cutoff, result_df], ignore_index=True)

# 4. Filter out anything before the cutoff
df = (
    trimmed_df[trimmed_df['date'] >= cutoff]
    .drop_duplicates(subset=['court_name', 'date'], keep='last')
    .sort_values(['court_name', 'date'])
    .reset_index(drop=True)
)

# Create the nested dictionary: court_id -> date -> [total judges, Republican share of active judges]
result = defaultdict(dict)

for row in df.itertuples(index=False):
    date_str = row.date.strftime('%Y-%m-%d')
    # Share is a fraction between 0 and 1; a court with no active judges is reported as 0
    rep_percentage = float(row.total_active_rep / row.total_active_judges) if row.total_active_judges > 0 else 0
    # int()/float() guarantee plain Python numbers, which is what the json module can encode
    result[row.court_id][date_str] = [int(row.total_judges), rep_percentage]

# defaultdict is a dict subclass, so it can be serialised directly
json_output = json.dumps(result, indent=2)

# Save to file (forward slash so the path resolves on every OS, not just Windows)
with open('docs/court_judges.json', 'w', encoding='utf-8') as f:
    f.write(json_output)

