In [4]:
import pandas as pd

judges_url = 'https://www.fjc.gov/sites/default/files/history/federal-judicial-service.csv'

judges_df_download = pd.read_csv(judges_url)

In [5]:
# Fix nominating_party values that start with "None"
# If there is no nominating party (usually because of a recess appointment that did not result in a commission),
# use the last nominating party
def fix_nominating_party(row):
    if row['nominating_party'].startswith("None"):
        previous_rows = judges_df[(judges_df['nid'] == row['nid']) & (judges_df['start_date'] < row['start_date'])]
        if not previous_rows.empty:
            last_valid_row = previous_rows[~previous_rows['nominating_party'].str.startswith("None")].sort_values('start_date', ascending=False).head(1)
            if not last_valid_row.empty:
                return last_valid_row['nominating_party'].values[0]
    return row['nominating_party']

# Preserve the original to avoid requesting the data again
judges_df = judges_df_download.copy()

# Fixing column headers by removing spaces, slashes and commas
judges_df.columns = judges_df.columns.str.lower().str.replace(' ', '_').str.replace('/', '_').str.replace(',', '_')

# start date is either recess appointment date or commission date
judges_df['start_date'] = judges_df['recess_appointment_date'].fillna(judges_df['commission_date'])

# Nominating party is the reappointing president, the appointing president if appointed on first try or George Washington if null.
judges_df['nominating_party'] = judges_df['party_of_reappointing_president'].fillna(judges_df['party_of_appointing_president']).fillna("George Washington")
# Except some were recess appointments and had None as the nominating party because it was a recess appointment. We apply the party of the last appointment.
judges_df['nominating_party'] = judges_df.apply(fix_nominating_party, axis=1)

# Eliminate the courts that we do not want.
# Courts of Appeals started in 1911. So, our start date will be Jan. 1, 1912.
judges_df = judges_df[judges_df['court_type'].isin(["Supreme Court", "U.S. District Court", "U.S. Court of Appeals"])]

# Make sure date columns are datetime
judges_df['start_date'] = pd.to_datetime(judges_df['start_date'])
judges_df['termination_date'] = pd.to_datetime(judges_df['termination_date'])
judges_df['senior_status_date'] = pd.to_datetime(judges_df['senior_status_date'])

# Define the cutoff date
cutoff = pd.Timestamp('1912-01-01')

# Filter to judges serving on the cutoff date or later
judges_df = judges_df[
    (((judges_df['start_date'] <= cutoff) & ((judges_df['termination_date'].isna()) | (judges_df['termination_date'] >= cutoff)))
     | (judges_df['start_date'] > cutoff))
]

# Some helpful comparisons, at least for circuit courts:
# https://www.acslaw.org/judicial-nominations/change-in-court-composition/
# https://www.reddit.com/r/MapPorn/comments/18uxvdk/partisan_composition_of_every_district_courts/
# https://en.wikipedia.org/wiki/Judicial_appointment_history_for_United_States_federal_courts#:~:text=As%20of%20January%202%2C%202025%2C%20of%20the%20679%20district%20court,a%20majority%20in%204%20circuits.



In [None]:
# Extract unique courts
unique_courts = judges_df[['court_type', 'court_name']].drop_duplicates().reset_index(drop=True)

# Add a sequential ID
unique_courts['id'] = range(1, len(unique_courts) + 1)

# Reorder columns
unique_courts = unique_courts[['id', 'court_type', 'court_name']]

# Extract district_name and state_name for "U.S. District Court"
def extract_district_and_state(court_name):
    if "U.S. District Court for the" in court_name:
        parts = court_name.replace("U.S. District Court for the ", "").split(" of ")
        if len(parts) == 2:
            if parts[0] = 'District':
                return f"{parts[0]} of {parts[1]}", parts[1]
            else:
                return parts[0], parts[1]
    return None, None

unique_courts['district_name'], unique_courts['state_name'] = zip(
    *unique_courts['court_name'].apply(extract_district_and_state)
)

print(unique_courts)

      id             court_type  \
0      1    U.S. District Court   
1      2    U.S. District Court   
2      3    U.S. District Court   
3      4  U.S. Court of Appeals   
4      5    U.S. District Court   
..   ...                    ...   
105  106    U.S. District Court   
106  107    U.S. District Court   
107  108    U.S. District Court   
108  109    U.S. District Court   
109  110    U.S. District Court   

                                            court_name      district_name  \
0     U.S. District Court for the District of Maryland           District   
1    U.S. District Court for the Southern District ...  Southern District   
2    U.S. District Court for the Eastern District o...   Eastern District   
3       U.S. Court of Appeals for the Eleventh Circuit               None   
4    U.S. District Court for the Northern District ...  Northern District   
..                                                 ...                ...   
105  U.S. District Court for the Eastern

In [6]:
import pandas as pd

# Assuming your DataFrame is called `df`
# and columns: start_date, termination_date, senior_status_date, nominating_party, court_name, court_type

# Step 1: Parse dates
df = judges_df.copy()
df['start_date'] = pd.to_datetime(df['start_date'])
df['termination_date'] = pd.to_datetime(df['termination_date'], errors='coerce')
df['senior_status_date'] = pd.to_datetime(df['senior_status_date'], errors='coerce')

# Step 2: Collect all relevant dates per judge
events = []

for _, row in df.iterrows():
    # Add appointment
    events.append({
        'date': row['start_date'],
        'court_name': row['court_name'],
        'court_type': row['court_type'],
        'party': row['nominating_party'],
        'judge_name': row['judge_name'],
        'change': 'add',
        'active': True
    })
    # Add senior status change
    if pd.notnull(row['senior_status_date']):
        events.append({
            'date': row['senior_status_date'],
            'court_name': row['court_name'],
            'court_type': row['court_type'],
            'party': row['nominating_party'],
            'judge_name': row['judge_name'],
            'change': 'senior',
            'active': False
        })
    # Add termination
    if pd.notnull(row['termination_date']):
        events.append({
            'date': row['termination_date'],
            'court_name': row['court_name'],
            'court_type': row['court_type'],
            'party': row['nominating_party'],
            'judge_name': row['judge_name'],
            'change': 'remove',
            'active': False
        })

events_df = pd.DataFrame(events)
events_df = events_df.sort_values('date')

# Step 3: Build running totals by court/date
from collections import defaultdict

records = []
judge_state = defaultdict(list)  # court_id -> list of judges (dicts)

for date, group in events_df.groupby('date'):
    courts = group[['court_name', 'court_type']].drop_duplicates()

    for court in courts.itertuples():
        key = (court.court_name, court.court_type)

        # Update judge state
        for _, event in group[group['court_name'] == court.court_name].iterrows():
            # Add appointment
            if event['change'] == 'add':
                judge_state[key].append({
                    'party': event['party'],
                    'active': True,
                    'name': event['judge_name']  # Add name
                })
            # Add senior status change
            elif event['change'] == 'senior':
                for j in judge_state[key]:
                    if j['party'] == event['party'] and j['active'] and j['name'] == event['judge_name']:
                        j['active'] = False
                        break
            # Add termination
            elif event['change'] == 'remove':
                for i, j in enumerate(judge_state[key]):
                    if j['party'] == event['party'] and j['name'] == event['judge_name']:
                        judge_state[key].pop(i)
                        break


        judges = judge_state[key]
        total_judges = len(judges)
        total_active_judges = sum(1 for j in judges if j['active'])
        total_dem = sum(1 for j in judges if j['party'] == 'Democratic')
        total_rep = sum(1 for j in judges if j['party'] == 'Republican')
        total_active_dem = sum(1 for j in judges if j['party'] == 'Democratic' and j['active'])
        total_active_rep = sum(1 for j in judges if j['party'] == 'Republican' and j['active'])
        judge_names = [j['name'] for j in judges]

        records.append({
            'date': date,
            'court_name': court.court_name,
            'court_type': court.court_type,
            'total_judges': total_judges,
            'total_active_judges': total_active_judges,
            'total_dem': total_dem,
            'total_rep': total_rep,
            'total_active_dem': total_active_dem,
            'total_active_rep': total_active_rep,
            'judge_names': judge_names
        })

result_df = pd.DataFrame(records)

# Limit to just the circuit courts
result_df = result_df[result_df['court_type'] == 'U.S. Court of Appeals']

# Map circuit names to numbers
circuit_map = {
    'First': 1,
    'Second': 2,
    'Third': 3,
    'Fourth': 4,
    'Fifth': 5,
    'Sixth': 6,
    'Seventh': 7,
    'Eighth': 8,
    'Ninth': 9,
    'Tenth': 10,
    'Eleventh': 11,
    'District of Columbia': 12,
    'Federal': 13
}

# Apply mapping
def extract_court_id(row):
    if row['court_type'] == 'U.S. Court of Appeals':
        for name, num in circuit_map.items():
            if name in row['court_name']:
                return f'{num}'
    return ''

result_df['court_id'] = result_df.apply(extract_court_id, axis=1)


In [7]:
import pandas as pd
from collections import defaultdict
import json

# Filter to dates on or after Jan 1, 1912
cutoff = pd.Timestamp("1912-01-01")

# 1. Get latest entry before cutoff for each court
pre_cutoff = result_df[result_df['date'] < cutoff]
latest_pre_cutoff = pre_cutoff.sort_values('date').groupby('court_name').tail(1).copy()

# 2. Set their date to the cutoff
latest_pre_cutoff['date'] = cutoff

# 3. Append to original DataFrame
trimmed_df = pd.concat([result_df, latest_pre_cutoff], ignore_index=True)

# 4. Filter out anything before the cutoff
df = trimmed_df[trimmed_df['date'] >= cutoff].sort_values(['court_name', 'date']).reset_index(drop=True)

# Create the nested dictionary
result = defaultdict(dict)

for _, row in df.iterrows():
    court_id = row['court_id']
    date_str = row['date'].strftime('%Y-%m-%d')
    rep_percentage = row['total_active_rep'] / row['total_active_judges'] if row['total_active_judges'] > 0 else 0
    result[court_id][date_str] = [row['total_judges'],rep_percentage]

# Convert to regular dict for JSON output
json_output = json.dumps(result, indent=2)

# Optional: Save to file
with open('docs\\court_judges.json', 'w') as f:
    f.write(json_output)



In [9]:
import geopandas as gpd

# Load shapefile (assumes it has .shp, .shx, .dbf, etc. in the same folder)
gdf = gpd.read_file(r"districts_map\US Attorney Districts Shapefile_20250504\geo_export_955d985f-a3d6-4717-9f9a-d0c540b3e5c2.shp")

# Group by 'district_n' (which represents the circuit) and dissolve into one shape per group
circuits_gdf = gdf.dissolve(by='district_n', as_index=False)

circuits_gdf['court_id'] = circuits_gdf['district_n']
circuits_gdf = circuits_gdf[['court_id','abbr','geometry']]
circuits_gdf['court_type'] = 'circuit'
circuits_gdf['start_date'] = '1900-01-01'
circuits_gdf['end_date'] = '9999-01-01'

def extract_court_name(row):
    for name, num in circuit_map.items():
        if num == int(row['court_id'].replace('DC','12')):
            return f'U.S. Court of Appeals for the {name} Circuit'
    return ''

circuits_gdf['court_name'] = circuits_gdf.apply(extract_court_name, axis=1)

#gdf.to_json()

# Or save directly to a GeoJSON file
circuits_gdf.to_file(r"docs\circuits.geojson", driver="GeoJSON")

print(circuits_gdf.head())

  court_id abbr                                           geometry court_type  \
0        1   ME  MULTIPOLYGON (((-71.55556 41.20832, -71.55795 ...    circuit   
1       10   CO  POLYGON ((-103.04193 36.50036, -103.04175 36.3...    circuit   
2       11  ALM  MULTIPOLYGON (((-82.87366 24.62647, -82.8752 2...    circuit   
3        2  NYW  MULTIPOLYGON (((-74.0676 40.62387, -74.06315 4...    circuit   
4        3   NJ  MULTIPOLYGON (((-65.00283 18.30503, -65.00254 ...    circuit   

   start_date    end_date                                      court_name  
0  1900-01-01  9999-01-01     U.S. Court of Appeals for the First Circuit  
1  1900-01-01  9999-01-01     U.S. Court of Appeals for the Tenth Circuit  
2  1900-01-01  9999-01-01  U.S. Court of Appeals for the Eleventh Circuit  
3  1900-01-01  9999-01-01    U.S. Court of Appeals for the Second Circuit  
4  1900-01-01  9999-01-01     U.S. Court of Appeals for the Third Circuit  


In [2]:
circuits_gdf.head()

Unnamed: 0,court_id,abbr,geometry,court_type,start_date,end_date
0,1,ME,"MULTIPOLYGON (((-71.55556 41.20832, -71.55795 ...",circuit,1900-01-01,9999-01-01
1,10,CO,"POLYGON ((-103.04193 36.50036, -103.04175 36.3...",circuit,1900-01-01,9999-01-01
2,11,ALM,"MULTIPOLYGON (((-82.87366 24.62647, -82.8752 2...",circuit,1900-01-01,9999-01-01
3,2,NYW,"MULTIPOLYGON (((-74.0676 40.62387, -74.06315 4...",circuit,1900-01-01,9999-01-01
4,3,NJ,"MULTIPOLYGON (((-65.00283 18.30503, -65.00254 ...",circuit,1900-01-01,9999-01-01


In [None]:
from maplibre import Layer, LayerType, Map, MapOptions
from maplibre.sources import GeoJSONSource
from maplibre.utils import df_to_geojson, geopandas_to_geojson
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import random

# --- 1. Generate Fake Home Data ---
NUM_HOMES = 100
CENTER_LAT = 21.3069  # Honolulu Latitude
CENTER_LON = -157.8583 # Honolulu Longitude
RADIUS = 0.05 # Degree radius for random points

lats = []
lons = []
for_sale_status = []
year_built_status = []
ids = []

for i in range(NUM_HOMES):
    lat = CENTER_LAT + random.uniform(-RADIUS, RADIUS)
    lon = CENTER_LON + random.uniform(-RADIUS, RADIUS)
    for_sale = random.choice([True, False])
    year_built = random.randint(1900, 2023)

    lats.append(lat)
    lons.append(lon)
    for_sale_status.append(for_sale)
    year_built_status.append(year_built)
    ids.append(f"home_{i}")

# Create a Pandas DataFrame
df = pd.DataFrame({
    'id': ids,
    'latitude': lats,
    'longitude': lons,
    'for_sale': for_sale_status,
    'year_built': year_built_status
})

# --- 2. Create GeoDataFrame ---
# Convert the DataFrame to a GeoDataFrame
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df.longitude, df.latitude),
    crs="EPSG:4326" # WGS 84 coordinate system
)

# Convert to GEOJSON and as source
gdf_json1 = gdf.to_json()
with open("docs\homes_data.geojson", "w") as file:
    file.write(gdf_json1)


#gdf_json2 = geopandas_to_geojson(gdf) # This one works because it requires dictionary


# homes = GeoJSONSource(data=geopandas_to_geojson(gdf))

# map_options = MapOptions(
#     center=(CENTER_LON, CENTER_LAT),
#     zoom=12,
#     hash=True,
# )

# m = Map(map_options)
# m.add_layer(
#     Layer(
#         id='homes1',
#         type=LayerType.CIRCLE,
#         source=homes,
#         paint={
#             "circle-color": ["match", ["get", "for_sale"], 1, 'red', 'blue'],
#             "circle-radius": 5,
#         },
#     )
# )

# out_filename = 'home.html'
# with open(out_filename, "w") as f:
#     f.write(m.to_html())


In [None]:
import json
from collections import Counter

# Load the GeoJSON data
gdf_data = json.loads(gdf_json1)

# Extract the 'year_built' property from each feature
year_built_list = [feature['properties']['year_built'] for feature in gdf_data['features']]

# Count the occurrences of each year1933
year_counts = Counter(year_built_list)

# Sort the counts in descending order
sorted_year_counts = sorted(year_counts.items(), key=lambda x: x[0], reverse=True)

# Print the sorted list
for year, count in sorted_year_counts:
    print(f"Year: {year}, Count: {count}")



# so we are going to be displaying polygons shaded by the percentage red or blue
# there are different polygons for district and circuit and these can change over time
# the districts changed each decade, affecting the circuit shape
# further the circuit (5) became two (11 and 5) in 1981
# We also want the user to be able to include or exclude senior judges
# So, if we want to go back to the start of the court of appeals in 1911,
# we will have ten sets of polygons for circuit (12-13) and district (94)
# So, like 1,100 polygons in the GeoJSON file
# AI suggests having one GEOJSON for the shapes and another for the data.
# The data would have an entry for each of the roughly 46,000 days
# Each day would have a district_id, circuit_id,
# number of active judges for each party and number of active+senior judges for each party

# Use this map for 2004 when Northern Mariana Islands was added to the 9th Circuit...
# https://data.ojp.usdoj.gov/Shapefile/US-Attorney-Districts-Shapefile/5fdt-n5ne/about_data
# We may need to let the data dictate when new districts, etc. were added


# We may have to have a box that displays things like Supreme Court, territories, DC, etc.




