In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

In [2]:
# Base URL of the website
base_url = "https://trac.syr.edu/immigration/reports/judgereports/"

In [3]:
# URL of the main page containing the table
url = "https://trac.syr.edu/immigration/reports/judgereports/"

In [4]:
# President and party mapping based on years
president_party_mapping = {
    (2021, 2024): ("Joe Biden", "Democrat"),
    (2017, 2020): ("Donald Trump", "Republican"),
    (2009, 2016): ("Barack Obama", "Democrat"),
    (2001, 2008): ("George W. Bush", "Republican"),
    (1993, 2000): ("Bill Clinton", "Democrat"),
    (1989, 1992): ("George H. W. Bush", "Republican"),
    (1981, 1988): ("Ronald Reagan", "Republican"),
    (1977, 1980): ("Jimmy Carter", "Democrat"),
    (1974, 1976): ("Gerald Ford", "Republican"),
    (1969, 1974): ("Richard Nixon", "Republican"),
    (1963, 1968): ("Lyndon B. Johnson", "Democrat"),
    (1961, 1963): ("John F. Kennedy", "Democrat"),
    # Add more as needed, or update with earlier years
}

# Function to determine the president and party based on the appointment year
def get_president_and_party(year):
    year = int(year)
    for years, (president, party) in president_party_mapping.items():
        if years[0] <= year <= years[1]:
            return president, party
    return None, None

In [5]:
# Send a GET request to fetch the content of the page
response = requests.get(url)
response.raise_for_status()  # Ensure the request was successful

In [6]:
# Parse the page content using BeautifulSoup
soup = BeautifulSoup(response.text, 'html.parser')

In [7]:
# Find the main table
table = soup.find('table')

In [8]:
# Initialize variables
data = []
current_court = ""
rowspan_counter = 0

In [9]:
# Loop through each row in the table
for row in table.find_all('tr'):
    cols = row.find_all('td')
    
    # Continue only if the row has columns (i.e., it's not empty)
    if len(cols) > 0:
        # If a row contains the "rowspan" attribute, it's the start of a new Immigration Court
        if 'rowspan' in cols[0].attrs:
            current_court = cols[0].get_text(strip=True)
            rowspan_counter = int(cols[0]['rowspan'])  # Number of rows this court spans
            
            # Ensure that there are enough columns before accessing
            if len(cols) >= 6:
                judge = cols[1].get_text(strip=True)
                judge_link = cols[1].find('a')['href'] if cols[1].find('a') else None
                total_decisions = cols[2].get_text(strip=True)
                percent_granted_asylum = cols[3].get_text(strip=True)
                percent_granted_other = cols[4].get_text(strip=True)
                percent_denied = cols[5].get_text(strip=True)
            else:
                continue  # Skip row if it doesn't have the expected number of columns
        else:
            # If it's a continuation row (not a new court), ensure there are enough columns
            if len(cols) >= 5:
                judge = cols[0].get_text(strip=True)
                judge_link = cols[0].find('a')['href'] if cols[0].find('a') else None
                total_decisions = cols[1].get_text(strip=True)
                percent_granted_asylum = cols[2].get_text(strip=True)
                percent_granted_other = cols[3].get_text(strip=True)
                percent_denied = cols[4].get_text(strip=True)
            else:
                continue  # Skip row if it doesn't have the expected number of columns
        
        # Decrement the rowspan counter
        rowspan_counter -= 1
        
        # Initialize extracted data
        appointment_year = None
        juris_doctor_year = None
        
        # If there's a link to the judge's bio, extract additional data
        if judge_link:
            # Construct the full URL
            judge_page_url = base_url + judge_link
            judge_page = requests.get(judge_page_url)
            judge_soup = BeautifulSoup(judge_page.text, 'html.parser')
            
            # Extract the paragraph at the specified XPath-like location
            bio_paragraph = judge_soup.select_one("div div div p:nth-of-type(2)")
            if bio_paragraph:
                # Extract the first year as the Appointment Date from this paragraph
                year_match = re.findall(r'\b(\d{4})\b', bio_paragraph.get_text())
                if year_match:
                    appointment_year = year_match[0]  # First year in this specific paragraph
            
            # Extract the Juris Doctor year (closest year to the word "Juris")
            juris_match = re.search(r'Juris.*?(\d{4})', judge_soup.get_text(), re.IGNORECASE)
            if juris_match:
                juris_doctor_year = juris_match.group(1)
        
        # Determine the president and party based on the appointment year
        president, party = get_president_and_party(appointment_year) if appointment_year else (None, None)
        
        # Determine if the appointer was a Democrat
        democrat_appointer = 1 if party == "Democrat" else 0
        
        # Append the data to the list
        data.append([current_court, judge, total_decisions, percent_granted_asylum, 
                     percent_granted_other, percent_denied, appointment_year, 
                     juris_doctor_year, president, party, democrat_appointer])

In [10]:
# Create a DataFrame from the collected data
columns = ["Immigration Court", "Judge", "Total Decisions", "% Granted Asylum", 
           "% Granted Other Relief", "% Denied", "Appointment Date", "Juris Doctor Year",
           "Appointing President", "Party", "Democrat Appointer"]
df = pd.DataFrame(data, columns=columns)

In [11]:
# Save the DataFrame to a CSV file
df.to_csv('immigration_judges.csv', index=False)

print("Data successfully scraped and saved to 'immigration_judges.csv'")

Data successfully scraped and saved to 'immigration_judges.csv'


In [47]:
# 1. Import Data

import pandas as pd
import statsmodels.api as sm

df = pd.read_csv('immigration_judges.csv')

In [None]:
## MODELS

In [49]:
## 1. Impact of Judicial Factors on Denial Rate

# Select the columns to include in X
X = df[["% Granted Asylum", "% Granted Other Relief", "Democrat Appointer",
        "Juris Doctor Year", "Appointing President"]]

# Convert categorical columns to dummy variables
# Convert "Democrat Appointer" and "Immigration Court" to dummies
X = pd.get_dummies(X, columns=["Democrat Appointer", "Appointing President", ], drop_first=True)

# Convert y to numeric, coercing errors to NaN
y = pd.to_numeric(df["% Denied"], errors='coerce')

In [53]:
## 2. Effect of Appointment Factors on Asylum Grant Rates

X = df[["Total Decisions", "% Granted Other Relief", "Juris Doctor Year", "Appointing President", "Party", "Democrat Appointer"]]

# Convert categorical columns to dummy variables
X = pd.get_dummies(X, columns=["Democrat Appointer", "Appointing President", "Party"], drop_first=True)

# Ensure all data is numeric
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(df["% Granted Asylum"], errors='coerce')


In [61]:
## 3. Impact of Judicial and Appointment Factors on Granting Other Relief

X = df[["Total Decisions", "% Granted Asylum", "% Denied", "Juris Doctor Year", "Appointing President", "Party"]]

# Convert categorical columns to dummy variables
X = pd.get_dummies(X, columns=["Appointing President", "Party"], drop_first=True)

# Ensure all data is numeric
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(df["% Granted Other Relief"], errors='coerce')

In [65]:
## 4. Judicial Decisions Analysis by Judge and Court

X = df[["Total Decisions", "% Granted Asylum", "% Granted Other Relief", "Juris Doctor Year", "Democrat Appointer"]]

# Convert categorical columns to dummy variables
X = pd.get_dummies(X, columns=["Democrat Appointer"], drop_first=True)

# Ensure all data is numeric
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(df["Total Decisions"], errors='coerce')

In [None]:
## ANALYSIS

In [67]:

# 2. Ensure All Data is Numeric

# Convert all columns in X to numeric, coercing errors to NaN
X = X.apply(pd.to_numeric, errors='coerce')


# Drop any rows with NaN values in X or y to ensure clean data
X = X.dropna()
y = y.loc[X.index]  # Align y with the cleaned X

# 3. Convert Boolean Columns to Integers (if applicable)

# Convert boolean columns in X to integers (0 and 1)
X = X.astype(int)

# 4. Add a Constant Term to the Model

# Add a constant term (intercept) to the model
X = sm.add_constant(X)

# 5. Fit the OLS Regression Model

# Fit the OLS model
model = sm.OLS(y, X).fit()

# 6. Print the Summary of the Regression

# Output the regression results summary
print(model.summary())



                            OLS Regression Results                            
Dep. Variable:        Total Decisions   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 2.497e+30
Date:                Sun, 18 Aug 2024   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                 13727.
No. Observations:                 539   AIC:                        -2.744e+04
Df Residuals:                     533   BIC:                        -2.742e+04
Df Model:                           5                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                   1.13