In [16]:
import pandas as pd

# Load the wrangled reading-score dataset into a DataFrame.
df = pd.read_excel('wrangled_reading_score_data.xlsx')

# Per-opinion readability score columns ("Opinion N FRE Score" /
# "Opinion N F-K Score"). The previous filter (contains "Opinion", lacks
# "Author") also matched the "Opinion N Type" columns, and errors='coerce'
# turns every non-numeric value into NaN.
# NOTE(review): the Type columns presumably hold text labels; confirm they
# were never meant to be numeric.
opinion_columns = [
    col for col in df.columns
    if 'Opinion' in str(col) and str(col).endswith('Score')
]

# Convert every score column (syllabus + opinions) to floats in one pass;
# values that cannot be parsed become NaN rather than raising.
score_columns = ['Syllabus F-K Score', 'Syllabus FRE Score', *opinion_columns]
df[score_columns] = df[score_columns].apply(pd.to_numeric, errors='coerce')

df.head()

Unnamed: 0,Case,Year,Syllabus FRE Score,Syllabus F-K Score,Opinion 1 FRE Score,Opinion 1 F-K Score,Opinion 1 Author(s),Opinion 2 FRE Score,Opinion 2 F-K Score,Opinion 2 Author(s),...,Opinion 7 Author(s),Opinion 7 Type,Opinion 8 FRE Score,Opinion 8 F-K Score,Opinion 8 Author(s),Opinion 8 Type,Opinion 9 FRE Score,Opinion 9 F-K Score,Opinion 9 Author(s),Opinion 9 Type
0,Postal Service v. Gregory,2001,45.356667,11.266667,46.713333,1.966667,Justice O'Connor,47.4975,1.95,Justice Thomas,...,,,,,,,,,,
1,TRW INC. v. ANDREWS,2001,43.84,12.5,5.292377,1.653846,Justice Ginsburg,68.2,7.94,Justice Scalia,...,,,,,,,,,,
2,CORRECTIONAL SERVICES CORP. v. MALESKO,2001,44.19,11.7,49.92,1.366667,Chief Justice Rehnquist,67.76,6.8,Justice Scalia,...,,,,,,,,,,
3,CHICKASAW NATION v. UNITED STATES,2001,49.76,1.233333,5.888333,9.883333,Justice Breyer,5.227143,1.285714,Justice O'Connor,...,,,,,,,,,,
4,"ADARAND CONSTRUCTORS, INC. v. MINETA,_x000D_\n...",2001,56.0,9.25,54.238571,9.314286,Per Curiam,,,,...,,,,,,,,,,


In [13]:
# Count the distinct non-null values in the first opinion's author column.
# NOTE(review): this counts distinct author strings, so entries such as
# "Per Curiam" are included; it may not equal the number of individual justices.
opinion1_authors = df['Opinion 1 Author(s)'].dropna()
unique_justices_opinion1 = len(opinion1_authors.unique())
unique_justices_opinion1


35

In [14]:
# Number of rows (cases) recorded for each value of Year.
cases_per_year = df.groupby('Year').size()
# Only the final expression of a cell is rendered automatically, so a bare
# `cases_per_year` line in the middle of the cell showed nothing. `display`
# is made available by the IPython kernel without an import.
display(cases_per_year)

# Rows whose case name contains "dobbs" (case-insensitive); rows with a
# missing case name are treated as non-matches.
dobbs_case = df[df['Case'].str.contains('dobbs', case=False, na=False)]
dobbs_case

Unnamed: 0,Case,Year,Syllabus FRE Score,Syllabus F-K Score,Opinion 1 FRE Score,Opinion 1 F-K Score,Opinion 1 Author(s),Opinion 2 FRE Score,Opinion 2 F-K Score,Opinion 2 Author(s),...,Opinion 7 Author(s),Opinion 7 Type,Opinion 8 FRE Score,Opinion 8 F-K Score,Opinion 8 Author(s),Opinion 8 Type,Opinion 9 FRE Score,Opinion 9 F-K Score,Opinion 9 Author(s),Opinion 9 Type
1608,"DOBBS, STATE HEALTH OFFICER OF THE MISSISSIPPI...",2021-01-01,61.967692,8.538462,6.768533,8.729333,Justice Alito,61.896667,7.666667,Justice Thomas,...,,,,,,,,,,


In [44]:
# Plot the yearly average Opinion 1 readability scores (F-K and FRE)
import pandas as pd
import altair as alt

def padded_domain(values, pad_fraction=0.1):
    """Return ``[low, high]`` axis limits that extend past the data range.

    Parameters
    ----------
    values : pandas.Series
        Numeric values that will be plotted on the axis.
    pad_fraction : float, default 0.1
        Fraction of the data range (max - min) added below the minimum and
        above the maximum.

    Returns
    -------
    list
        ``[min - margin, max + margin]``.
    """
    low, high = values.min(), values.max()
    margin = (high - low) * pad_fraction
    return [low - margin, high + margin]


def yearly_score_chart(data, score_column, axis_title, chart_title, y_domain, selection):
    """Build a layered line + point chart of one score column against Year.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``Year`` column and the column named by ``score_column``.
    score_column : str
        Name of the quantitative column to plot on the y-axis.
    axis_title : str
        Title used for the y-axis and for the score tooltip.
    chart_title : str
        Title shown above the chart.
    y_domain : list
        Two-element ``[low, high]`` domain for the y-axis scale.
    selection : altair selection parameter
        Drives the point opacity (1 when selected, 0.5 otherwise) and is
        attached to the point layer.

    Returns
    -------
    altair.LayerChart
        The line layer and the point layer, 700x400.
    """
    x_year = alt.X('Year:O', title='Year')
    y_score = alt.Y(
        f'{score_column}:Q',
        title=axis_title,
        scale=alt.Scale(domain=y_domain),
    )

    line = alt.Chart(data).mark_line(strokeWidth=3).encode(x=x_year, y=y_score)

    points = (
        alt.Chart(data)
        .mark_point(size=100)
        .encode(
            x=x_year,
            y=y_score,
            tooltip=[
                alt.Tooltip('Year:O', title='Year'),
                alt.Tooltip(f'{score_column}:Q', title=axis_title),
            ],
            opacity=alt.condition(selection, alt.value(1), alt.value(0.5)),
        )
        .add_params(selection)
    )

    return alt.layer(line, points).properties(
        title=chart_title,
        width=700,
        height=400,
    )


# Mean Opinion 1 F-K and FRE score for each year.
avg_scores = (
    df.groupby('Year')[['Opinion 1 F-K Score', 'Opinion 1 FRE Score']]
    .mean()
    .reset_index()
)

# Hover selection keyed on Year. `empty=False` is the Altair 5 spelling of the
# deprecated `empty='none'`: nothing counts as selected until a point is hovered.
nearest = alt.selection_point(on='mouseover', nearest=True, empty=False, fields=['Year'])

# Y-axis limits padded by 10% of each score's range.
fk_range = padded_domain(avg_scores['Opinion 1 F-K Score'])
fre_range = padded_domain(avg_scores['Opinion 1 FRE Score'])

fk_chart = yearly_score_chart(
    avg_scores,
    score_column='Opinion 1 F-K Score',
    axis_title='Average F-K Score',
    chart_title='Average F-K Score per Year',
    y_domain=fk_range,
    selection=nearest,
)

fre_chart = yearly_score_chart(
    avg_scores,
    score_column='Opinion 1 FRE Score',
    axis_title='Average FRE Score',
    chart_title='Average FRE Score per Year',
    y_domain=fre_range,
    selection=nearest,
)

# Display the charts stacked vertically
fk_chart & fre_chart