In [1]:
import pandas as pd

# Load the wrangled reading-score workbook into a DataFrame.
df = pd.read_excel('wrangled_reading_score_data.xlsx')

# Split the columns by role:
#   * author columns: any column whose name contains "Author";
#   * opinion score columns: "Opinion ..." columns that are neither an
#     "Author" nor a "Type" column ("Type" columns are not touched here).
author_columns = [col for col in df.columns if 'Author' in col]
opinion_columns = [
    col for col in df.columns
    if 'Opinion' in col and 'Author' not in col and 'Type' not in col
]

# Coerce every score column (per-opinion and syllabus) to numeric; values that
# cannot be parsed become NaN instead of raising.
score_columns = opinion_columns + ['Syllabus F-K Score', 'Syllabus FRE Score']
for col in score_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Normalise the author columns in a single pass:
#   1. cast to str (note: a missing author becomes the literal string 'nan'),
#   2. replace the typographic apostrophe with a plain one,
#   3. repair the truncated surname "Ginsbur" (whole-word regex match),
#   4. repair the "Justuce" typo,
#   5. strip leading/trailing whitespace.
# `regex` is passed explicitly so literal replacements behave the same on every
# pandas version (the default changed between pandas 1.x and 2.x).
for col in author_columns:
    df[col] = (
        df[col]
        .astype(str)
        .str.replace('’', "'", regex=False)
        .str.replace(r'\bGinsbur\b', 'Ginsburg', regex=True)
        .str.replace('Justuce', 'Justice', regex=False)
        .str.strip()
    )

df.head()

Unnamed: 0,Case,Year,Syllabus FRE Score,Syllabus F-K Score,Opinion 1 FRE Score,Opinion 1 F-K Score,Opinion 1 Author(s),Opinion 2 FRE Score,Opinion 2 F-K Score,Opinion 2 Author(s),...,Opinion 7 Author(s),Opinion 7 Type,Opinion 8 FRE Score,Opinion 8 F-K Score,Opinion 8 Author(s),Opinion 8 Type,Opinion 9 FRE Score,Opinion 9 F-K Score,Opinion 9 Author(s),Opinion 9 Type
0,Postal Service v. Gregory,2001,45.356667,11.266667,46.713333,1.966667,Justice O'Connor,47.4975,1.95,Justice Thomas,...,,,,,,,,,,
1,TRW INC. v. ANDREWS,2001,43.84,12.5,5.292377,1.653846,Justice Ginsburg,68.2,7.94,Justice Scalia,...,,,,,,,,,,
2,CORRECTIONAL SERVICES CORP. v. MALESKO,2001,44.19,11.7,49.92,1.366667,Chief Justice Rehnquist,67.76,6.8,Justice Scalia,...,,,,,,,,,,
3,CHICKASAW NATION v. UNITED STATES,2001,49.76,1.233333,5.888333,9.883333,Justice Breyer,5.227143,1.285714,Justice O'Connor,...,,,,,,,,,,
4,"ADARAND CONSTRUCTORS, INC. v. MINETA,_x000D_\n...",2001,56.0,9.25,54.238571,9.314286,Per Curiam,,,,...,,,,,,,,,,


In [10]:
# Collect every per-opinion "Type" column and cast it to str so the values can
# be compared against a string label (missing entries become 'nan').
opinion_type_columns = [col for col in df.columns if 'Opinion' in col and 'Type' in col]
for col in opinion_type_columns:
    df[col] = df[col].astype(str)

# Flag a case as "Controversial" (1) when at least two of its opinions are
# labelled exactly 'dissenting', otherwise 0. The comparison is vectorised
# across the type columns instead of applying a Python lambda to every row.
dissent_counts = df[opinion_type_columns].eq('dissenting').sum(axis=1)
df['Controversial'] = dissent_counts.ge(2).astype(int)

# Show the flagged cases.
df[df['Controversial'] == 1]

Unnamed: 0,Case,Year,Syllabus FRE Score,Syllabus F-K Score,Opinion 1 FRE Score,Opinion 1 F-K Score,Opinion 1 Author(s),Opinion 2 FRE Score,Opinion 2 F-K Score,Opinion 2 Author(s),...,Opinion 7 Type,Opinion 8 FRE Score,Opinion 8 F-K Score,Opinion 8 Author(s),Opinion 8 Type,Opinion 9 FRE Score,Opinion 9 F-K Score,Opinion 9 Author(s),Opinion 9 Type,Controversial
31,"MICKENS v. TAYLOR, WARDEN",2001,47.330000,1.500000,47.515385,11.215385,Justice Scalia,49.277500,1.775000,Justice Kennedy,...,,,,,,,,,,1
36,UNITED STATES v. CRAFT,2001,49.910000,11.550000,58.135714,9.157143,Justice O'Connor,51.780000,1.900000,Justice Scalia,...,,,,,,,,,,1
37,"TAHOE-SIERRA PRESERVATION COUNCIL, INC.,_x000D...",2001,44.750000,11.775000,47.378919,11.600000,Justice Stevens,49.488333,1.516667,Chief Justice Rehnquist,...,,,,,,,,,,1
39,"US AIRWAYS, INC. v. BARNETT",2001,39.136667,12.266667,43.485714,11.242857,Justice Breyer,49.150000,11.900000,Justice Stevens,...,,,,,,,,,,1
50,FEDERAL MARITIME COMMISSION v. SOUTH_x000D_\nC...,2001,41.835000,13.125000,49.227391,11.213435,Justice Thomas,53.496667,9.533333,Justice Stevens,...,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1662,"SAMIA, aka SAMIC v. UNITED STATES",2022,53.368000,1.260000,53.640000,1.394118,Justice Thomas,65.593333,7.633333,Justice Barrett,...,,,,,,,,,,1
1667,COUNTERMAN v. COLORADO,2022,63.295000,7.475000,63.855714,7.542857,Justice Kagan,61.231818,8.260000,Justice Sotomayor,...,,,,,,,,,,1
1669,"STUDENTS FOR FAIR ADMISSIONS, INC. v._x000D_\n...",2022,58.737692,8.576924,6.892895,8.339474,Chief Justice Roberts,54.558718,9.758772,Justice Thomas,...,,,,,,,,,,1
1684,"WILKINSON v. GARLAND, ATTORNEY GENERAL",2023,47.836667,11.000000,53.233571,9.850000,Justice Sotomayor,57.520000,8.700000,Justice Jackson,...,,,,,,,,,,1


In [3]:
# Count the distinct non-missing labels in the "Opinion 1 Author(s)" column.
# Every distinct label is counted, so entries that are not a single justice
# (for example 'Per Curiam', visible in the data preview) are included.
opinion1_author_labels = df['Opinion 1 Author(s)'].dropna().unique()
unique_justices_opinion1 = len(opinion1_author_labels)
unique_justices_opinion1


19

In [4]:
# Number of rows (cases) recorded for each year.
cases_per_year = df.groupby('Year').size()
# Only a cell's final expression is rendered automatically, so print this
# intermediate result explicitly rather than leaving it as a bare expression
# whose value would be silently discarded.
print(cases_per_year)

# Look up the Dobbs decision with a case-insensitive substring match on the
# case name; rows with a missing name are treated as non-matches (na=False).
dobbs_case = df[df['Case'].str.contains('dobbs', case=False, na=False)]
dobbs_case

Unnamed: 0,Case,Year,Syllabus FRE Score,Syllabus F-K Score,Opinion 1 FRE Score,Opinion 1 F-K Score,Opinion 1 Author(s),Opinion 2 FRE Score,Opinion 2 F-K Score,Opinion 2 Author(s),...,Opinion 7 Type,Opinion 8 FRE Score,Opinion 8 F-K Score,Opinion 8 Author(s),Opinion 8 Type,Opinion 9 FRE Score,Opinion 9 F-K Score,Opinion 9 Author(s),Opinion 9 Type,Controversial
1608,"DOBBS, STATE HEALTH OFFICER OF THE MISSISSIPPI...",2021,61.967692,8.538462,6.768533,8.729333,Justice Alito,61.896667,7.666667,Justice Thomas,...,,,,,,,,,,0


In [5]:
# Create line chart for F-K scores
import pandas as pd
import altair as alt

def _padded_domain(values, pad_fraction=0.1):
    """Return ``[min - pad, max + pad]`` for ``values``.

    ``pad`` is ``pad_fraction`` of the distance between the smallest and the
    largest value, so the plotted line does not touch the chart border.
    """
    lowest, highest = values.min(), values.max()
    padding = (highest - lowest) * pad_fraction
    return [lowest - padding, highest + padding]


def _yearly_score_chart(data, score_col, axis_title, hover, domain):
    """Build a line chart with hover-highlighted points for one score column.

    Parameters
    ----------
    data : DataFrame with a 'Year' column and the column named by ``score_col``.
    score_col : name of the column plotted on the y axis.
    axis_title : y-axis / tooltip title; also used as "<axis_title> per Year"
        for the chart title.
    hover : Altair selection parameter that drives the point opacity.
    domain : two-element list giving the y-axis limits.

    Returns
    -------
    A layered Altair chart (line + points), 700x400 pixels.
    """
    x_year = alt.X('Year:O', title='Year')
    y_score = alt.Y(f'{score_col}:Q', title=axis_title, scale=alt.Scale(domain=domain))

    line = alt.Chart(data).mark_line(strokeWidth=3).encode(x=x_year, y=y_score)

    # Points are fully opaque when selected by `hover`, half-transparent otherwise.
    points = (
        alt.Chart(data)
        .mark_point(size=100)
        .encode(
            x=x_year,
            y=y_score,
            tooltip=[
                alt.Tooltip('Year:O', title='Year'),
                alt.Tooltip(f'{score_col}:Q', title=axis_title),
            ],
            opacity=alt.condition(hover, alt.value(1), alt.value(0.5)),
        )
        .add_params(hover)
    )

    return alt.layer(line, points).properties(
        title=f'{axis_title} per Year',
        width=700,
        height=400,
    )


# Mean Opinion 1 scores for each year.
avg_scores = (
    df.groupby('Year')
    .agg({'Opinion 1 F-K Score': 'mean', 'Opinion 1 FRE Score': 'mean'})
    .reset_index()
)

# Selection that follows the mouse to the nearest year. `empty=False` makes an
# empty selection match nothing; the string form 'none' is the deprecated
# Altair 4 spelling of the same setting.
nearest = alt.selection_point(on='mouseover', nearest=True, empty=False, fields=['Year'])

# F-K chart, with the y axis padded by 10% of the observed range on each side.
fk_range = _padded_domain(avg_scores['Opinion 1 F-K Score'])
fk_chart = _yearly_score_chart(
    avg_scores, 'Opinion 1 F-K Score', 'Average F-K Score', nearest, fk_range
)

# FRE chart, built the same way.
fre_range = _padded_domain(avg_scores['Opinion 1 FRE Score'])
fre_chart = _yearly_score_chart(
    avg_scores, 'Opinion 1 FRE Score', 'Average FRE Score', nearest, fre_range
)

# Display: FRE chart stacked above the F-K chart.
fre_chart & fk_chart

In [6]:
def _author_violin(data, score_col, authors, title):
    """Build a violin chart of ``score_col`` with one facet column per author.

    Parameters
    ----------
    data : DataFrame containing 'Opinion 1 Author(s)' and ``score_col``.
    score_col : name of the score column whose density is drawn.
    authors : list of author labels to keep; all other rows are filtered out.
    title : chart title.

    Returns
    -------
    A faceted Altair chart; each facet is 100x400 pixels.
    """
    return alt.Chart(data).transform_filter(
        alt.FieldOneOfPredicate(field='Opinion 1 Author(s)', oneOf=authors)
    ).transform_density(
        density=score_col,
        as_=[score_col, 'density'],
        # Use one shared extent (taken over all rows of `data`) so every
        # author's density is evaluated over the same score range.
        extent=[data[score_col].min(), data[score_col].max()],
        groupby=['Opinion 1 Author(s)']
    ).mark_area(orient='horizontal').encode(
        # Centre-stacking the density mirrors it around the axis, which gives
        # the violin shape.
        alt.X('density:Q')
            .stack('center')
            .impute(None)
            .title(None)
            .axis(labels=False, values=[0], grid=False, ticks=True),
        alt.Y(f'{score_col}:Q'),
        alt.Color('Opinion 1 Author(s):N', scale=alt.Scale(scheme='category20')),
        alt.Column('Opinion 1 Author(s):N')
            .spacing(10)
            .header(titleOrient='bottom', labelOrient='bottom', labelPadding=0)
    ).properties(
        title=title,
        width=100,
        height=400
    )


# Author labels to plot: skip jointly written opinions (labels containing
# ' and '), non-string values, and the literal 'nan' placeholder that
# `astype(str)` produces for a missing author.
distinct_opinion1_authors = [
    author
    for author in df['Opinion 1 Author(s)'].unique()
    if isinstance(author, str) and author != 'nan' and ' and ' not in author
]

# Hand Altair only the columns the charts use; the chart data is embedded in
# the chart specification, so unused columns would only make it larger.
violin_data = df[['Opinion 1 Author(s)', 'Opinion 1 FRE Score', 'Opinion 1 F-K Score']]

violin_plot_fre = _author_violin(
    violin_data,
    'Opinion 1 FRE Score',
    distinct_opinion1_authors,
    'Violin Plot of Opinion 1 FRE Scores by Author',
)
violin_plot_fk = _author_violin(
    violin_data,
    'Opinion 1 F-K Score',
    distinct_opinion1_authors,
    'Violin Plot of Opinion 1 F-K Scores by Author',
)

# Stack the two charts vertically; the view stroke must be configured on the
# top-level (concatenated) chart.
violin_plots = alt.vconcat(violin_plot_fre, violin_plot_fk).configure_view(
    stroke=None
)

# Display the violin plots
violin_plots

In [8]:
def _controversial_violin(data, score_col, title):
    """Build a violin chart of ``score_col`` split by the 'Controversial' flag.

    Parameters
    ----------
    data : DataFrame containing 'Controversial' and ``score_col``.
    score_col : name of the score column whose density is drawn.
    title : chart title.

    Returns
    -------
    A faceted Altair chart with one 100x400 pixel facet per flag value.
    """
    return alt.Chart(data).transform_density(
        density=score_col,
        as_=[score_col, 'density'],
        # Shared extent so both groups are evaluated over the same score range.
        extent=[data[score_col].min(), data[score_col].max()],
        groupby=['Controversial']
    ).mark_area(orient='horizontal').encode(
        # Centre-stacking the density mirrors it around the axis, which gives
        # the violin shape.
        alt.X('density:Q')
            .stack('center')
            .impute(None)
            .title(None)
            .axis(labels=False, values=[0], grid=False, ticks=True),
        alt.Y(f'{score_col}:Q'),
        # Blue for 0, red for 1.
        alt.Color('Controversial:N', scale=alt.Scale(domain=[0, 1], range=['#1f77b4', '#d62728'])),
        alt.Column('Controversial:N')
            .spacing(10)
            .header(titleOrient='bottom', labelOrient='bottom', labelPadding=0)
    ).properties(
        title=title,
        width=100,
        height=400
    )


# The flag column is created in a separate cell; fail with a clear message
# instead of rendering an empty chart when that cell has not been run.
if 'Controversial' not in df.columns:
    raise KeyError("'Controversial' column not found in df; run the cell that creates the flag first")

# Hand Altair only the columns the charts use.
controversial_data = df[['Controversial', 'Opinion 1 FRE Score', 'Opinion 1 F-K Score']]

violin_plot_controversial_fre = _controversial_violin(
    controversial_data,
    'Opinion 1 FRE Score',
    'Violin Plot of Opinion 1 FRE Scores by Controversial Flag',
)
violin_plot_controversial_fk = _controversial_violin(
    controversial_data,
    'Opinion 1 F-K Score',
    'Violin Plot of Opinion 1 F-K Scores by Controversial Flag',
)

# Place the two charts side by side; the view stroke must be configured on the
# top-level (concatenated) chart.
violin_plots_controversial = alt.hconcat(
    violin_plot_controversial_fre,
    violin_plot_controversial_fk
).configure_view(
    stroke=None
)

# Display the combined chart
violin_plots_controversial.display()