**Title:** Datafun-06-Eda   
**Author:** Jacob Sanders  
**Date:** February 2026  

**Purpose:** To determine whether there is a correlation between students' chapter
test scores and their State Test predictor scores. Students' year-to-date
attendance percentage is also included as an additional data point.

In [45]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Configure pandas display options BEFORE printing anything, so the
# settings actually apply to the output produced below
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.width', 200)         # Expand display width
pd.set_option('display.max_colwidth', None) # Show full content in each column

# Load your CSV file
df = pd.read_csv('data/math_class_data.csv')

# Inspect first rows of the DataFrame
print(df.head())

# Summary statistics (count, mean, std, min, quartiles, max)
print(df.describe())

# Column data types (rendered as the cell's result in a notebook)
df.dtypes


   Student_ID Student_Name_First Student_Name_Last_Initial  Chapter_1_Test  Chapter_2_Test  Chapter_3_Test  Chapter_4_Test  Chapter_5_Test  Chapter_6_Test  Chapter_7_Test  Fall_MAP_score  \
0           1                Cat                        LA             100             100              92              93             100              89              95              84   
1           2              Viper                         A              91              70              56              57              96              78              85              75   
2           3            Chicken                        BA              81             100              83              67              34              67              75              53   
3           4               Goat                         C              81              50              64              80              60              33              80              70   
4           5                Hog                  

Student_ID                   int64
Student_Name_First             str
Student_Name_Last_Initial      str
Chapter_1_Test               int64
Chapter_2_Test               int64
Chapter_3_Test               int64
Chapter_4_Test               int64
Chapter_5_Test               int64
Chapter_6_Test               int64
Chapter_7_Test               int64
Fall_MAP_score               int64
Winter_MAP_score             int64
Attendance_Percent_YTD       int64
dtype: object

In [46]:
# ==============================
# Module 1: Overall Class Boxplot
# ==============================

import pandas as pd
import matplotlib.pyplot as plt

def create_class_boxplot(df, columns, title="Overall Class Distribution"):
    """
    Creates a horizontal boxplot for all student scores combined.

    The values found in ``columns`` are pooled into one distribution.
    Scores outside the 1.5 * IQR fences are left out of the box and are
    listed in a caption under the plot instead.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the score columns.
    columns : list of str
        Names of the columns whose values are pooled.
    title : str, optional
        Heading drawn above the plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    all_scores = df[columns].stack().dropna()

    # IQR and outliers
    Q1 = all_scores.quantile(0.25)
    Q3 = all_scores.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    filtered_scores = all_scores[(all_scores >= lower_bound) & (all_scores <= upper_bound)]
    outliers = all_scores[(all_scores < lower_bound) | (all_scores > upper_bound)]

    # Plot
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.set_facecolor('#f4f6f9')

    # Outliers were already removed above, so the whiskers are stretched
    # over the full range (0th-100th percentile) of the remaining scores
    box = ax.boxplot(
        filtered_scores,
        vert=False,
        whis=[0, 100],
        patch_artist=True,
        widths=0.4,
        capwidths=0.4
    )

    # Styling
    for patch in box['boxes']:
        patch.set_facecolor('#4a90e2')
        patch.set_edgecolor('#4a90e2')
        patch.set_alpha(0.85)

    for median in box['medians']:
        median.set_color('#003366')
        median.set_linewidth(3)

    for whisker in box['whiskers']:
        whisker.set_color('#4a90e2')
        whisker.set_linewidth(2)

    for cap in box['caps']:
        cap.set_color('#4a90e2')
        cap.set_linewidth(2)

    ax.set_title(title, fontsize=24, fontweight='bold')
    ax.set_xlabel("Test Scores", fontsize=20)
    ax.tick_params(axis='x', labelsize=14)
    ax.set_yticks([])

    # Default window: x-axis 40-110 with tick labels at 50, 60, ..., 100.
    # It is widened (in steps of 10) only when plotted scores fall outside
    # of it, so a whisker is never cut off by the axis limits.
    x_low, x_high = 40, 110
    tick_low, tick_high = 50, 100
    if not filtered_scores.empty:
        data_low = float(filtered_scores.min())
        data_high = float(filtered_scores.max())
        if data_low < x_low:
            tick_low = int(data_low // 10 * 10)          # round down to a multiple of 10
            x_low = tick_low - 10
        if data_high > x_high:
            tick_high = int(-(-data_high // 10) * 10)    # round up to a multiple of 10
            x_high = tick_high + 10

    ax.set_xlim(x_low, x_high)
    ax.set_xticks(range(tick_low, tick_high + 1, 10))

    if not outliers.empty:
        outlier_text = "Outliers: " + ", ".join(map(str, sorted(outliers)))
    else:
        outlier_text = "No outliers detected."

    # Caption sits just below the figure area, centered horizontally
    fig.text(0.5, -0.05, outlier_text, fontsize=16, ha="center")
    plt.tight_layout()
    plt.show()

In [47]:
# ==============================
# Module 2: Single Student Boxplot
# ==============================

import matplotlib.pyplot as plt
import pandas as pd

def create_student_boxplot(scores, title="Student Scores"):
    """
    Creates a horizontal boxplot for a single student row of scores.

    Scores outside the 1.5 * IQR fences are left out of the box and are
    listed in a caption under the plot instead.

    Parameters
    ----------
    scores : pandas.Series or sequence
        The student's scores; anything that is not already a Series is
        wrapped in one. Missing values are dropped.
    title : str, optional
        Heading drawn above the plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    if not isinstance(scores, pd.Series):
        scores = pd.Series(scores)

    scores = scores.dropna()

    # IQR and outliers
    Q1 = scores.quantile(0.25)
    Q3 = scores.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    filtered_scores = scores[(scores >= lower_bound) & (scores <= upper_bound)]
    outliers = scores[(scores < lower_bound) | (scores > upper_bound)]

    # Plot
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.set_facecolor('#f4f6f9')

    # Outliers were already removed above, so the whiskers are stretched
    # over the full range (0th-100th percentile) of the remaining scores
    box = ax.boxplot(
        filtered_scores,
        vert=False,
        whis=[0, 100],
        patch_artist=True,
        widths=0.4,
        capwidths=0.4
    )

    # Styling
    for patch in box['boxes']:
        patch.set_facecolor('#4a90e2')
        patch.set_edgecolor('#4a90e2')
        patch.set_alpha(0.85)

    for median in box['medians']:
        median.set_color('#003366')
        median.set_linewidth(3)

    for whisker in box['whiskers']:
        whisker.set_color('#4a90e2')
        whisker.set_linewidth(2)

    for cap in box['caps']:
        cap.set_color('#4a90e2')
        cap.set_linewidth(2)

    ax.set_title(title, fontsize=24, fontweight='bold')
    ax.set_xlabel("Test Scores", fontsize=20)
    ax.tick_params(axis='x', labelsize=14)
    ax.set_yticks([])

    # Default window: x-axis 40-110 with tick labels at 50, 60, ..., 100.
    # It is widened (in steps of 10) only when plotted scores fall outside
    # of it, so a whisker is never cut off by the axis limits.
    x_low, x_high = 40, 110
    tick_low, tick_high = 50, 100
    if not filtered_scores.empty:
        data_low = float(filtered_scores.min())
        data_high = float(filtered_scores.max())
        if data_low < x_low:
            tick_low = int(data_low // 10 * 10)          # round down to a multiple of 10
            x_low = tick_low - 10
        if data_high > x_high:
            tick_high = int(-(-data_high // 10) * 10)    # round up to a multiple of 10
            x_high = tick_high + 10

    ax.set_xlim(x_low, x_high)
    ax.set_xticks(range(tick_low, tick_high + 1, 10))

    if not outliers.empty:
        outlier_text = "Outliers: " + ", ".join(map(str, sorted(outliers)))
    else:
        outlier_text = "No outliers detected."

    # Caption sits just below the figure area, centered horizontally
    fig.text(0.5, -0.05, outlier_text, fontsize=16, ha="center")
    plt.tight_layout()
    plt.show()

In [48]:
# ==============================
# Module 3: Student + Class Boxplots
# ==============================

import pandas as pd
import matplotlib.pyplot as plt

def _prepare_scores(scores):
    """Splits scores into in-range values and IQR outliers.

    Missing values are dropped first. A score counts as an outlier when it
    lies more than 1.5 * IQR below the first quartile or above the third.

    Returns a tuple ``(filtered, outliers)`` of Series taken from the
    cleaned input.
    """
    clean = scores.dropna()

    first_quartile = clean.quantile(0.25)
    third_quartile = clean.quantile(0.75)
    spread = third_quartile - first_quartile

    low_fence = first_quartile - 1.5 * spread
    high_fence = third_quartile + 1.5 * spread

    # Boolean masks: inside the fences (inclusive) vs. strictly outside
    inside = clean.ge(low_fence) & clean.le(high_fence)
    outside = clean.lt(low_fence) | clean.gt(high_fence)

    return clean[inside], clean[outside]

def _style_boxplot(ax, box):
    """Gives every part of a boxplot the shared blue color scheme.

    ``box`` is the dictionary returned by ``Axes.boxplot``; its 'boxes',
    'medians', 'whiskers' and 'caps' entries are restyled in that order.
    ``ax`` is accepted for a uniform call signature but is not used.
    """
    accent = '#4a90e2'
    line_look = (('set_color', accent), ('set_linewidth', 2))

    # (part name, ordered setter calls applied to each artist of that part)
    recipe = (
        ('boxes', (
            ('set_facecolor', accent),
            ('set_edgecolor', accent),
            ('set_alpha', 0.85),
        )),
        ('medians', (('set_color', '#003366'), ('set_linewidth', 3))),
        ('whiskers', line_look),
        ('caps', line_look),
    )

    for part, setters in recipe:
        for artist in box[part]:
            for setter_name, value in setters:
                getattr(artist, setter_name)(value)

def _outlier_caption(outliers):
    """Returns the caption text listing the given outlier values in ascending order."""
    if len(outliers) > 0:
        return "Outliers: " + ", ".join(map(str, sorted(outliers)))
    return "No outliers detected."


def create_student_and_class_boxplots(df, student_idx):
    """
    Displays:
    1) Student boxplot
    2) Class boxplot

    Both plots share the same x-axis range so they can be compared
    directly. Scores outside the 1.5 * IQR fences are left out of each box
    and listed as text next to the corresponding plot.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the seven ``Chapter_N_Test`` columns plus the
        ``Student_Name_First`` and ``Student_Name_Last_Initial`` columns.
    student_idx
        Row label (as used by ``df.loc``) of the student to plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    score_columns = [
        "Chapter_1_Test",
        "Chapter_2_Test",
        "Chapter_3_Test",
        "Chapter_4_Test",
        "Chapter_5_Test",
        "Chapter_6_Test",
        "Chapter_7_Test",
    ]

    first_name_col = "Student_Name_First"
    last_initial_col = "Student_Name_Last_Initial"

    # Gather all data before creating the figure, so that an unknown
    # student_idx raises without leaving an empty figure behind
    student_scores = df.loc[student_idx, score_columns]
    student_title = f"Scores for {df.loc[student_idx, first_name_col]} {df.loc[student_idx, last_initial_col]}"
    filtered_student, student_outliers = _prepare_scores(student_scores)

    all_scores = df[score_columns].stack()
    filtered_class, class_outliers = _prepare_scores(all_scores)

    # Shared x-axis: 50-100 by default, widened to the nearest multiple of
    # 10 only when plotted scores fall outside, so no whisker is cut off
    x_low, x_high = 50, 100
    for plotted in (filtered_student, filtered_class):
        if len(plotted) > 0:
            x_low = min(x_low, int(float(plotted.min()) // 10 * 10))
            x_high = max(x_high, int(-(-float(plotted.max()) // 10) * 10))

    fig, axes = plt.subplots(
        2, 1,
        figsize=(12, 8),
        gridspec_kw={"height_ratios": [1, 1], "hspace": 0.35}
    )

    # Student Boxplot
    ax1 = axes[0]
    ax1.set_facecolor('#f4f6f9')

    # Outliers are already removed, so whiskers span the remaining range
    box1 = ax1.boxplot(
        filtered_student,
        vert=False,
        whis=[0, 100],
        patch_artist=True,
        widths=0.4,
        capwidths=0.4
    )

    _style_boxplot(ax1, box1)

    ax1.set_title(student_title, fontsize=24, fontweight='bold')
    ax1.set_xlabel("Test Scores", fontsize=20)
    ax1.set_yticks([])
    ax1.set_xlim(x_low, x_high)

    # Student Outliers Text: placed inside the top-right corner of the
    # student plot, where it cannot collide with the class plot's title
    ax1.text(
        0.99, 0.95,
        _outlier_caption(student_outliers),
        fontsize=14,
        ha="right",
        va="top",
        transform=ax1.transAxes
    )

    # Class Boxplot
    ax2 = axes[1]
    ax2.set_facecolor('#f4f6f9')

    box2 = ax2.boxplot(
        filtered_class,
        vert=False,
        whis=[0, 100],
        patch_artist=True,
        widths=0.4,
        capwidths=0.4
    )

    _style_boxplot(ax2, box2)

    ax2.set_title("Overall Class Distribution", fontsize=20, fontweight='bold')
    ax2.set_xlabel("Test Scores", fontsize=16)
    ax2.set_yticks([])
    ax2.set_xlim(x_low, x_high)

    # Class Outliers Text: centered below the class plot (axes coordinates)
    ax2.text(
        0.5, -0.30,
        _outlier_caption(class_outliers),
        fontsize=14,
        ha="center",
        transform=ax2.transAxes
    )

    plt.show()

# 8th Grade Students' Chapter Test Scores  
## Use the buttons below to select a student.  
## This will display a boxplot of that student's scores,  
## followed by a boxplot of the overall class distribution.

In [49]:
# ==============================
# Module 4: Notebook Usage (Buttons Only)
# ==============================

import pandas as pd
import ipywidgets as widgets
from IPython.display import display, clear_output

# Load CSV (same file path as the first cell; this rebinds the notebook-level df)
df = pd.read_csv("data/math_class_data.csv")

# Output area for plots: the button callbacks below draw inside this widget
output = widgets.Output()

# Button callback generator
def make_button_callback(student_idx):
    """Builds the click handler for the button of one student.

    The returned handler clears the shared ``output`` widget and then draws
    the student's boxplot followed by the overall class boxplot inside it.
    ``student_idx`` is the row label used to look the student up in ``df``.
    """
    chapter_columns = (
        "Chapter_1_Test", "Chapter_2_Test", "Chapter_3_Test",
        "Chapter_4_Test", "Chapter_5_Test", "Chapter_6_Test", "Chapter_7_Test",
    )

    def callback(button):
        with output:
            # wait=True defers clearing until the new plots are ready
            clear_output(wait=True)

            # Student boxplot
            student_scores = df.loc[student_idx, list(chapter_columns)]
            first_name = df.loc[student_idx, 'Student_Name_First']
            last_initial = df.loc[student_idx, 'Student_Name_Last_Initial']
            create_student_boxplot(
                student_scores,
                title=f"Scores for {first_name} {last_initial}",
            )

            # Overall class boxplot
            create_class_boxplot(
                df,
                list(chapter_columns),
                title="Overall Class Distribution",
            )

    return callback

# One button per student, labelled "<first name> <last initial>"
buttons = []
for student_idx, student in df.iterrows():
    label = f"{student['Student_Name_First']} {student['Student_Name_Last_Initial']}"
    student_button = widgets.Button(
        description=label,
        layout=widgets.Layout(width='200px'),
    )
    # Each button gets its own handler bound to that student's row label
    student_button.on_click(make_button_callback(student_idx))
    buttons.append(student_button)

# Lay the buttons out in a grid: rows of 4, stacked vertically
button_rows = []
for start in range(0, len(buttons), 4):
    button_rows.append(widgets.HBox(buttons[start:start + 4]))
buttons_box = widgets.VBox(button_rows)

# Show the button grid with the plot output area underneath
display(buttons_box, output)

VBox(children=(HBox(children=(Button(description='Cat LA', layout=Layout(width='200px'), style=ButtonStyle()),…

Output()