<a href="https://colab.research.google.com/github/rddelarosa/APM1111/blob/main/FA_3_Stats_Theory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Use R or Python to find Q1, Q2, Q3, D9, P95, and the other descriptive measures for the following sample of test scores (see the expected table of descriptive measures below):


88 45 53 86 33 86 85 30 89 53 41 96 56 38 62
71 51 86 68 29 28 47 33 37 25 36 33 94 73 46
42 34 79 72 88 99 82 62 57 42 28 55 67 62 60
96 61 57 75 93 34 75 53 32 28 73 51 69 91 35

In [None]:
import numpy as np
import pandas as pd
from collections import Counter
from scipy import stats
import math

# Sample of 60 test scores, in the order given in the exercise.
data = [
    88, 45, 53, 86, 33, 86, 85, 30, 89, 53, 41, 96, 56, 38, 62,
    71, 51, 86, 68, 29, 28, 47, 33, 37, 25, 36, 33, 94, 73, 46,
    42, 34, 79, 72, 88, 99, 82, 62, 57, 42, 28, 55, 67, 62, 60,
    96, 61, 57, 75, 93, 34, 75, 53, 32, 28, 73, 51, 69, 91, 35,
]

# Location and spread. ddof=1 selects the sample (n - 1) denominator.
arr = np.array(data)
n = arr.size
mean = np.mean(arr)
median = np.median(arr)
std = np.std(arr, ddof=1)
var = np.var(arr, ddof=1)
_min, _max = np.min(arr), np.max(arr)

# Mode: several scores can tie for the highest frequency; `modes` keeps all
# of them in ascending order and the smallest one is the value reported.
counts = Counter(arr)
max_count = max(counts.values())
modes = sorted(score for score, freq in counts.items() if freq == max_count)
mode = modes[0]

# Q1, Q2, Q3, D9 (= 90th percentile) and P95, using numpy's default
# linear interpolation between order statistics.
qs = np.percentile(arr, [25, 50, 75, 90, 95])
q1, q2, q3, p90, p95 = (float(q) for q in qs)

# Bias-corrected sample skewness and excess kurtosis (fisher=True means a
# normal distribution scores 0), plus their small-sample standard errors,
# which depend only on n.
skew = stats.skew(arr, bias=False)
kurt = stats.kurtosis(arr, fisher=True, bias=False)
se_skew = math.sqrt(
    (6 * n * (n - 1)) / ((n - 2) * (n + 1) * (n + 3))
)
se_kurt = math.sqrt(
    (24 * n * (n - 1) ** 2) / ((n - 3) * (n - 2) * (n + 3) * (n + 5))
)

# (label, value) pairs in the order they appear in the final table.
rows = [
    ("Valid", n),
    ("Mode", mode),
    ("Median", median),
    ("Mean", round(mean, 3)),
    ("Std. Deviation", round(std, 3)),
    ("Variance", round(var, 3)),
    ("Skewness", round(skew, 3)),
    ("Std. Error of Skewness", round(se_skew, 3)),
    ("Kurtosis", round(kurt, 3)),
    ("Std. Error of Kurtosis", round(se_kurt, 3)),
    ("Minimum", _min),
    ("Maximum", _max),
]
rows += [
    (f"{pct}th percentile", round(value, 3))
    for pct, value in zip((25, 50, 75, 90, 95), (q1, q2, q3, p90, p95))
]

# The label column is deliberately given an empty header; the default integer
# index is kept on the frame itself.
df = pd.DataFrame(data=rows, columns=["", "Score"])

# Render `df` (label column + "Score" column) as an SPSS-style HTML table.
styled = (
    df.style.set_table_styles(
        [
            # Header cells: grey background, centred, boxed.
            {'selector': 'th',
             'props': [('background-color', '#E6E6E6'),
                       ('color', 'black'),
                       ('text-align', 'center'),
                       ('border', '1px solid black')]},

            # Body cells: boxed and centred.
            {'selector': 'td',
             'props': [('border', '1px solid black'),
                       ('text-align', 'center')]},

            # Alternate row shading
            {'selector': 'tr:nth-child(even)', 'props': [('background-color', '#F2F2F2')]},
            {'selector': 'tr:nth-child(odd)', 'props': [('background-color', 'white')]}
        ]
    )
    .set_caption("Descriptive Statistics")
    # Styler prints floats with 6 decimals by default, which would show the
    # rounded values as e.g. 59.167000; display 3 decimals instead.
    .format(precision=3)
    # The sample size is a count, so show it without a fractional part.
    .format("{:.0f}", subset=(df.index[df[""] == "Valid"].tolist(), "Score"))
    .hide(axis="index")  # drop the 0, 1, 2, ... index column from the display
)

# Last expression of the cell: the notebook displays the styled table.
styled


Unnamed: 0,Score
Valid,60.0
Mode,28.0
Median,57.0
Mean,59.167
Std. Deviation,22.211
Variance,493.328
Skewness,0.167
Std. Error of Skewness,0.309
Kurtosis,-1.244
Std. Error of Kurtosis,0.608
