In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import re
import matplotlib.pyplot as plt
import seaborn as sns

# Statistical Analysis: Priority

### Import cleaned CAD data

In [2]:
# Load the cleaned CAD data.
# `priority` mixes digit codes ('1'-'9') with the letter 'P', and every filter
# further down compares it against strings. Pinning the column to str stops
# pandas from inferring integers for part of a large file (chunked type
# inference), which would make those string comparisons silently match nothing.
cad = pd.read_csv("../data/cleaned_full_class_data.csv", dtype={'priority': str})

#### Convert `calltime` back to a datetime object

In [3]:
# CSV round-tripping leaves `calltime` as text; parse it again.
# errors='coerce' turns any unparseable value into NaT instead of raising.
calltime_parsed = pd.to_datetime(cad['calltime'], errors='coerce')
cad['calltime'] = calltime_parsed

#### Create `results` DataFrame that will later be exported as a .csv file

In [4]:
# Empty accumulator: one row per statistical test (label, statistic, p-value).
RESULT_COLUMNS = ['test', 'statistic', 'p_value']
results = pd.DataFrame(columns=RESULT_COLUMNS)

## Test #1: T-Test (10x)
Use: to determine if there is a significant difference between the means of two groups.

#### Hypotheses:
- H₀: The mean arrival time is the same for a priority, no matter if EPD or CAHOOTS has responded to it.
- H₁: The mean arrival time is different for a priority depending on whether EPD or CAHOOTS has responded to it.

**input**: the continuous response variable (`secs_to_arrv`) for each priority, from both groups, EPD and CAHOOTS

**output**: T-stat and p-value

#### Split into separate arrays based on `agency`

In [5]:
# Row subsets of `cad` for each responding agency.
agency_col = cad['agency']
cahoots = cad.loc[agency_col == 'CAHOOTS']
epd = cad.loc[agency_col == 'EPD']

# `secs_to_arrv` values for each agency across all priorities.
cahoots_times = cahoots.loc[:, 'secs_to_arrv']
epd_times = epd.loc[:, 'secs_to_arrv']

#### Split each priority *and* agency into separate arrays

In [6]:
# Priority codes the `priority` column is compared against, in output order.
PRIORITY_CODES = ('1', '2', '3', '4', '5', '6', '7', '8', '9', 'P')


def _arrival_times(agency_calls, priority_code):
    """Return the `secs_to_arrv` values of `agency_calls` rows at one priority."""
    at_priority = agency_calls['priority'] == priority_code
    return agency_calls.loc[at_priority, 'secs_to_arrv']


# One Series per (agency, priority); names follow `<agency>_p<code>`,
# with `pp` standing for priority 'P'.
(
    cahoots_p1, cahoots_p2, cahoots_p3, cahoots_p4, cahoots_p5,
    cahoots_p6, cahoots_p7, cahoots_p8, cahoots_p9, cahoots_pp,
) = [_arrival_times(cahoots, code) for code in PRIORITY_CODES]

(
    epd_p1, epd_p2, epd_p3, epd_p4, epd_p5,
    epd_p6, epd_p7, epd_p8, epd_p9, epd_pp,
) = [_arrival_times(epd, code) for code in PRIORITY_CODES]

#### Analysis Steps

1. Create a dictionary whose key is the priority and whose value is a tuple of (cahoots, epd) arrival times
2. Use a `for` loop to iterate over each priority pair, dropping missing values from both groups
3. Normalize each pair using the combined mean and standard deviation
4. Use `scipy.stats` to get the t-stat and p-value

In [8]:
from scipy.stats import ttest_ind

# Map each priority code to its (CAHOOTS, EPD) `secs_to_arrv` samples.
priority_pairs = {
    '1': (cahoots_p1, epd_p1),
    '2': (cahoots_p2, epd_p2),
    '3': (cahoots_p3, epd_p3),
    '4': (cahoots_p4, epd_p4),
    '5': (cahoots_p5, epd_p5),
    '6': (cahoots_p6, epd_p6),
    '7': (cahoots_p7, epd_p7),
    '8': (cahoots_p8, epd_p8),
    '9': (cahoots_p9, epd_p9),
    'P': (cahoots_pp, epd_pp),
}

for pr, (c_group, e_group) in priority_pairs.items():
    # Missing arrival times cannot take part in the test.
    c_valid = c_group.dropna()
    e_valid = e_group.dropna()

    # Standardize both samples with the combined mean and population std
    # (ddof=0). Both groups get the same shift and scale, so the Welch
    # t-statistic and p-value are the same as on the raw values.
    combined = np.concatenate([c_valid, e_valid])
    mean_comb = np.mean(combined)
    std_comb = np.std(combined, ddof=0)

    c_norm = (c_valid - mean_comb) / std_comb
    e_norm = (e_valid - mean_comb) / std_comb

    # equal_var=False selects Welch's t-test (no equal-variance assumption).
    # CAHOOTS is the first sample, so a positive t means its mean is larger.
    t_stat, p_val = ttest_ind(c_norm, e_norm, equal_var=False)

    label = f't_test_priority_{pr}'

    # Reuse the row for this label if the cell has already been run, so a
    # re-run overwrites instead of appending duplicate rows.
    existing_rows = results.index[(results['test'] == label).to_numpy()]
    row_key = existing_rows[0] if len(existing_rows) else len(results)

    # Store a plain list: wrapping the row in np.array would coerce the
    # statistic and p-value to strings alongside the label.
    results.loc[row_key] = [label, float(t_stat), float(p_val)]

    print(f"Priority {pr}: t-stat = {t_stat:.3f}, p = {p_val:.3g}")

Priority 1: t-stat = 23.790, p = 1.36e-118
Priority 2: t-stat = 1.858, p = 0.0742
Priority 3: t-stat = -11.510, p = 1.62e-30
Priority 4: t-stat = -13.788, p = 2.07e-30
Priority 5: t-stat = -60.917, p = 0
Priority 6: t-stat = 8.428, p = 1.02e-15
Priority 7: t-stat = 0.263, p = 0.793
Priority 8: t-stat = 11.081, p = 9.28e-28
Priority 9: t-stat = 2.014, p = 0.0748
Priority P: t-stat = 0.586, p = 0.558


#### Take a quick look at the results so far

In [9]:
# Bare last expression: the notebook renders the rows accumulated in `results` so far.
results

Unnamed: 0,test,statistic,p_value
0,t_test_priority_1,23.790154773261467,1.3561496607144252e-118
1,t_test_priority_2,1.8584567707792523,0.0742427222325805
2,t_test_priority_3,-11.509557948635326,1.6176541074831057e-30
3,t_test_priority_4,-13.787720283502995,2.0653150136919398e-30
4,t_test_priority_5,-60.916936226627385,0.0
5,t_test_priority_6,8.427591028814543,1.0231224689433485e-15
6,t_test_priority_7,0.2626908499800568,0.7928368673809696
7,t_test_priority_8,11.081161385080389,9.277154753561876e-28
8,t_test_priority_9,2.01427618515638,0.0747576586066936
9,t_test_priority_P,0.5856342080713479,0.5582135314924455


### For priorities where the difference is *not statistically significant*, look at the means of the two groups

In [10]:
# Mean `secs_to_arrv` per agency for priority 2.
cahoots_p2_mean = np.mean(cahoots_p2)
epd_p2_mean = np.mean(epd_p2)

print(f'cahoots priority 2 mean: {cahoots_p2_mean}')
print(f'epd priority 2 mean: {epd_p2_mean}')

cahoots priority 2 mean: 1895.148148148148
epd priority 2 mean: 1080.2974759615386


In [11]:
# Mean `secs_to_arrv` per agency for priority 7.
cahoots_p7_mean = np.mean(cahoots_p7)
epd_p7_mean = np.mean(epd_p7)

print(f'cahoots priority 7 mean: {cahoots_p7_mean}')
print(f'epd priority 7 mean: {epd_p7_mean}')

cahoots priority 7 mean: 8139.888491573108
epd priority 7 mean: 8027.045739910314


In [12]:
# Mean `secs_to_arrv` per agency for priority 9, plus the size of each group
# (row counts as returned by len(), so missing values are included).
cahoots_p9_mean = np.mean(cahoots_p9)
epd_p9_mean = np.mean(epd_p9)
cahoots_p9_count = len(cahoots_p9)
epd_p9_count = len(epd_p9)

print(f'cahoots priority 9 mean: {cahoots_p9_mean}')
print(f'epd priority 9 mean: {epd_p9_mean}')

print(f'len of CAHOOTS: {cahoots_p9_count}, len of EPD: {epd_p9_count}')

cahoots priority 9 mean: 26114.0
epd priority 9 mean: 5446.336475707034
len of CAHOOTS: 10, len of EPD: 1379


In [13]:
# Mean `secs_to_arrv` per agency for priority 'P'.
cahoots_pp_mean = np.mean(cahoots_pp)
epd_pp_mean = np.mean(epd_pp)

print(f'cahoots priority P mean: {cahoots_pp_mean}')
print(f'epd priority P mean: {epd_pp_mean}')

cahoots priority P mean: 867.2190847127556
epd priority P mean: 838.4364406779661


### RESULTS AND INTERPRETATION:

| Priority | t-stat | p-value   | Interpretation                                  |
| -------- | ------ | --------- | ----------------------------------------------- |
| 1        | 23.790  | 1.36e-118 | Huge difference — EPD is **much faster** |
| 2        | 1.858   | 0.0742  | Marginal — not statistically significant |
| 3        | -11.510 | 1.62e-30  | CAHOOTS is **faster**                           |
| 4        | -13.788 | 2.07e-30  | CAHOOTS is **faster**                           |
| 5        | -60.917 | 0   | Huge difference — CAHOOTS is **much faster** |
| 6        | 8.428  | 1.02e-15 | EPD is **faster**                               |
| 7        | 0.263 | 0.793  | No difference                            |
| 8        | 11.081  | 9.28e-28  | EPD is **faster**                               |
| 9        | 2.014   | 0.0748   | Marginal — not statistically significant (only 10 CAHOOTS calls) |
| P        | 0.586  | 0.558  | No difference                            |


### General consensus: 
- CAHOOTS faster for mid-priority calls
- EPD faster for (most) high priority calls
- **Not uniform across all priority levels**

### Save results to .csv file

In [14]:
# Write the accumulated test results to disk (the index is written as the first column).
results_path = '../results/priority_results.csv'
results.to_csv(results_path)