In [17]:
import scipy.stats as st
import math
import numpy as np
import pandas as pd

In [103]:
carrier_data = {
    "Value": ['Average', 'Variance', 'Boxes'],
    "Control": [0, 0, 0],
    "FedEx": [8, 26.8114, 8],
    "UPS": [14.475, 37.7879, 8],
    "DHL": [9, 13.1171, 8]
}

carrier_df = pd.DataFrame(carrier_data)
carrier_df.set_index('Value', inplace=True)
carrier_df

Unnamed: 0_level_0,Control,FedEx,UPS,DHL
Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Average,0,8.0,14.475,9.0
Variance,0,26.8114,37.7879,13.1171
Boxes,0,8.0,8.0,8.0


In [104]:
y_bar = carrier_df.loc['Average'].mean()

ssb = np.sum( carrier_df.loc['Boxes'] * ( ( carrier_df.loc['Average'] - y_bar ) ** 2 ) )
ssw = np.sum( (carrier_df.loc['Boxes'] - 1) * carrier_df.loc['Variance'] )
sst = ssb + ssw

In [105]:
print(f"SSB: {ssb}\nSSW: {ssw}\nSST: {sst}")

SSB: 359.51593749999995
SSW: 544.0148
SST: 903.5307375


In [106]:
df_1 = len(carrier_df.columns) - 1
df_t = (len(carrier_df.columns) * carrier_df.loc['Boxes'].max()) - 1
df_2 = df_t - df_1

print(f"df1: {df_1}\ndf2: {df_2}\ndfT: {df_t}")
# df_t

df1: 3
df2: 28.0
dfT: 31.0


In [107]:
msb = ssb / df_1
msw = ssw / df_2

print(f"MSB: {msb}\nMSW: {msw}")

MSB: 119.83864583333332
MSW: 19.429100000000002


In [108]:
f = msb / msw

print(f"F-stat: {f}")

F-stat: 6.167997788540555


In [109]:
alpha = 0.05

critical_val = st.f.ppf(1 - alpha, df_1, df_2)
print(f"Critical value: {critical_val}")

Critical value: 2.9466852660172655


In [208]:
# Bonferroni => controlling experimentwise error

bonf_d = {
    "Control": [1, 1, 1],
    "FedEx": [1, -1, -1],
    "UPS": [-1, 1, -1],
    "DHL": [-1, -1, 1]
}

bonf_carr = pd.DataFrame(bonf_d)
bonf_carr

Unnamed: 0,Control,FedEx,UPS,DHL
0,1,1,-1,-1
1,1,-1,1,-1
2,1,-1,-1,1


In [209]:
alpha_e = 0.05
conts = m = len(bonf_carr.columns)

alpha_l = alpha_e / m
print(f"Experimentwise error rate: {alpha_l}")

crit_l = st.f.ppf(1 - alpha_l, df_1, df_2)
print(f"Critical value of ex. error: {crit_l}")

Experimentwise error rate: 0.0125
Critical value of ex. error: 4.33388979275124


In [210]:
# bonf_carr['sum_a2'] = 2

In [211]:
bonf_carr.T

Unnamed: 0,0,1,2
Control,1,1,1
FedEx,1,-1,-1
UPS,-1,1,-1
DHL,-1,-1,1


In [212]:
# establish pairs 
pairs = pd.DataFrame()
# establish orthogonals
orth_df = pd.DataFrame()
# create dataframe to concat/build on
calcs = pd.DataFrame()

i = 0
while i < len(bonf_carr.columns):
    j = i + 1
    while j < len(bonf_carr.columns):
        # create pair dictionary to concat with pairs
        pair_dict = {
            "pair": f"{bonf_carr.columns[i]} and {bonf_carr.columns[j]}"
        }
        # create dataframe to append
        pair_df = pd.DataFrame(pair_dict, index=[len(pairs)])
        # concat/append to pairs df
        pairs = pd.concat([pairs, pair_df])
        # print(pairs)
        # concat/append to calcs df
        calcs = pd.concat([calcs, pd.DataFrame(bonf_carr.iloc[:, i] * bonf_carr.iloc[:, j]).sum()], ignore_index=True)
        # print(calcs)
        j += 1    
    i += 1

# join calculations to end df
orth_df = pd.concat([orth_df, calcs]) 
# update format for output
orth_df.rename(columns={0: "ord_calc"}, inplace=True)
# join with pair names
orth_df = pd.concat([orth_df, pairs], axis=1)
# set pair name to index for clean display
orth_df.set_index('pair', inplace=True)
# display orthogonality
orth_df["orth_check"] = orth_df["ord_calc"].apply(lambda x: "not orthogonal" if x != 0 else "orthogonal")
# display orthogonal df
orth_df

Unnamed: 0_level_0,ord_calc,orth_check
pair,Unnamed: 1_level_1,Unnamed: 2_level_1
Control and FedEx,-1.0,not orthogonal
Control and UPS,-1.0,not orthogonal
Control and DHL,-1.0,not orthogonal
FedEx and UPS,-1.0,not orthogonal
FedEx and DHL,-1.0,not orthogonal
UPS and DHL,-1.0,not orthogonal


In [213]:
joined_cont = pd.concat([bonf_carr.T, carrier_df.T], axis=1)
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes
Control,1,1,1,0.0,0.0,0.0
FedEx,1,-1,-1,8.0,26.8114,8.0
UPS,-1,1,-1,14.475,37.7879,8.0
DHL,-1,-1,1,9.0,13.1171,8.0


In [214]:
joined_cont['StdDev'] = joined_cont["Variance"].values**(1/2)
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev
Control,1,1,1,0.0,0.0,0.0,0.0
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754


In [215]:
# joined_cont["lValue"] = (joined_cont[0] * joined_cont['Average']) + (joined_cont[1] * joined_cont['Average']) + (joined_cont[2] * joined_cont['Average'])

joined_cont["lValue"] = (joined_cont['Variance'] / joined_cont['Boxes'])
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue
Control,1,1,1,0.0,0.0,0.0,0.0,
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638


In [216]:
joined_cont["lValue"]['Control'] = joined_cont['lValue'].sum() * -1
joined_cont

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_cont["lValue"]['Control'] = joined_cont['lValue'].sum() * -1


Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638


In [217]:
# joined_cont

In [230]:
joined_cont['ssc'] = (joined_cont['Boxes'] * joined_cont['lValue']**2) / 3
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096


In [228]:
np.sum(joined_cont.loc[:, 0:2]**2 ) - 1

0    3
1    3
2    3
dtype: int64

In [231]:
joined_cont['F_val'] = joined_cont['ssc'] / msw
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0,0.0
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.541612
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,3.062257
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.368988


In [244]:
joined_cont["a2_n"] = pd.DataFrame(joined_cont.loc[:, 0:2]**2).sum(axis=1) / joined_cont["Boxes"]

In [245]:
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0,0.0,inf
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.541612,0.375
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,3.062257,0.375
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.368988,0.375


In [247]:
joined_cont["v_l"] = msw * joined_cont["a2_n"]
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0,0.0,inf,inf
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.541612,0.375,7.285913
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,3.062257,0.375,7.285913
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.368988,0.375,7.285913


In [255]:
alpha = 0.05

joined_cont["scheffe_crit"] = math.sqrt(msw * st.f.ppf(1 - alpha, df_1, df_2)) * (joined_cont["v_l"].values ** (1/2))
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l,scheffe_crit
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0,0.0,inf,inf,inf
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.541612,0.375,7.285913,20.423736
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,3.062257,0.375,7.285913,20.423736
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.368988,0.375,7.285913,20.423736


In [270]:
# crit_val = round(abs(st.t.ppf(alpha, df_diff)), 4)
# print(f"Critical value: {crit_val}")


# prod = round(math.sqrt( var_diff / n_diff ), 4)
# print(f"Multiplier: {round(prod * crit_val, 4)}")

# lower = round(mean_diff - (crit_val * prod), 4)
# upper = round(mean_diff + (crit_val * prod), 4)


joined_cont["lower95"] = joined_cont["lValue"] - (joined_cont['F_val'] * (joined_cont['scheffe_crit'] / joined_cont["Boxes"]))
joined_cont["upper95"] = joined_cont["lValue"] + (joined_cont['F_val'] * (joined_cont['scheffe_crit'] / joined_cont["Boxes"]))
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l,scheffe_crit,lower95,upper95
Control,1,1,1,0.0,0.0,0.0,0.0,-9.71455,0.0,0.0,inf,inf,inf,,
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.541612,0.375,7.285913,20.423736,-0.584259,7.287109
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,3.062257,0.375,7.285913,20.423736,-3.094353,12.541328
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.368988,0.375,7.285913,20.423736,0.697624,2.581651
