In [2]:
import scipy.stats as st
import math
import numpy as np
import pandas as pd

In [14]:
carrier_data = {
    "Value": ['Average', 'Variance', 'Boxes'],
    "FedEx": [8, 26.8114, 8],
    "UPS": [14.475, 37.7879, 8],
    "DHL": [9, 13.1171, 8]
}

carrier_df = pd.DataFrame(carrier_data)
carrier_df.set_index('Value', inplace=True)
carrier_df

Unnamed: 0_level_0,FedEx,UPS,DHL
Value,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Average,8.0,14.475,9.0
Variance,26.8114,37.7879,13.1171
Boxes,8.0,8.0,8.0


In [15]:
y_bar = carrier_df.loc['Average'].mean()

ssb = np.sum( carrier_df.loc['Boxes'] * ( ( carrier_df.loc['Average'] - y_bar ) ** 2 ) )
ssw = np.sum( (carrier_df.loc['Boxes'] - 1) * carrier_df.loc['Variance'] )
sst = ssb + ssw

In [16]:
print(f"SSB: {ssb}\nSSW: {ssw}\nSST: {sst}")

SSB: 194.4033333333333
SSW: 544.0148
SST: 738.4181333333333


In [18]:
df_1 = len(carrier_df.columns) - 1
df_t = (len(carrier_df.columns) * carrier_df.loc['Boxes'].max()) - 1
df_2 = df_t - df_1

print(f"df1: {df_1}\ndf2: {df_2}\ndfT: {df_t}")
# df_t

df1: 2
df2: 21.0
dfT: 23.0


In [19]:
msb = ssb / df_1
msw = ssw / df_2

print(f"MSB: {msb}\nMSW: {msw}")

MSB: 97.20166666666665
MSW: 25.90546666666667


In [20]:
f = msb / msw

print(f"F-stat: {f}")

F-stat: 3.7521681395432616


In [21]:
alpha = 0.05

critical_val = st.f.ppf(1 - alpha, df_1, df_2)
print(f"Critical value: {critical_val}")

Critical value: 3.4668001115424154


In [22]:
# Bonferroni => controlling experimentwise error

bonf_d = {
    # "Control": [1, 1, 1],
    "FedEx": [1, -1, -1],
    "UPS": [-1, 1, -1],
    "DHL": [-1, -1, 1]
}

bonf_carr = pd.DataFrame(bonf_d)
bonf_carr

Unnamed: 0,FedEx,UPS,DHL
0,1,-1,-1
1,-1,1,-1
2,-1,-1,1


In [23]:
alpha_e = 0.05
conts = m = len(bonf_carr.columns)

alpha_l = alpha_e / m
print(f"Experimentwise error rate: {alpha_l}")

crit_l = st.f.ppf(1 - alpha_l, df_1, df_2)
print(f"Critical value of ex. error: {crit_l}")

Experimentwise error rate: 0.016666666666666666
Critical value of ex. error: 5.007330296972289


In [24]:
# bonf_carr['sum_a2'] = 2

In [25]:
bonf_carr.T

Unnamed: 0,0,1,2
FedEx,1,-1,-1
UPS,-1,1,-1
DHL,-1,-1,1


In [26]:
# establish pairs 
pairs = pd.DataFrame()
# establish orthogonals
orth_df = pd.DataFrame()
# create dataframe to concat/build on
calcs = pd.DataFrame()

i = 0
while i < len(bonf_carr.columns):
    j = i + 1
    while j < len(bonf_carr.columns):
        # create pair dictionary to concat with pairs
        pair_dict = {
            "pair": f"{bonf_carr.columns[i]} and {bonf_carr.columns[j]}"
        }
        # create dataframe to append
        pair_df = pd.DataFrame(pair_dict, index=[len(pairs)])
        # concat/append to pairs df
        pairs = pd.concat([pairs, pair_df])
        # print(pairs)
        # concat/append to calcs df
        calcs = pd.concat([calcs, pd.DataFrame(bonf_carr.iloc[:, i] * bonf_carr.iloc[:, j]).sum()], ignore_index=True)
        # print(calcs)
        j += 1    
    i += 1

# join calculations to end df
orth_df = pd.concat([orth_df, calcs]) 
# update format for output
orth_df.rename(columns={0: "ord_calc"}, inplace=True)
# join with pair names
orth_df = pd.concat([orth_df, pairs], axis=1)
# set pair name to index for clean display
orth_df.set_index('pair', inplace=True)
# display orthogonality
orth_df["orth_check"] = orth_df["ord_calc"].apply(lambda x: "not orthogonal" if x != 0 else "orthogonal")
# display orthogonal df
orth_df

Unnamed: 0_level_0,ord_calc,orth_check
pair,Unnamed: 1_level_1,Unnamed: 2_level_1
FedEx and UPS,-1.0,not orthogonal
FedEx and DHL,-1.0,not orthogonal
UPS and DHL,-1.0,not orthogonal


In [27]:
joined_cont = pd.concat([bonf_carr.T, carrier_df.T], axis=1)
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes
FedEx,1,-1,-1,8.0,26.8114,8.0
UPS,-1,1,-1,14.475,37.7879,8.0
DHL,-1,-1,1,9.0,13.1171,8.0


In [28]:
joined_cont['StdDev'] = joined_cont["Variance"].values**(1/2)
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754


In [29]:
# joined_cont["lValue"] = (joined_cont[0] * joined_cont['Average']) + (joined_cont[1] * joined_cont['Average']) + (joined_cont[2] * joined_cont['Average'])

joined_cont["lValue"] = (joined_cont['Variance'] / joined_cont['Boxes'])
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638


In [30]:
joined_cont["lValue"]['Control'] = joined_cont['lValue'].sum() * -1
joined_cont

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  joined_cont["lValue"]['Control'] = joined_cont['lValue'].sum() * -1


Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638


In [31]:
# joined_cont

In [32]:
joined_cont['ssc'] = (joined_cont['Boxes'] * joined_cont['lValue']**2) / 3
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096


In [33]:
np.sum(joined_cont.loc[:, 0:2]**2 ) - 1

0    2
1    2
2    2
dtype: int64

In [34]:
joined_cont['F_val'] = joined_cont['ssc'] / msw
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.156209
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,2.296693
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.276741


In [35]:
joined_cont["a2_n"] = pd.DataFrame(joined_cont.loc[:, 0:2]**2).sum(axis=1) / joined_cont["Boxes"]

In [36]:
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.156209,0.375
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,2.296693,0.375
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.276741,0.375


In [37]:
joined_cont["v_l"] = msw * joined_cont["a2_n"]
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.156209,0.375,9.71455
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,2.296693,0.375,9.71455
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.276741,0.375,9.71455


In [38]:
alpha = 0.05

joined_cont["scheffe_crit"] = math.sqrt(msw * st.f.ppf(1 - alpha, df_1, df_2)) * (joined_cont["v_l"].values ** (1/2))
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l,scheffe_crit
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.156209,0.375,9.71455,29.537345
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,2.296693,0.375,9.71455,29.537345
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.276741,0.375,9.71455,29.537345


In [41]:
# crit_val = round(abs(st.t.ppf(alpha, df_diff)), 4)
# print(f"Critical value: {crit_val}")


# prod = round(math.sqrt( var_diff / n_diff ), 4)
# print(f"Multiplier: {round(prod * crit_val, 4)}")

# lower = round(mean_diff - (crit_val * prod), 4)
# upper = round(mean_diff + (crit_val * prod), 4)


joined_cont["lower95"] = joined_cont["lValue"] - joined_cont['scheffe_crit']
joined_cont["upper95"] = joined_cont["lValue"] + joined_cont['scheffe_crit']
joined_cont

Unnamed: 0,0,1,2,Average,Variance,Boxes,StdDev,lValue,ssc,F_val,a2_n,v_l,scheffe_crit,lower95,upper95
FedEx,1,-1,-1,8.0,26.8114,8.0,5.177973,3.351425,29.952132,1.156209,0.375,9.71455,29.537345,-26.18592,32.88877
UPS,-1,1,-1,14.475,37.7879,8.0,6.147186,4.723488,59.496891,2.296693,0.375,9.71455,29.537345,-24.813857,34.260832
DHL,-1,-1,1,9.0,13.1171,8.0,3.621754,1.639638,7.169096,0.276741,0.375,9.71455,29.537345,-27.897707,31.176982
