# 🌷Dataset

In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Load the SuperStore orders data; the file is decoded as Windows-1252 (cp1252).
df = pd.read_csv(
    'SuperStore_Orders.csv',
    encoding='cp1252',
)
# Print the column index to see which fields are available.
print(df.columns)

Index(['order_id', 'order_date', 'ship_date', 'ship_mode', 'customer_name',
       'segment', 'state', 'country', 'market', 'region', 'product_id',
       'category', 'sub_category', 'product_name', 'sales', 'quantity',
       'discount', 'profit', 'shipping_cost', 'order_priority', 'year'],
      dtype='object')


In [2]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,order_id,order_date,ship_date,ship_mode,customer_name,segment,state,country,market,region,...,category,sub_category,product_name,sales,quantity,discount,profit,shipping_cost,order_priority,year
0,AG-2011-2040,01-01-2011,06-01-2011,Standard Class,Toby Braunhardt,Consumer,Constantine,Algeria,Africa,Africa,...,Office Supplies,Storage,"Tenex Lockers, Blue",408,2,0.0,106.14,35.46,Medium,2011
1,IN-2011-47883,01-01-2011,08-01-2011,Standard Class,Joseph Holt,Consumer,New South Wales,Australia,APAC,Oceania,...,Office Supplies,Supplies,"Acme Trimmer, High Speed",120,3,0.1,36.036,9.72,Medium,2011
2,HU-2011-1220,01-01-2011,05-01-2011,Second Class,Annie Thurman,Consumer,Budapest,Hungary,EMEA,EMEA,...,Office Supplies,Storage,"Tenex Box, Single Width",66,4,0.0,29.64,8.17,High,2011
3,IT-2011-3647632,01-01-2011,05-01-2011,Second Class,Eugene Moren,Home Office,Stockholm,Sweden,EU,North,...,Office Supplies,Paper,"Enermax Note Cards, Premium",45,3,0.5,-26.055,4.82,High,2011
4,IN-2011-47883,01-01-2011,08-01-2011,Standard Class,Joseph Holt,Consumer,New South Wales,Australia,APAC,Oceania,...,Furniture,Furnishings,"Eldon Light Bulb, Duo Pack",114,5,0.1,37.77,4.7,Medium,2011


In [3]:
# Convert `sales` to float, removing commas from the text values first
# (presumably thousands separators - confirm against the raw CSV).
# The dtype guard keeps this cell idempotent: after the first run the column is
# already numeric and has no `.str` accessor, so re-running the original
# one-liner would raise AttributeError.
if pd.api.types.is_numeric_dtype(df['sales']):
    df['sales'] = df['sales'].astype(float)
else:
    df['sales'] = df['sales'].str.replace(',', '', regex=False).astype(float)

In [4]:
# Count missing values per column (isna is the same method as isnull).
df.isna().sum()

order_id          0
order_date        0
ship_date         0
ship_mode         0
customer_name     0
segment           0
state             0
country           0
market            0
region            0
product_id        0
category          0
sub_category      0
product_name      0
sales             0
quantity          0
discount          0
profit            0
shipping_cost     0
order_priority    0
year              0
dtype: int64

# 🌷One-Way ANOVA

In [5]:
# One-way ANOVA: sales modelled on the categorical factor order_priority.
# Floats are printed in scientific notation; this is a global pandas display
# option, so it stays in effect for later cells too.
pd.options.display.float_format = '{:.6e}'.format

model_1way = ols(
    'sales ~ C(order_priority)',
    data=df,
).fit()
anova_1way = sm.stats.anova_lm(model_1way, typ=2)  # typ=2 requests the Type II table

print("One-Way ANOVA:\n", anova_1way)

One-Way ANOVA:
                         sum_sq           df            F       PR(>F)
C(order_priority) 4.710560e+05 3.000000e+00 6.605014e-01 5.762584e-01
Residual          1.219204e+10 5.128600e+04          NaN          NaN


### P-value > α, gagal menolak H0. Tidak terdapat perbedaan sales yang signifikan antar kelompok order_priority.

In [6]:
# One-way ANOVA: sales modelled on the categorical factor sub_category.
# Note: this reuses (and overwrites) the names model_1way / anova_1way.
pd.options.display.float_format = '{:.6e}'.format  # scientific notation for printed floats

model_1way = ols(
    'sales ~ C(sub_category)',
    data=df,
).fit()
anova_1way = sm.stats.anova_lm(model_1way, typ=2)  # typ=2 requests the Type II table

print("One-Way ANOVA:\n", anova_1way)

One-Way ANOVA:
                       sum_sq           df            F       PR(>F)
C(sub_category) 2.583494e+09 1.600000e+01 8.615832e+02 0.000000e+00
Residual        9.609017e+09 5.127300e+04          NaN          NaN


### P-value < α, tolak H0. Terdapat perbedaan sales yang signifikan antar kelompok sub_category.

## 🌷Two-Way ANOVA

In [None]:
# Two-way ANOVA WITH interaction: in the formula, `A * B` expands to
# A + B + A:B, which is why the result table has a
# C(sub_category):C(region) row. Use `+` instead of `*` for a model
# without the interaction term.
model_2way = ols('sales ~ C(sub_category) * C(region)', data=df).fit()
anova_2way = sm.stats.anova_lm(model_2way, typ=2)
# Set the scientific-notation float format in this cell as well, so the printed
# table does not depend on an earlier cell having configured it.
pd.options.display.float_format = '{:.6e}'.format
print("Two-Way ANOVA:\n",anova_2way)

Two-Way ANOVA:
                                 sum_sq           df            F        PR(>F)
C(sub_category)           2.540239e+09 1.600000e+01 8.970765e+02  0.000000e+00
C(region)                 1.130401e+08 1.200000e+01 5.322627e+01 3.865614e-128
C(sub_category):C(region) 4.577648e+08 1.920000e+02 1.347150e+01  0.000000e+00
Residual                  9.038212e+09 5.106900e+04          NaN           NaN


### P-value < α, tolak H0. Terdapat perbedaan sales yang signifikan antar sub_category di region yang berbeda (efek interaksi sub_category × region signifikan).

## 🌷N-Way ANOVA

In [8]:
# Three-factor ANOVA on sales. The `*` between the factors expands to every
# main effect plus all two-way interactions and the three-way interaction.
pd.options.display.float_format = '{:.6e}'.format  # scientific notation for printed floats

model_nway = ols(
    'sales ~ C(sub_category) * C(region) * C(order_priority)',
    data=df,
).fit()
anova_nway = sm.stats.anova_lm(model_nway, typ=2)  # typ=2 requests the Type II table

print("N-Way ANOVA (3 faktor):\n", anova_nway)



N-Way ANOVA (3 faktor):
                                                   sum_sq           df  \
C(sub_category)                             1.188796e+09 1.600000e+01   
C(region)                                   5.607816e+07 1.200000e+01   
C(order_priority)                           2.833498e+05 3.000000e+00   
C(sub_category):C(region)                   4.529534e+08 1.920000e+02   
C(sub_category):C(order_priority)           5.445273e+06 4.800000e+01   
C(region):C(order_priority)                 7.697670e+06 3.600000e+01   
C(sub_category):C(region):C(order_priority) 1.588394e+08 5.760000e+02   
Residual                                    8.883007e+09 5.042900e+04   

                                                       F       PR(>F)  
C(sub_category)                             4.218011e+02 0.000000e+00  
C(region)                                   2.652973e+01 1.041364e-60  
C(order_priority)                           5.361941e-01 6.574476e-01  
C(sub_category):C(region)    



## P-value < α, tolak H0. Terdapat perbedaan sales yang signifikan antar kelompok sub_category.
## P-value < α, tolak H0. Terdapat perbedaan sales yang signifikan antar sub_category, tergantung region-nya (interaksi sub_category × region).
## P-value < α, tolak H0. Terdapat perbedaan sales yang signifikan antar sub_category, tergantung region dan order_priority-nya (interaksi tiga faktor).
## P-value > α, gagal menolak H0. Untuk faktor dengan p-value > α (misalnya order_priority, p ≈ 0,657), tidak terdapat perbedaan sales yang signifikan.
