In [None]:
import numpy as np
import pandas as pd

import seaborn as sns

import matplotlib.pyplot as plt

import math
import scipy as sp
from scipy import stats

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Analysis periods, one tuple per year: (year, start day, end day).
# The day strings look like 'MM-DD'; they are handed unchanged to the data loaders.
periods = [
    ('2019', '08-01', '12-31'),
    ('2020', '01-01', '12-31'),
    ('2021', '01-01', '07-31'),
]

# Unzip into the three parallel lists used by the rest of the notebook.
year_list, start_list, end_list = (list(column) for column in zip(*periods))

In [None]:
# Import modules from the parent directory: put it at the front of sys.path
# so that the `delivery` package can be resolved.
import inspect, os, sys

currentdir = os.path.dirname(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)
parentdir = os.path.dirname(currentdir)
sys.path.insert(0, parentdir)

from delivery.delivery_data import get_dataframe as get_delivery

# One delivery frame per analysis period, in the same order as year_list.
delivery_list = [
    get_delivery(year, start, end, '../delivery/original_data/')
    for year, start, end in zip(year_list, start_list, end_list)
]

In [None]:
from weather.weather_data import get_dataframe as get_weather

# Index labels to remove per year: these rows are missing from the delivery
# data, so they are dropped here too. Years not listed keep every row.
missing_dates_by_year = {
    '2020': ['05-18'],
    '2021': ['03-27', '03-28', '03-29', '03-30', '03-31', '07-20', '07-21'],
}

weather_list = []

for year, start, end in zip(year_list, start_list, end_list):
    weather_df = get_weather(year, start, end, '../weather/original_data/')

    # Columns that the analysis below does not use.
    for unused_column in ('cloud', 'snow', 'wind'):
        del weather_df[unused_column]

    # Remove the rows that are missing from the delivery data.
    for missing_date in missing_dates_by_year.get(year, []):
        weather_df = weather_df.drop(missing_date)

    weather_list.append(weather_df)

In [None]:
# Thresholds used to label each day.
# NOTE(review): units follow the weather data (presumably mm of rain and °C) — confirm.
RAIN_THRESHOLD = 3.0   # rain >= this value counts as a rainy day
HOT_THRESHOLD = 24.0   # temp above this value counts as a hot day
COLD_THRESHOLD = 1.2   # temp below this value counts as a cold day

# For every period, the list of dates falling in each weather class.
sunny_days, rainy_days = [], []
warm_days, hot_days, cold_days = [], [], []

for weather_df in weather_list:
    # Expose the index as a 'date' column on a separate frame; the loop
    # variable (and therefore the frame stored in weather_list) is left alone.
    dated = weather_df.reset_index()
    rain, temp = dated['rain'], dated['temp']

    sunny_days.append(dated.loc[rain < RAIN_THRESHOLD, 'date'].tolist())
    rainy_days.append(dated.loc[rain >= RAIN_THRESHOLD, 'date'].tolist())

    # Warm is the closed interval between the cold and hot thresholds.
    is_warm = (temp <= HOT_THRESHOLD) & (temp >= COLD_THRESHOLD)
    warm_days.append(dated.loc[is_warm, 'date'].tolist())
    hot_days.append(dated.loc[temp > HOT_THRESHOLD, 'date'].tolist())
    cold_days.append(dated.loc[temp < COLD_THRESHOLD, 'date'].tolist())

In [None]:
def _rows_for(day_lists):
    """Select, for each period, the delivery rows whose index labels are in that period's day list."""
    return [
        delivery_df.loc[days]
        for delivery_df, days in zip(delivery_list, day_lists)
    ]


# Delivery rows split by rainfall class, one frame per period.
sunny_list = _rows_for(sunny_days)
rainy_list = _rows_for(rainy_days)

# Delivery rows split by temperature class, one frame per period.
warm_list = _rows_for(warm_days)
hot_list = _rows_for(hot_days)
cold_list = _rows_for(cold_days)

## 이항검정(binom_test)

#### 귀무가설 : 온도가 내려간 날에 심부름양이 전날보다 증가할 확률은 0.5이다. (온도 하락은 심부름양에 영향을 주지 않는다.)
#### 대립가설 : 온도가 내려간 날에 심부름양이 전날보다 증가할 확률은 0.5보다 크다. (온도가 내려가면 심부름양이 증가한다.)

In [None]:
# Delivery categories as a list (the total-delivery-count column is included).
categories_s = delivery_list[0].columns.tolist()

# Day-over-day temperature change for the first period, rounded to one decimal.
# The first day has no previous day, so its change is recorded as 0.
temp_diff = [0] + [
    round(delta, 1) for delta in np.diff(weather_list[0]['temp']).tolist()
]

# Build a NEW frame holding temp_diff in place of the raw 'temp'/'rain'
# columns. drop() and assign() both return copies, so weather_list[0] itself
# is not modified and this cell can be re-run safely.
weather_df = (
    weather_list[0]
    .drop(columns=['temp', 'rain'])
    .assign(temp_diff=temp_diff)
)

# Shallow copy of the list of delivery frames.
all_delivery_list = delivery_list.copy()

# Day-over-day change of errand ('simburum') deliveries; first day is 0.
sim_diff = [0] + np.diff(all_delivery_list[0]['simburum']).tolist()

# Frame with only the errand change, on the delivery frame's own index,
# then the temperature change joined on that index.
sim_delivery = pd.DataFrame(
    {'simburum_diff': sim_diff},
    index=all_delivery_list[0].index,
)
sim_delivery = sim_delivery.join(weather_df)

# Days on which the temperature fell AND errand deliveries rose versus the previous day.
both = (sim_delivery.simburum_diff > 0) & (sim_delivery.temp_diff < 0)
cold_simburum_df = sim_delivery[both]

# Days on which the temperature fell versus the previous day.
colder_days = weather_df[weather_df.temp_diff < 0]

# Colder days on which errand deliveries rose. cold_simburum_df already
# satisfies simburum_diff > 0, so this holds the same rows.
sim_increase = cold_simburum_df.copy()

# NOTE: to count days use len(frame). DataFrame.size is rows x columns, so it
# over-counts for any frame with more than one column.




In [None]:
# Show the days on which the temperature fell and errand deliveries rose versus the previous day.
cold_simburum_df

Unnamed: 0_level_0,simburum_diff,temp_diff
date,Unnamed: 1_level_1,Unnamed: 2_level_1
08-27,6.0,-0.7
08-29,7.0,-2.7
08-30,12.0,-0.8
09-30,9.0,-0.2
10-05,1.0,-3.7
10-12,43.0,-0.9
10-14,94.0,-2.8
10-23,3.0,-1.1
10-25,4.0,-1.6
10-30,4.0,-1.3


In [None]:
# Report the number of days (rows). len() is used because DataFrame.size is
# rows x columns and would over-count sim_increase, which has more than one column.
print("전날대비 온도가 낮아진 날: ", len(colder_days))
print("온도가 내려간 날 중 심부름 배달이 증가한 날: ", len(sim_increase))

전날대비 온도가 낮아진 날:  71
온도가 내려간 날 중 심부름 배달이 증가한 날:  52


In [None]:
# Worked example of the one-sided binomial test with made-up counts:
# 60 "increase" days out of 100, tested against a success probability of 0.5.
증가한날 = 60
감소한날 = 40
전체일수 = 증가한날 + 감소한날

# stats.binomtest replaces binom_test, which newer SciPy releases no longer
# provide; .pvalue is the same number binom_test used to return.
example_p_value = stats.binomtest(
    증가한날, 전체일수, p=0.5, alternative="greater"
).pvalue
example_p_value

0.02844396682049044

In [None]:
# Same one-sided test on the observed data. The counts are derived from the
# frames rather than typed in by hand, and len() is used so that days (rows)
# are counted, not cells.
증가한날 = len(sim_increase)      # colder days on which errand deliveries rose
전체일수 = len(colder_days)       # all days colder than the previous day
감소한날 = 전체일수 - 증가한날    # colder days on which errand deliveries did not rise

# stats.binomtest replaces binom_test, which newer SciPy releases no longer provide.
stats.binomtest(증가한날, 전체일수, p=0.5, alternative="greater").pvalue

5.6134323447494e-05

##### 결론: 

2-1)  stats.binom_test(x, p=, alternative="greater") : 단측검정

In [None]:
# One-sided test: is the share of colder days with more errand deliveries
# greater than 0.5? len() counts days (rows); DataFrame.size would count cells.
p_value = stats.binomtest(
    len(sim_increase), len(colder_days), p=0.5, alternative="greater"
).pvalue
print(p_value)

5.6134323447494e-05


In [None]:
# One-sided test in the other direction: is the share of colder days with
# more errand deliveries less than 0.5? len() counts days (rows), not cells.
p_value = stats.binomtest(
    len(sim_increase), len(colder_days), p=0.5, alternative="less"
).pvalue
print(p_value)

0.9999805762086031


In [None]:
# Sanity check of the two counts fed to the test, both measured in days (rows).
print(len(cold_simburum_df))
print(len(colder_days))

50


AttributeError: 'int' object has no attribute 'size'

2-2)  stats.binom_test(x, p=, alternative="less") : 단측검정(조건 반전)

In [None]:
# One-sided test with the condition reversed: a "success" is a colder day on
# which errand deliveries did NOT rise, and the alternative is that such days
# make up less than half of all colder days.
# NOTE(review): successes and failures must be disjoint, so the non-increase
# count is taken as (colder days - increase days) — confirm this is the
# intended reversed condition.
n_colder = len(colder_days)
n_not_increased = n_colder - len(cold_simburum_df)
p_value = stats.binomtest(
    n_not_increased, n_colder, p=0.5, alternative="less"
).pvalue
print(p_value)

1.0


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=aeda0379-ef07-4599-92da-f5608bf4c48d' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>