## 월별-시간별 기온과 매장 이용 비중 비교

<hr>

### 00. 기본 설정

In [12]:
# 데이터 분석을 위해 pandas를, 수치계산을 위해 numpy를, 시각화를 위해 seaborn을 import

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# 경고 메시지는 출력되지 않게 합니다.
import warnings
warnings.filterwarnings("ignore")

In [13]:
# Make plots display inside the Jupyter notebook
%matplotlib notebook

In [14]:
# Korean font setup

import platform
import matplotlib.font_manager as fm

# Start of the fix for broken Korean (Hangul) text in matplotlib
#------------------------------------------------------------------------------------
# Pick a Korean-capable font family depending on the operating system.

system_name = platform.system()

if system_name == 'Darwin':  # macOS
    plt.rc('font', family='AppleGothic')
elif system_name == 'Windows':  # Windows
    plt.rc('font', family='Malgun Gothic')
elif system_name == 'Linux':  # Linux had no branch before, so Hangul was never configured
    # Only switch fonts when NanumGothic is actually installed; otherwise
    # keep matplotlib's default instead of pointing at a missing family.
    installed_fonts = {font.name for font in fm.fontManager.ttflist}
    if 'NanumGothic' in installed_fonts:
        plt.rc('font', family='NanumGothic')

# Render the minus sign as ASCII '-' so it does not break with Korean fonts.
plt.rcParams['axes.unicode_minus'] = False

<hr>

### 01. 데이터 불러오기

In [15]:
# Load the CSV into `df` and preview the first three rows.
csv_path = '../data/data_prep_with_avg_temp_final.csv'
df = pd.read_csv(csv_path)

df.head(n=3)

Unnamed: 0,YM,MCT_NM,OP_YMD,TYPE,MCT_TYPE,temp_05_11,temp_12_13,temp_14_17,temp_18_22,temp_23_04,...,HR_18_22_UE_CNT_RAT,HR_23_4_UE_CNT_RAT,LOCAL_UE_CNT_RAT,RC_M12_MAL_CUS_CNT_RAT,RC_M12_FME_CUS_CNT_RAT,RC_M12_AGE_UND_20_CUS_CNT_RAT,RC_M12_AGE_30_CUS_CNT_RAT,RC_M12_AGE_40_CUS_CNT_RAT,RC_M12_AGE_50_CUS_CNT_RAT,RC_M12_AGE_OVR_60_CUS_CNT_RAT
0,202301,희야네식당,20000916,T1,가정식,6.720276,9.508065,9.43871,7.305806,6.284865,...,0.0,0.0,0.381616,0.644,0.356,0.183,0.232,0.351,0.171,0.063
1,202301,희신이네,20060515,T1,가정식,6.720276,9.508065,9.43871,7.305806,6.284865,...,0.264706,0.0,0.701183,0.686,0.314,0.118,0.261,0.342,0.19,0.089
2,202301,흥미,20030814,T1,가정식,6.233641,8.633871,8.46129,6.666452,5.849189,...,0.09434,0.0,0.736842,0.562,0.438,0.107,0.299,0.336,0.226,0.032


In [16]:
# Check the column names
df.columns

Index(['YM', 'MCT_NM', 'OP_YMD', 'TYPE', 'MCT_TYPE', 'temp_05_11',
       'temp_12_13', 'temp_14_17', 'temp_18_22', 'temp_23_04', 'TEMP_AVG',
       'latitude', 'longitude', 'Polygon', 'area', 'ADDR', 'UE_CNT_GRP',
       'RANK_CNT', 'UE_AMT_GRP', 'RANK_AMT', 'UE_AMT_PER_TRSN_GRP',
       'RANK_MEAN', 'MON_UE_CNT_RAT', 'TUE_UE_CNT_RAT', 'WED_UE_CNT_RAT',
       'THU_UE_CNT_RAT', 'FRI_UE_CNT_RAT', 'SAT_UE_CNT_RAT', 'SUN_UE_CNT_RAT',
       'HR_5_11_UE_CNT_RAT', 'HR_12_13_UE_CNT_RAT', 'HR_14_17_UE_CNT_RAT',
       'HR_18_22_UE_CNT_RAT', 'HR_23_4_UE_CNT_RAT', 'LOCAL_UE_CNT_RAT',
       'RC_M12_MAL_CUS_CNT_RAT', 'RC_M12_FME_CUS_CNT_RAT',
       'RC_M12_AGE_UND_20_CUS_CNT_RAT', 'RC_M12_AGE_30_CUS_CNT_RAT',
       'RC_M12_AGE_40_CUS_CNT_RAT', 'RC_M12_AGE_50_CUS_CNT_RAT',
       'RC_M12_AGE_OVR_60_CUS_CNT_RAT'],
      dtype='object')

In [17]:
# (number of rows, number of columns) of the loaded data
df.shape

(67857, 42)

<hr>

### 1월-12월 데이터프레임

In [31]:
# `df_jan` refers to the whole dataset (all months), not just January.
# A January-only filter (YM == 202301) used to precede this assignment, but it
# was overwritten immediately and therefore had no effect; it has been removed
# as dead code. The variable name is kept as-is.
df_jan = df

# Number of distinct areas in the full dataset (비양도 is included).
df_jan['area'].nunique()

10

<hr>

### 02. 전체 기간(1월-12월) 데이터

In [7]:
# NOTE: the January-only filter (YM == 202301) is intentionally disabled here,
# so `df_jan` is the full dataset covering every month.
df_jan = df
df_jan

Unnamed: 0,YM,MCT_NM,OP_YMD,TYPE,MCT_TYPE,temp_05_11,temp_12_13,temp_14_17,temp_18_22,temp_23_04,...,HR_18_22_UE_CNT_RAT,HR_23_4_UE_CNT_RAT,LOCAL_UE_CNT_RAT,RC_M12_MAL_CUS_CNT_RAT,RC_M12_FME_CUS_CNT_RAT,RC_M12_AGE_UND_20_CUS_CNT_RAT,RC_M12_AGE_30_CUS_CNT_RAT,RC_M12_AGE_40_CUS_CNT_RAT,RC_M12_AGE_50_CUS_CNT_RAT,RC_M12_AGE_OVR_60_CUS_CNT_RAT
0,202301,희야네식당,20000916,T1,가정식,6.720276,9.508065,9.438710,7.305806,6.284865,...,0.000000,0.000000,0.381616,0.644,0.356,0.183,0.232,0.351,0.171,0.063
1,202301,희신이네,20060515,T1,가정식,6.720276,9.508065,9.438710,7.305806,6.284865,...,0.264706,0.000000,0.701183,0.686,0.314,0.118,0.261,0.342,0.190,0.089
2,202301,흥미,20030814,T1,가정식,6.233641,8.633871,8.461290,6.666452,5.849189,...,0.094340,0.000000,0.736842,0.562,0.438,0.107,0.299,0.336,0.226,0.032
3,202301,흑심가,20220401,T1,가정식,6.538710,7.964516,7.733871,6.761935,6.317838,...,0.733333,0.033333,0.123656,0.544,0.456,0.239,0.300,0.233,0.139,0.089
4,202301,흑섬,20160419,T1,가정식,6.233641,8.633871,8.461290,6.666452,5.849189,...,0.806452,0.000000,0.317136,0.585,0.415,0.175,0.278,0.283,0.206,0.059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67852,202312,한솥도시락 제주법원점,20190520,T8,도시락,9.019048,11.520000,10.990833,9.248000,8.204420,...,0.283237,0.000000,0.823671,0.510,0.490,0.192,0.213,0.338,0.223,0.033
67853,202312,한솥도시락 제주대정점,20160930,T8,도시락,9.367143,10.893333,10.532500,9.358000,8.856354,...,0.040000,0.000000,0.559322,0.659,0.341,0.167,0.227,0.273,0.197,0.136
67854,202312,포베이이도점,20130715,T9,동남아/인도음식,9.019048,11.520000,10.990833,9.248000,8.204420,...,0.245763,0.000000,0.872796,0.399,0.601,0.081,0.204,0.418,0.218,0.078
67855,202312,포베이서귀포신시가지점,20180821,T9,동남아/인도음식,9.402857,12.151667,11.806667,9.794667,8.750276,...,0.218045,0.000000,0.753606,0.479,0.521,0.089,0.258,0.398,0.175,0.080


In [8]:
# Distinct TEMP_AVG values present in the data
df_jan['TEMP_AVG'].unique()

array([ 7.8515444 ,  7.16888853,  7.06337402,  6.30366372,  6.35512596,
        0.608851  ,  8.58062212,  8.94039589,  8.33031029,  7.64729592,
        9.84545408,  8.21786394,  7.85447449,  2.4145068 ,  9.5227315 ,
       10.33288265,  9.22591326, 11.88911674, 13.86466513, 12.74675883,
       12.74403533, 13.25893241, 11.21137327, 13.11534409,  8.34115361,
       14.30553456, 14.95217936, 15.95824762, 16.1812746 , 15.50952222,
       15.69267619, 15.67052857, 13.71741587, 10.81172857, 16.95146119,
       18.05383564, 19.53381874, 18.62236406, 19.09338402, 17.17768203,
       18.4417573 , 18.30029647, 15.05986561, 20.11798464, 21.44781429,
       23.58417335, 22.86936984, 22.75360476, 21.79508889, 18.82296984,
       21.00470118, 21.81807619, 23.52296782, 25.93473426, 26.3632765 ,
       26.73972043, 28.47346851, 25.21642059, 26.35662673, 28.17673272,
       27.65165131, 29.12419201, 28.29694163, 28.1121106 , 28.584298  ,
       27.57632361, 22.34784332, 29.6838725 , 25.23625565, 25.83

In [9]:
df_jan['area'].unique()   # Distinct area names; the full dataset does include 비양도

array(['남부', '북부', '서부', '동부', '추자도', '산지', '마라도', '우도', '가파도', '비양도'],
      dtype=object)

<hr>

### 02-1. 지역별 월평균/시간대평균 기온과 MCT_TYPE별 순위

- `RANK_CNT`: 월별_업종별_이용건수_순위 분위수 구간
- `RANK_AMT`: 월별_업종별_이용금액_순위 분위수 구간
- `RANK_MEAN`: 월별_업종별_건당_평균_이용금액_순위 분위수 구간
    - 1: 상위 10% 이하
    - 2: 상위 10~25%
    - 3: 상위 25~50%
    - 4: 상위 50~75%
    - 5: 상위 75~90%
    - 6: 상위 90% 초과 (하위 10% 이하)

In [10]:
# Keep only the store type, area, the five time-slot temperature columns and
# the three rank columns for the correlation analysis below.
rank_view_columns = [
    'MCT_TYPE', 'area',
    'temp_05_11', 'temp_12_13', 'temp_14_17', 'temp_18_22', 'temp_23_04',
    'RANK_CNT', 'RANK_AMT', 'RANK_MEAN',
]
df_jan_rank = df_jan[rank_view_columns]
df_jan_rank

Unnamed: 0,MCT_TYPE,area,temp_05_11,temp_12_13,temp_14_17,temp_18_22,temp_23_04,RANK_CNT,RANK_AMT,RANK_MEAN
0,가정식,남부,6.720276,9.508065,9.438710,7.305806,6.284865,3,4,4
1,가정식,남부,6.720276,9.508065,9.438710,7.305806,6.284865,3,4,4
2,가정식,북부,6.233641,8.633871,8.461290,6.666452,5.849189,4,4,4
3,가정식,서부,6.538710,7.964516,7.733871,6.761935,6.317838,5,3,2
4,가정식,북부,6.233641,8.633871,8.461290,6.666452,5.849189,5,3,2
...,...,...,...,...,...,...,...,...,...,...
67852,도시락,북부,9.019048,11.520000,10.990833,9.248000,8.204420,2,4,6
67853,도시락,서부,9.367143,10.893333,10.532500,9.358000,8.856354,5,4,2
67854,동남아/인도음식,북부,9.019048,11.520000,10.990833,9.248000,8.204420,3,3,4
67855,동남아/인도음식,남부,9.402857,12.151667,11.806667,9.794667,8.750276,2,3,4


In [11]:
# Spearman correlation between the time-slot temperatures and the rank columns,
# computed separately for each MCT_TYPE. Rows are grouped by MCT_TYPE only;
# `area` is not a grouping key, so all areas are pooled within a store type.

# Column lists do not change between groups, so they are defined once up front
# instead of being rebuilt on every loop iteration.
temp_cols = ['temp_05_11', 'temp_12_13', 'temp_14_17', 'temp_18_22', 'temp_23_04']
rank_cols = ['RANK_CNT', 'RANK_AMT', 'RANK_MEAN']

# Maps each MCT_TYPE value to its (temperature x rank) correlation table.
grouped_corrs = {}

for mct_type, group in df_jan_rank.groupby('MCT_TYPE'):
    # Full Spearman correlation matrix over temperature and rank columns.
    corr_matrix = group[temp_cols + rank_cols].corr(method='spearman')

    # Keep only the temperature-vs-rank quadrant (rows: temps, columns: ranks).
    temp_rank_corr = corr_matrix.loc[temp_cols, rank_cols]

    # Store the result for this store type.
    grouped_corrs[mct_type] = temp_rank_corr

In [12]:
# Print every per-type correlation table: a header line, the table itself,
# then blank lines as a separator before the next type.
for mct_type, corr in grouped_corrs.items():
    print(f"=== MCT_TYPE: {mct_type} ===", corr, "\n", sep="\n")

=== MCT_TYPE: 가정식 ===
            RANK_CNT  RANK_AMT  RANK_MEAN
temp_05_11 -0.003531 -0.004937  -0.004535
temp_12_13 -0.000383 -0.002825  -0.005203
temp_14_17 -0.000447 -0.002737  -0.005242
temp_18_22 -0.001820 -0.003323  -0.005599
temp_23_04 -0.004260 -0.005604  -0.004652


=== MCT_TYPE: 구내식당/푸드코트 ===
            RANK_CNT  RANK_AMT  RANK_MEAN
temp_05_11 -0.007314 -0.012597   0.035438
temp_12_13  0.009625  0.016305   0.057481
temp_14_17  0.008504  0.017230   0.059610
temp_18_22  0.001575  0.001480   0.042486
temp_23_04 -0.014195 -0.026045   0.025164


=== MCT_TYPE: 기사식당 ===
            RANK_CNT  RANK_AMT  RANK_MEAN
temp_05_11 -0.057670 -0.303280  -0.279975
temp_12_13 -0.014417 -0.305096  -0.337406
temp_14_17 -0.014417 -0.305096  -0.337406
temp_18_22 -0.057670 -0.315387  -0.294333
temp_23_04 -0.043252 -0.284514  -0.251259


=== MCT_TYPE: 기타세계요리 ===
            RANK_CNT  RANK_AMT  RANK_MEAN
temp_05_11 -0.277074 -0.127853   0.271826
temp_12_13 -0.274149 -0.127917   0.266095
temp_14_17 -0.

In [41]:
# RANK_CNT values (kept as a one-column DataFrame) for rows whose MCT_TYPE is '주스'.
df_jan_rank.loc[df_jan_rank['MCT_TYPE'] == '주스', ['RANK_CNT']]


Unnamed: 0,RANK_CNT
2853,3
2854,2
2855,2
2856,3
2857,3
2858,4
5789,4
