In [None]:
# 이 프로젝트는 [Kaggle Groceries Dataset](https://www.kaggle.com/datasets/heeraldedhia/groceries-dataset) (GPL v2 라이선스) 기반으로 분석되었습니다.
# 이 분석 결과 및 파생 데이터/코드는 GPL v2 라이선스의 조건을 따릅니다.

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so that
# files stored on Drive can be read by path.
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
# Location of the groceries CSV on the mounted Google Drive.
file_path = '/content/drive/MyDrive/Groceries_dataset.csv'
# Read the CSV into a DataFrame and preview the first rows.
df = pd.read_csv(file_path)
df.head()

Unnamed: 0,Member_number,Date,itemDescription
0,1808,21-07-2015,tropical fruit
1,2552,05-01-2015,whole milk
2,2300,19-09-2015,pip fruit
3,1187,12-12-2015,other vegetables
4,3037,01-02-2015,whole milk


In [None]:
# Parse the day-first date strings (e.g. '21-07-2015' matches '%d-%m-%Y')
# into datetime values and store them back in the Date column.
parsed_dates = pd.to_datetime(df['Date'], format='%d-%m-%Y')
df['Date'] = parsed_dates

# Add year, month, day and weekday-name columns derived from the parsed dates.
date_parts = parsed_dates.dt
df['Year'] = date_parts.year
df['Month'] = date_parts.month
df['Day'] = date_parts.day
df['Weekday'] = date_parts.day_name()  # weekday name, e.g. Monday, Tuesday

df.head()

Unnamed: 0,Member_number,Date,itemDescription,Year,Month,Day,Weekday
0,1808,2015-07-21,tropical fruit,2015,7,21,Tuesday
1,2552,2015-01-05,whole milk,2015,1,5,Monday
2,2300,2015-09-19,pip fruit,2015,9,19,Saturday
3,1187,2015-12-12,other vegetables,2015,12,12,Saturday
4,3037,2015-02-01,whole milk,2015,2,1,Sunday


In [None]:
# 계절(Season) 컬럼 추가 함수

def get_season(month):
    """Map a month number to a season name.

    Parameters
    ----------
    month : int
        Calendar month number (1-12).

    Returns
    -------
    str or None
        'Spring' for 3-5, 'Summer' for 6-8, 'Autumn' for 9-11,
        'Winter' for 12, 1 and 2; None when the value matches no month.
    """
    # Ordered lookup table: the first season whose months contain the value wins.
    season_table = (
        ('Spring', [3, 4, 5]),
        ('Summer', [6, 7, 8]),
        ('Autumn', [9, 10, 11]),
        ('Winter', [12, 1, 2]),
    )
    for season_name, season_months in season_table:
        if month in season_months:
            return season_name
    return None


# Label every row with the season that its Month value maps to.
season_labels = df['Month'].apply(get_season)
df['Season'] = season_labels

df.head()

Unnamed: 0,Member_number,Date,itemDescription,Year,Month,Day,Weekday,Season
0,1808,2015-07-21,tropical fruit,2015,7,21,Tuesday,Summer
1,2552,2015-01-05,whole milk,2015,1,5,Monday,Winter
2,2300,2015-09-19,pip fruit,2015,9,19,Saturday,Autumn
3,1187,2015-12-12,other vegetables,2015,12,12,Saturday,Winter
4,3037,2015-02-01,whole milk,2015,2,1,Sunday,Winter


In [None]:
# Additional calendar features, all derived from the Date column through a
# single shared datetime accessor.
date_accessor = df['Date'].dt

df['YearMonth'] = date_accessor.to_period('M')        # monthly period, e.g. 2015-07
df['WeekOfYear'] = date_accessor.isocalendar().week   # week number from the ISO calendar
df['Quarter'] = date_accessor.quarter
df['IsMonthStart'] = date_accessor.is_month_start
df['IsMonthEnd'] = date_accessor.is_month_end
df['IsWeekend'] = date_accessor.weekday >= 5          # weekday is Monday=0 ... Sunday=6
df['DayOfYear'] = date_accessor.dayofyear

df.head()

Unnamed: 0,Member_number,Date,itemDescription,Year,Month,Day,Weekday,Season,YearMonth,WeekOfYear,Quarter,IsMonthStart,IsMonthEnd,IsWeekend,DayOfYear
0,1808,2015-07-21,tropical fruit,2015,7,21,Tuesday,Summer,2015-07,30,3,False,False,False,202
1,2552,2015-01-05,whole milk,2015,1,5,Monday,Winter,2015-01,2,1,False,False,False,5
2,2300,2015-09-19,pip fruit,2015,9,19,Saturday,Autumn,2015-09,38,3,False,False,True,262
3,1187,2015-12-12,other vegetables,2015,12,12,Saturday,Winter,2015-12,50,4,False,False,True,346
4,3037,2015-02-01,whole milk,2015,2,1,Sunday,Winter,2015-02,5,1,True,False,True,32


In [None]:
# Distinct calendar years that occur in the Date column.
year_of_each_row = df['Date'].dt.year
years = year_of_each_row.unique()
print(years)

[2014 2015]


In [None]:
# 데이터 셋을 올린 Heeral Dedhia 가 인도사람이기에 인도의 holiday로 가정

!pip install holidays

import pandas as pd
import holidays

# 날짜 파싱
df['Date'] = pd.to_datetime(df['Date'], format='%d-%m-%Y')

# 인도 공휴일 객체 만들기 (2014, 2015년만)
indian_holidays = holidays.India(years=[2014, 2015])

# 휴일 여부 플래그 생성
df['IsHoliday'] = df['Date'].isin(indian_holidays)

# 결과 확인
print(df[['Date', 'IsHoliday']].head())

            Date  IsHoliday
13331 2014-06-24      False
29480 2014-06-24      False
32851 2014-06-24      False
4843  2015-03-15      False
8395  2015-03-15      False


  df['IsHoliday'] = df['Date'].isin(indian_holidays)


In [None]:
# Order the rows by member and then by date so that each member's rows are
# consecutive and chronological.
df = df.sort_values(by=['Member_number', 'Date'])

# Days between each row and the same member's previous row: NaN for a member's
# first row, 0 for further items recorded on the same date.
dates_per_member = df.groupby('Member_number')['Date']
df['PrevPurchaseDelta'] = dates_per_member.diff().dt.days

df.head()

Unnamed: 0,Member_number,Date,itemDescription,Year,Month,Day,Weekday,Season,YearMonth,WeekOfYear,Quarter,IsMonthStart,IsMonthEnd,IsWeekend,DayOfYear,PrevPurchaseDelta,IsHoliday
13331,1000,2014-06-24,whole milk,2014,6,24,Tuesday,Summer,2014-06,26,2,False,False,False,175,,False
29480,1000,2014-06-24,pastry,2014,6,24,Tuesday,Summer,2014-06,26,2,False,False,False,175,0.0,False
32851,1000,2014-06-24,salty snack,2014,6,24,Tuesday,Summer,2014-06,26,2,False,False,False,175,0.0,False
4843,1000,2015-03-15,sausage,2015,3,15,Sunday,Spring,2015-03,11,1,False,False,True,74,264.0,False
8395,1000,2015-03-15,whole milk,2015,3,15,Sunday,Spring,2015-03,11,1,False,False,True,74,0.0,False


In [None]:
import os

# Show the current working directory of the kernel.
current_dir = os.getcwd()
print(current_dir)

/content


In [None]:
import os

# Show the entries located directly under /content.
content_entries = os.listdir('/content')
print(content_entries)


['.config', '.git', 'drive', '.ipynb_checkpoints', 'Association_Rules_Groceries', 'sample_data']


In [24]:
# Print the full path of every file below /content, walking the tree lazily
# so paths are printed in the order os.walk yields them.
all_file_paths = (
    os.path.join(dir_path, file_name)
    for dir_path, _subdirs, file_names in os.walk('/content')
    for file_name in file_names
)
for full_path in all_file_paths:
    print(full_path)

/content/.config/config_sentinel
/content/.config/.last_update_check.json
/content/.config/active_config
/content/.config/.last_opt_in_prompt.yaml
/content/.config/gce
/content/.config/default_configs.db
/content/.config/hidden_gcloud_config_universe_descriptor_data_cache_configs.db
/content/.config/.last_survey_prompt.yaml
/content/.config/configurations/config_default
/content/.config/logs/2025.06.17/13.36.54.470285.log
/content/.config/logs/2025.06.17/13.36.53.766289.log
/content/.config/logs/2025.06.17/13.36.10.034228.log
/content/.config/logs/2025.06.17/13.36.43.880673.log
/content/.config/logs/2025.06.17/13.36.32.451069.log
/content/.config/logs/2025.06.17/13.36.41.886635.log
/content/.git/index
/content/.git/description
/content/.git/HEAD
/content/.git/config
/content/.git/info/exclude
/content/.git/hooks/pre-merge-commit.sample
/content/.git/hooks/pre-push.sample
/content/.git/hooks/fsmonitor-watchman.sample
/content/.git/hooks/pre-applypatch.sample
/content/.git/hooks/pre-reba