In [1]:
!pip install pmdarima

Collecting pmdarima
  Downloading pmdarima-2.0.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pmdarima
Successfully installed pmdarima-2.0.4


In [2]:
import pandas as pd
import warnings
from statsmodels.tools.sm_exceptions import ConvergenceWarning

# SettingWithCopyWarning 숨기기
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

# UserWarning 숨기기 (ValueWarning 포함)
warnings.simplefilter(action='ignore', category=UserWarning)

# ConvergenceWarning 숨기기
warnings.simplefilter(action='ignore', category=ConvergenceWarning)

# FutureWarning 숨기기
warnings.simplefilter(action='ignore', category=FutureWarning)


In [3]:
import pandas as pd
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

# 데이터 로드
train = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/train_pre.csv')
test = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/test.csv')
oil = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/oil_pre.csv')
holidays = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/holidays_events.csv')

# 기름 데이터 전처리
oil['date'] = pd.to_datetime(oil['date'])
oil.set_index('date', inplace=True)

# 공휴일 데이터 전처리
holidays['date'] = pd.to_datetime(holidays['date'])
national_holidays = holidays[(holidays['locale'] == 'National') & (holidays['type'] == 'Holiday')]
national_holidays = national_holidays[['date']].drop_duplicates()

# 공휴일 여부를 나타내는 함수
def is_holiday(date):
    return 1 if date in national_holidays['date'].values else 0

# 매장 번호 범위를 지정하고 예측값을 담을 리스트 초기화
store_numbers = range(30, 55)
all_predictions = []

# 각 매장 및 제품군별 반복
for store_nbr in store_numbers:
    unique_families = train[train['store_nbr'] == store_nbr]['family'].unique()

    for family in unique_families:
        # 판매 데이터 필터링 및 인덱스 설정
        train_filtered = train[(train['store_nbr'] == store_nbr) & (train['family'] == family)]
        train_filtered['date'] = pd.to_datetime(train_filtered['date'])
        train_filtered.set_index('date', inplace=True)
        train_filtered.sort_index(inplace=True)

        # 기름 데이터 병합
        train_exog = train_filtered.join(oil, how='left')
        train_exog['dcoilwtico'].fillna(method='ffill', inplace=True)
        train_exog['dcoilwtico'].fillna(method='bfill', inplace=True)

        # 공휴일 여부 열 추가
        train_exog['is_holiday'] = train_exog.index.to_series().apply(is_holiday)

        # Auto ARIMA로 최적의 파라미터 찾기
        auto_arima_model = auto_arima(
            train_exog['sales'],
            exogenous=train_exog[['dcoilwtico', 'is_holiday']],
            start_p=1, start_q=1,
            max_p=3, max_q=3,
            seasonal=False,
            d=None,
            trace=False,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=False
        )
        best_p, best_d, best_q = auto_arima_model.order

        print(f"Store {store_nbr}, Family '{family}' - Parameters: p={best_p}, d={best_d}, q={best_q}")

        # 외생 변수를 포함하여 ARIMA 모델 학습
        arima_model = ARIMA(train_exog['sales'], order=(best_p, best_d, best_q), exog=train_exog[['dcoilwtico', 'is_holiday']])
        arima_fitted = arima_model.fit()

        # 테스트 데이터 준비 및 기름 데이터 병합
        test_filtered = test[(test['store_nbr'] == store_nbr) & (test['family'] == family)]
        test_filtered['date'] = pd.to_datetime(test_filtered['date'])
        test_filtered.set_index('date', inplace=True)
        test_filtered.sort_index(inplace=True)

        test_exog = test_filtered.join(oil, how='left')
        test_exog['dcoilwtico'].fillna(method='ffill', inplace=True)

        # 테스트 데이터에 공휴일 여부 추가
        test_exog['is_holiday'] = test_exog.index.to_series().apply(is_holiday)

        # 외생 변수를 사용하여 예측
        forecast_sales = arima_fitted.forecast(steps=len(test_exog), exog=test_exog[['dcoilwtico', 'is_holiday']])
        forecast_sales = forecast_sales.clip(lower=0)

        # 예측 결과를 테스트 데이터에 추가
        test_filtered['predicted_sales'] = forecast_sales.values
        test_filtered.reset_index(inplace=True)

        # 예측 결과를 최종 결과에 추가
        all_predictions.append(test_filtered[['id', 'predicted_sales']].rename(columns={'predicted_sales': 'sales'}))


# 결과를 CSV 파일로 저장
output_path = '/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/submission_30_54.csv'
all_predictions.to_csv(output_path, index=False)

Store 30, Family 'AUTOMOTIVE' - Parameters: p=1, d=1, q=1
Store 30, Family 'BEAUTY' - Parameters: p=0, d=1, q=3
Store 30, Family 'BEVERAGES' - Parameters: p=2, d=1, q=3
Store 30, Family 'BREAD/BAKERY' - Parameters: p=2, d=1, q=3
Store 30, Family 'CLEANING' - Parameters: p=3, d=1, q=1
Store 30, Family 'DAIRY' - Parameters: p=2, d=1, q=3
Store 30, Family 'DELI' - Parameters: p=2, d=1, q=3
Store 30, Family 'EGGS' - Parameters: p=2, d=1, q=3
Store 30, Family 'FROZEN FOODS' - Parameters: p=1, d=0, q=2
Store 30, Family 'GROCERY I' - Parameters: p=2, d=1, q=3
Store 30, Family 'GROCERY II' - Parameters: p=1, d=0, q=1
Store 30, Family 'HARDWARE' - Parameters: p=0, d=1, q=3
Store 30, Family 'LINGERIE' - Parameters: p=3, d=1, q=2
Store 30, Family 'LIQUOR,WINE,BEER' - Parameters: p=3, d=1, q=0
Store 30, Family 'MEATS' - Parameters: p=2, d=1, q=3
Store 30, Family 'PERSONAL CARE' - Parameters: p=0, d=1, q=1
Store 30, Family 'POULTRY' - Parameters: p=2, d=1, q=3
Store 30, Family 'PREPARED FOODS' - Pa

AttributeError: 'list' object has no attribute 'to_csv'

In [4]:
submission = pd.concat(all_predictions, ignore_index=True)
output_path = '/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/submission_30_54.csv'
submission.to_csv(output_path, index=False)

In [5]:
import pandas as pd
from pmdarima import auto_arima
from statsmodels.tsa.arima.model import ARIMA

# 데이터 로드
train = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/train_pre.csv')
test = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/test.csv')
oil = pd.read_csv('/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/oil_pre.csv')

# 기름 데이터 전처리
oil['date'] = pd.to_datetime(oil['date'])
oil.set_index('date', inplace=True)

# 매장 번호 범위를 지정하고 예측값을 담을 리스트 초기화
store_numbers = range(1, 38)
all_predictions = []

# 각 매장 및 제품군별 반복
for store_nbr in store_numbers:
    unique_families = train[train['store_nbr'] == store_nbr]['family'].unique()

    for family in unique_families:
        # 판매 데이터 필터링 및 인덱스 설정
        train_filtered = train[(train['store_nbr'] == store_nbr) & (train['family'] == family)]
        train_filtered['date'] = pd.to_datetime(train_filtered['date'])
        train_filtered.set_index('date', inplace=True)
        train_filtered.sort_index(inplace=True)

        # 기름 데이터 병합
        train_exog = train_filtered.join(oil, how='left')
        train_exog['dcoilwtico'].fillna(method='ffill', inplace=True)
        train_exog['dcoilwtico'].fillna(method='bfill', inplace=True)

        # Auto ARIMA로 최적의 파라미터 찾기
        auto_arima_model = auto_arima(
            train_exog['sales'],
            exogenous=train_exog[['dcoilwtico']],
            start_p=1, start_q=1,
            max_p=3, max_q=3,
            seasonal=False,
            d=None,
            trace=False,
            error_action='ignore',
            suppress_warnings=True,
            stepwise=False
        )
        best_p, best_d, best_q = auto_arima_model.order

        print(f"Store {store_nbr}, Family '{family}' - Parameters: p={best_p}, d={best_d}, q={best_q}")

        # ARIMA 모델 학습
        arima_model = ARIMA(train_exog['sales'], order=(best_p, best_d, best_q), exog=train_exog[['dcoilwtico']])
        arima_fitted = arima_model.fit()

        # 예측 결과를 테스트 데이터에 추가
        test_filtered['predicted_sales'] = forecast_sales.values
        test_filtered.reset_index(inplace=True)

        # 예측 결과를 최종 결과에 추가
        all_predictions.append(test_filtered[['id', 'predicted_sales']].rename(columns={'predicted_sales': 'sales'}))

submission = pd.concat(all_predictions, ignore_index=True)
# 결과를 CSV 파일로 저장
output_path = '/content/drive/MyDrive/Dev_course/3차 팀 프로젝트/submission_nor1_27.csv'
all_predictions.to_csv(output_path, index=False)

Store 1, Family 'AUTOMOTIVE' - Parameters: p=0, d=1, q=3
Store 1, Family 'BEAUTY' - Parameters: p=2, d=1, q=3
Store 1, Family 'BEVERAGES' - Parameters: p=2, d=1, q=3


ValueError: cannot insert level_0, already exists