In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

dataset = pd.read_csv('주유소_평균판매가격_제품별.csv', encoding='cp949')

# FE

In [2]:
def FE(dataset):
    # 년, 월, 일
    data = dataset.__deepcopy__()
    data['year'] = data['구분'].str.split('년').str[0]
    data['month'] = data['구분'].str.split('년').str[1].str.split('월').str[0]
    data['day'] = data['구분'].str.split('년').str[1].str.split('월').str[1].str.split('일').str[0]
    
    # timestemp
    data['date'] = pd.to_datetime(data[['year', 'month', 'day']])
    data.set_index('date', inplace=True)
    
    def calculate_rolling(data, window_sizes):
        for window in window_sizes:
            # 평균
            data[f'고급휘발유_{window}주평균'] = data['고급휘발유'].rolling(window=window).mean()
            data[f'보통휘발유_{window}주평균'] = data['보통휘발유'].rolling(window=window).mean()
            data[f'자동차용경유_{window}주평균'] = data['자동차용경유'].rolling(window=window).mean()
            data[f'실내등유_{window}주평균'] = data['실내등유'].rolling(window=window).mean()
            
            # 표준편차
            data[f'고급휘발유_{window}주표준편차'] = data['고급휘발유'].rolling(window=window).std()
            data[f'보통휘발유_{window}주표준편차'] = data['보통휘발유'].rolling(window=window).std()
            data[f'자동차용경유_{window}주표준편차'] = data['자동차용경유'].rolling(window=window).std()
            data[f'실내등유_{window}주표준편차'] = data['실내등유'].rolling(window=window).std()

            # 중위수
            data[f'고급휘발유_{window}주표준편차'] = data['고급휘발유'].rolling(window=window).median()
            data[f'보통휘발유_{window}주표준편차'] = data['보통휘발유'].rolling(window=window).median()
            data[f'자동차용경유_{window}주표준편차'] = data['자동차용경유'].rolling(window=window).median()
            data[f'실내등유_{window}주표준편차'] = data['실내등유'].rolling(window=window).median()
            
            # 시계열분해
            data[f'고급휘발유_{window}추세'] = sm.tsa.seasonal_decompose(data[['고급휘발유']], model='additive', period=window).trend
            data[f'보통휘발유_{window}추세'] = sm.tsa.seasonal_decompose(data[['보통휘발유']], model='additive', period=window).trend
            data[f'자동차용경유_{window}추세'] = sm.tsa.seasonal_decompose(data[['자동차용경유']], model='additive', period=window).trend
            data[f'실내등유_{window}추세'] = sm.tsa.seasonal_decompose(data[['실내등유']], model='additive', period=window).trend
            
            data[f'고급휘발유_{window}계절성'] = sm.tsa.seasonal_decompose(data[['고급휘발유']], model='additive', period=window).seasonal
            data[f'보통휘발유_{window}계절성'] = sm.tsa.seasonal_decompose(data[['보통휘발유']], model='additive', period=window).seasonal
            data[f'자동차용경유_{window}계절성'] = sm.tsa.seasonal_decompose(data[['자동차용경유']], model='additive', period=window).seasonal
            data[f'실내등유_{window}계절성'] = sm.tsa.seasonal_decompose(data[['실내등유']], model='additive', period=window).seasonal
            
            data[f'고급휘발유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['고급휘발유']], model='additive', period=window).resid
            data[f'보통휘발유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['보통휘발유']], model='additive', period=window).resid
            data[f'자동차용경유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['자동차용경유']], model='additive', period=window).resid
            data[f'실내등유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['실내등유']], model='additive', period=window).resid
        return data
    
    # 주 단위 및 월 단위 윈도우 크기 설정
    window_sizes = [7, 14, 21, 28, 28*3, 28*6, 28*9, 28*12]

    # 평균 및 표준편차 계산
    data = calculate_rolling(data, window_sizes)
    data.drop('구분', axis=1, inplace=True)
    
    data['year'] = data['year'].astype('int')
    data['month'] = data['month'].astype('int')
    data['day'] = data['day'].astype('int')

    return data


train = FE(dataset.iloc[:-30]).iloc[28*12:]
test = []
for i in range(1, 30, 1):
    test.append(FE(dataset.iloc[:-30+i]).iloc[28*12:])

# data = FE(dataset)
# data['구분'].str.split('년').str[1].str.split('월').str[1].str.split('일').str[0]
# data['고급휘발유'].rolling(window=7).mean().shift(14)
# pd.to_datetime(data[['년', '월', '일']])
# sm.tsa.seasonal_decompose(data['고급휘발유'], model='additive')
# data

  data[f'실내등유_{window}계절성'] = sm.tsa.seasonal_decompose(data[['실내등유']], model='additive', period=window).seasonal
  data[f'고급휘발유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['고급휘발유']], model='additive', period=window).resid
  data[f'보통휘발유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['보통휘발유']], model='additive', period=window).resid
  data[f'자동차용경유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['자동차용경유']], model='additive', period=window).resid
  data[f'실내등유_{window}불규칙'] = sm.tsa.seasonal_decompose(data[['실내등유']], model='additive', period=window).resid
  data[f'고급휘발유_{window}주평균'] = data['고급휘발유'].rolling(window=window).mean()
  data[f'보통휘발유_{window}주평균'] = data['보통휘발유'].rolling(window=window).mean()
  data[f'자동차용경유_{window}주평균'] = data['자동차용경유'].rolling(window=window).mean()
  data[f'실내등유_{window}주평균'] = data['실내등유'].rolling(window=window).mean()
  data[f'고급휘발유_{window}주표준편차'] = data['고급휘발유'].rolling(window=window).std()
  data[f'보통휘발유_{window}주표준편차'] = data['보통휘발유'].rolling(win

In [3]:
train

Unnamed: 0_level_0,고급휘발유,보통휘발유,자동차용경유,실내등유,year,month,day,고급휘발유_7주평균,보통휘발유_7주평균,자동차용경유_7주평균,실내등유_7주평균,고급휘발유_7주표준편차,보통휘발유_7주표준편차,자동차용경유_7주표준편차,실내등유_7주표준편차,고급휘발유_7추세,보통휘발유_7추세,자동차용경유_7추세,실내등유_7추세,고급휘발유_7계절성,보통휘발유_7계절성,자동차용경유_7계절성,실내등유_7계절성,고급휘발유_7불규칙,보통휘발유_7불규칙,자동차용경유_7불규칙,실내등유_7불규칙,고급휘발유_14주평균,보통휘발유_14주평균,자동차용경유_14주평균,실내등유_14주평균,고급휘발유_14주표준편차,보통휘발유_14주표준편차,자동차용경유_14주표준편차,실내등유_14주표준편차,고급휘발유_14추세,보통휘발유_14추세,자동차용경유_14추세,실내등유_14추세,고급휘발유_14계절성,...,고급휘발유_252주평균,보통휘발유_252주평균,자동차용경유_252주평균,실내등유_252주평균,고급휘발유_252주표준편차,보통휘발유_252주표준편차,자동차용경유_252주표준편차,실내등유_252주표준편차,고급휘발유_252추세,보통휘발유_252추세,자동차용경유_252추세,실내등유_252추세,고급휘발유_252계절성,보통휘발유_252계절성,자동차용경유_252계절성,실내등유_252계절성,고급휘발유_252불규칙,보통휘발유_252불규칙,자동차용경유_252불규칙,실내등유_252불규칙,고급휘발유_336주평균,보통휘발유_336주평균,자동차용경유_336주평균,실내등유_336주평균,고급휘발유_336주표준편차,보통휘발유_336주표준편차,자동차용경유_336주표준편차,실내등유_336주표준편차,고급휘발유_336추세,보통휘발유_336추세,자동차용경유_336추세,실내등유_336추세,고급휘발유_336계절성,보통휘발유_336계절성,자동차용경유_336계절성,실내등유_336계절성,고급휘발유_336불규칙,보통휘발유_336불규칙,자동차용경유_336불규칙,실내등유_336불규칙
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2009-03-17,1734.19,1536.08,1306.57,935.21,2009,3,17,1734.327143,1535.368571,1307.304286,937.991429,1734.26,1535.66,1307.57,938.19,1731.250000,1534.431429,1305.557143,934.090000,0.257801,0.088190,0.090871,-0.021431,2.682199,1.560382,0.921986,1.141431,1732.132857,1533.027857,1306.880714,937.637143,1734.000,1533.985,1307.550,938.440,1729.674643,1532.686071,1304.116429,933.607500,0.332672,...,1786.630992,1584.563333,1510.376786,1171.781627,1740.220,1545.425,1431.955,1148.005,1700.657897,1498.029821,1339.800833,957.008175,6.177965,6.538359,8.450824,2.228584,27.354139,31.511819,-41.681657,-24.026759,1843.193988,1646.416369,1584.007500,1228.057351,1902.975,1705.550,1636.305,1270.245,1739.803646,1538.113408,1380.128601,998.958348,-12.221746,-15.124098,-13.073579,-2.171892,6.608100,13.090690,-60.485023,-61.576457
2009-03-18,1733.94,1535.66,1305.93,934.04,2009,3,18,1734.271429,1535.571429,1307.051429,937.170000,1734.19,1535.66,1307.14,937.24,1729.711429,1533.414286,1304.355714,931.965714,0.263981,0.075868,0.109752,0.068822,3.964590,2.169847,1.464533,2.005463,1733.199286,1533.796429,1307.166429,937.922143,1734.000,1534.275,1307.550,938.440,1728.748214,1532.225714,1303.367857,933.208929,0.420137,...,1785.096984,1583.020198,1507.936032,1169.409365,1737.265,1539.090,1425.515,1142.820,1700.962044,1498.331091,1339.715159,956.349544,5.750443,6.159119,8.087719,2.142278,27.227513,31.169790,-41.872878,-24.451822,1842.784911,1645.950625,1583.129881,1227.359643,1902.975,1705.550,1636.305,1270.245,1739.769211,1538.084583,1379.613259,998.034583,-13.028574,-15.700029,-14.014759,-2.524514,7.199363,13.275446,-59.668500,-61.470069
2009-03-19,1725.60,1531.65,1303.46,928.88,2009,3,19,1732.840000,1535.191429,1306.525714,935.975714,1734.03,1535.66,1306.74,935.25,1727.432857,1531.998571,1302.868571,930.290000,0.306047,-0.003286,0.011005,0.056574,-2.138904,-0.345286,0.580423,-1.466574,1732.956429,1534.055000,1307.047857,937.434286,1734.000,1534.275,1307.550,938.440,1728.516429,1532.000357,1302.835714,932.922143,0.622107,...,1783.455040,1581.418690,1505.443571,1166.963095,1735.845,1536.040,1416.965,1137.990,1701.305992,1498.662897,1339.647579,955.703909,5.521522,5.628304,7.579524,1.830198,18.772486,27.358799,-43.767103,-28.654107,1842.341637,1645.489583,1582.263601,1226.629405,1902.975,1705.550,1636.305,1270.245,1739.705937,1538.044926,1379.085595,997.101577,-13.464032,-16.449018,-15.121121,-2.935523,-0.641905,10.054093,-60.504475,-65.286054
2009-03-20,1722.90,1530.09,1300.93,927.74,2009,3,20,1731.250000,1534.431429,1305.557143,934.090000,1733.94,1535.66,1306.57,935.21,1725.777143,1530.800000,1301.542857,928.981429,-0.098284,-0.102149,-0.018002,0.144984,-2.778859,-0.607851,-0.594856,-1.386412,1732.466429,1534.031429,1306.620000,936.652143,1734.000,1534.275,1307.550,937.855,1729.044643,1532.068929,1302.675000,932.656786,0.148412,...,1781.781111,1579.787103,1502.917976,1164.480952,1735.575,1535.940,1409.565,1133.615,1701.711389,1499.046230,1339.609881,955.071845,5.103487,5.297761,7.305779,1.795056,16.085124,25.746008,-45.985660,-29.126901,1841.880595,1645.014613,1581.367113,1225.880685,1902.975,1705.550,1636.305,1270.245,1739.614568,1537.993542,1378.546339,996.160432,-13.966059,-16.898096,-15.866262,-3.112384,-2.748509,8.994554,-61.750078,-65.308047
2009-03-21,1721.56,1528.54,1299.16,924.45,2009,3,21,1729.711429,1533.414286,1304.355714,931.965714,1733.94,1535.66,1305.93,934.04,1723.998571,1529.542857,1300.177143,928.768571,-0.327009,-0.052137,-0.054137,0.131229,-2.111562,-0.950720,-0.963006,-4.449800,1731.836429,1533.796429,1305.998571,935.545000,1734.000,1534.275,1307.335,937.380,1729.832143,1532.285714,1302.787500,932.331071,-0.128674,...,1780.101389,1578.132937,1500.367937,1161.961111,1734.930,1535.770,1405.570,1130.115,1702.157460,1499.476091,1339.600119,954.462103,4.813732,5.181892,7.036690,1.779230,14.588808,23.882017,-47.476809,-31.791333,1841.407887,1644.525417,1580.451786,1225.103482,1902.975,1705.550,1636.305,1270.245,1739.527976,1537.937753,1378.005789,995.220000,-14.153179,-17.028319,-16.517895,-3.256666,-3.814798,7.630566,-62.327894,-67.513334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-15,1958.78,1713.04,1548.59,1353.66,2024,7,15,1957.458571,1710.624286,1545.508571,1352.864286,1957.44,1710.92,1546.05,1352.81,1957.885714,1712.530000,1548.002857,1353.305714,-0.153414,-0.018943,-0.003076,-0.221650,1.047700,0.528943,0.590219,0.575936,1951.125000,1702.621429,1536.678571,1351.818571,1955.890,1706.225,1540.115,1351.975,,,,,-0.229075,...,1908.380397,1644.891865,1531.211190,1370.302738,1910.715,1644.185,1535.680,1365.090,,,,,4.269573,0.789588,1.226334,3.791686,,,,,1930.511369,1674.474315,1564.783333,1377.895119,1926.880,1669.195,1541.130,1365.930,,,,,-19.538423,-24.520544,-22.102645,-15.298438,,,,
2024-07-16,1959.23,1713.32,1548.83,1353.62,2024,7,16,1957.714286,1711.421429,1546.502857,1353.064286,1958.01,1711.48,1546.79,1352.82,1958.048571,1712.957143,1548.570000,1353.482857,0.257801,0.088190,0.090871,-0.021431,0.923628,0.274667,0.169129,0.158574,1953.186429,1704.757143,1539.007857,1352.098571,1956.325,1708.450,1542.655,1352.345,,,,,0.332672,...,1908.309802,1644.861349,1530.760913,1369.977738,1910.715,1644.185,1535.680,1365.010,,,,,5.566182,1.446502,1.854660,3.928708,,,,,1930.503839,1674.428958,1564.662917,1377.935804,1926.880,1669.195,1541.130,1365.930,,,,,-17.815847,-23.599749,-20.745956,-14.541380,,,,
2024-07-17,1958.34,1713.46,1549.23,1353.22,2024,7,17,1957.732857,1712.035714,1547.330000,1353.171429,1958.01,1711.93,1547.09,1353.22,,,,,0.263981,0.075868,0.109752,0.068822,,,,,1955.099286,1706.705000,1541.129286,1352.318571,1956.810,1709.630,1544.085,1352.550,,,,,0.420137,...,1908.251071,1644.852341,1530.324802,1369.653849,1910.715,1644.185,1535.680,1364.975,,,,,6.211332,2.090825,2.409690,4.473884,,,,,1930.486964,1674.375744,1564.525208,1377.971071,1926.880,1669.195,1541.130,1365.930,,,,,-17.342882,-22.843121,-19.460448,-13.893016,,,,
2024-07-18,1958.19,1713.56,1549.44,1353.57,2024,7,18,1957.885714,1712.530000,1548.002857,1353.305714,1958.19,1713.04,1548.59,1353.44,,,,,0.306047,-0.003286,0.011005,0.056574,,,,,1956.061429,1708.265000,1542.886429,1352.552857,1957.280,1710.510,1545.390,1352.720,,,,,0.622107,...,1908.207619,1644.868532,1529.908214,1369.335675,1910.715,1644.185,1535.680,1364.965,,,,,7.183433,2.630384,2.939390,4.860945,,,,,1930.456964,1674.314345,1564.370536,1378.000506,1926.880,1669.195,1541.130,1365.930,,,,,-17.072636,-22.493189,-18.521783,-13.250697,,,,


In [4]:
y = dataset.shift(-1).iloc[28*12:]
y

Unnamed: 0,구분,고급휘발유,보통휘발유,자동차용경유,실내등유
336,2009년03월18일,1733.94,1535.66,1305.93,934.04
337,2009년03월19일,1725.60,1531.65,1303.46,928.88
338,2009년03월20일,1722.90,1530.09,1300.93,927.74
339,2009년03월21일,1721.56,1528.54,1299.16,924.45
340,2009년03월22일,1719.58,1526.09,1297.29,923.52
...,...,...,...,...,...
5965,2024년08월15일,1943.15,1694.46,1532.19,1350.97
5966,2024년08월16일,1943.09,1693.88,1531.77,1351.06
5967,2024년08월17일,1939.98,1692.92,1530.64,1350.89
5968,2024년08월18일,1939.83,1692.56,1530.03,1350.48


In [5]:
train = train.fillna(0)

In [8]:
print(train.dtypes)
train.select_dtypes(exclude=['object'])

고급휘발유            float64
보통휘발유            float64
자동차용경유           float64
실내등유             float64
year               int32
                  ...   
실내등유_336계절성      float64
고급휘발유_336불규칙     float64
보통휘발유_336불규칙     float64
자동차용경유_336불규칙    float64
실내등유_336불규칙      float64
Length: 167, dtype: object


Unnamed: 0_level_0,고급휘발유,보통휘발유,자동차용경유,실내등유,year,month,day,고급휘발유_7주평균,보통휘발유_7주평균,자동차용경유_7주평균,...,자동차용경유_336추세,실내등유_336추세,고급휘발유_336계절성,보통휘발유_336계절성,자동차용경유_336계절성,실내등유_336계절성,고급휘발유_336불규칙,보통휘발유_336불규칙,자동차용경유_336불규칙,실내등유_336불규칙
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2009-03-17,1734.19,1536.08,1306.57,935.21,2009,3,17,1734.327143,1535.368571,1307.304286,...,1380.128601,998.958348,-12.221746,-15.124098,-13.073579,-2.171892,6.608100,13.090690,-60.485023,-61.576457
2009-03-18,1733.94,1535.66,1305.93,934.04,2009,3,18,1734.271429,1535.571429,1307.051429,...,1379.613259,998.034583,-13.028574,-15.700029,-14.014759,-2.524514,7.199363,13.275446,-59.668500,-61.470069
2009-03-19,1725.60,1531.65,1303.46,928.88,2009,3,19,1732.840000,1535.191429,1306.525714,...,1379.085595,997.101577,-13.464032,-16.449018,-15.121121,-2.935523,-0.641905,10.054093,-60.504475,-65.286054
2009-03-20,1722.90,1530.09,1300.93,927.74,2009,3,20,1731.250000,1534.431429,1305.557143,...,1378.546339,996.160432,-13.966059,-16.898096,-15.866262,-3.112384,-2.748509,8.994554,-61.750078,-65.308047
2009-03-21,1721.56,1528.54,1299.16,924.45,2009,3,21,1729.711429,1533.414286,1304.355714,...,1378.005789,995.220000,-14.153179,-17.028319,-16.517895,-3.256666,-3.814798,7.630566,-62.327894,-67.513334
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-07-15,1958.78,1713.04,1548.59,1353.66,2024,7,15,1957.458571,1710.624286,1545.508571,...,0.000000,0.000000,-19.538423,-24.520544,-22.102645,-15.298438,0.000000,0.000000,0.000000,0.000000
2024-07-16,1959.23,1713.32,1548.83,1353.62,2024,7,16,1957.714286,1711.421429,1546.502857,...,0.000000,0.000000,-17.815847,-23.599749,-20.745956,-14.541380,0.000000,0.000000,0.000000,0.000000
2024-07-17,1958.34,1713.46,1549.23,1353.22,2024,7,17,1957.732857,1712.035714,1547.330000,...,0.000000,0.000000,-17.342882,-22.843121,-19.460448,-13.893016,0.000000,0.000000,0.000000,0.000000
2024-07-18,1958.19,1713.56,1549.44,1353.57,2024,7,18,1957.885714,1712.530000,1548.002857,...,0.000000,0.000000,-17.072636,-22.493189,-18.521783,-13.250697,0.000000,0.000000,0.000000,0.000000


# Train

In [14]:
import xgboost as xgb
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(train, y.iloc[:-30]['자동차용경유'],  test_size=0.036, random_state=23, shuffle=False)


model = xgb.XGBRegressor(
    objective='reg:squarederror',
    n_estimators=1000000,  # 최대 에포크 수
    early_stopping_rounds=10  # 조기 중단 기준
    , eval_metric="mape"
    , 
)

# 조기 중단을 위한 평가 데이터 설정
evals = [(X_train, y_train), (X_valid, y_valid)]

# 모델 학습
model.fit(X_train, y_train, eval_set=evals, verbose=True)



[0]	validation_0-mape:0.09358	validation_1-mape:0.01784
[1]	validation_0-mape:0.06567	validation_1-mape:0.01252
[2]	validation_0-mape:0.04608	validation_1-mape:0.00893
[3]	validation_0-mape:0.03234	validation_1-mape:0.00632
[4]	validation_0-mape:0.02271	validation_1-mape:0.00468
[5]	validation_0-mape:0.01595	validation_1-mape:0.00358
[6]	validation_0-mape:0.01122	validation_1-mape:0.00283
[7]	validation_0-mape:0.00790	validation_1-mape:0.00227
[8]	validation_0-mape:0.00559	validation_1-mape:0.00190
[9]	validation_0-mape:0.00397	validation_1-mape:0.00172
[10]	validation_0-mape:0.00286	validation_1-mape:0.00173
[11]	validation_0-mape:0.00211	validation_1-mape:0.00170
[12]	validation_0-mape:0.00163	validation_1-mape:0.00173
[13]	validation_0-mape:0.00132	validation_1-mape:0.00176
[14]	validation_0-mape:0.00113	validation_1-mape:0.00177
[15]	validation_0-mape:0.00101	validation_1-mape:0.00179
[16]	validation_0-mape:0.00093	validation_1-mape:0.00180
[17]	validation_0-mape:0.00087	validation

In [16]:
test1 = test[0]
# model.predict(test1)
model.predict(test1.iloc[-1:])

array([1705.8013], dtype=float32)

In [7]:
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split

X_train, X_valid, y_train, y_valid = train_test_split(train, y.iloc[:-30]['자동차용경유'],  test_size=0.036, random_state=23, shuffle=False)


tpot = TPOTRegressor(generations=5, population_size=50, verbosity=3)
tpot.fit(X_train, y_train)

                                                                               
Generation 1 - Current best internal CV score: -0.45151167089426336
                                                                                
Generation 2 - Current best internal CV score: -0.4499683097063425
                                                                                  
Generation 3 - Current best internal CV score: -0.4499683097063425
Optimization Progress:  73%|███████▎  | 220/300 [1:30:12<08:50,  6.63s/pipeline]

# dummy

In [None]:
def FE(data):
    # 년, 월, 일
    data['year'] = data['구분'].str.split('년').str[0]
    data['month'] = data['구분'].str.split('년').str[1].str.split('월').str[0]
    data['day'] = data['구분'].str.split('년').str[1].str.split('월').str[1].str.split('일').str[0]
    
    # 지난 1주 평균
    data = data['고급휘발유_1주'].rolling(window=7).mean()
    data = data['보통휘발유_1주'].rolling(window=7).mean()
    data = data['자동차용경유_1주'].rolling(window=7).mean()
    data = data['실내등유_1주'].rolling(window=7).mean()
    
    # 지난 2주 평균
    data = data['고급휘발유_2주'].rolling(window=14).mean()
    data = data['보통휘발유_2주'].rolling(window=14).mean()
    data = data['자동차용경유_2주'].rolling(window=14).mean()
    data = data['실내등유_2주'].rolling(window=14).mean()
    
    # 지난 3주 평균
    data = data['고급휘발유_3주'].rolling(window=21).mean()
    data = data['보통휘발유_3주'].rolling(window=21).mean()
    data = data['자동차용경유_3주'].rolling(window=21).mean()
    data = data['실내등유_3주'].rolling(window=21).mean()
    
    # 지난 4주 평균
    data = data['고급휘발유_4주'].rolling(window=28).mean()
    data = data['보통휘발유_4주'].rolling(window=28).mean()
    data = data['자동차용경유_4주'].rolling(window=28).mean()
    data = data['실내등유_4주'].rolling(window=28).mean()
    
    # 지난 3달 평균
    data = data['고급휘발유_3달'].rolling(window=28*3).mean()
    data = data['보통휘발유_3달'].rolling(window=28*3).mean()
    data = data['자동차용경유_3달'].rolling(window=28*3).mean()
    data = data['실내등유_3달'].rolling(window=28*3).mean()

    # 지난 6달 평균
    data = data['고급휘발유_6달'].rolling(window=28*6).mean()
    data = data['보통휘발유_6달'].rolling(window=28*6).mean()
    data = data['자동차용경유_6달'].rolling(window=28*6).mean()
    data = data['실내등유_6달'].rolling(window=28*6).mean()

    # 지난 9달 평균    
    data = data['고급휘발유_9달'].rolling(window=28*9).mean()
    data = data['보통휘발유_9달'].rolling(window=28*9).mean()
    data = data['자동차용경유_9달'].rolling(window=28*9).mean()
    data = data['실내등유_9달'].rolling(window=28*9).mean()

    # 지난 1년 평균
    data = data['고급휘발유_1년'].rolling(window=28*12).mean()
    data = data['보통휘발유_1년'].rolling(window=28*12).mean()
    data = data['자동차용경유_1년'].rolling(window=28*12).mean()
    data = data['실내등유_1년'].rolling(window=28*12).mean()


    pass

# data['구분'].str.split('년').str[1].str.split('월').str[1].str.split('일').str[0]
data['고급휘발유'].rolling(window=7).mean().shift(14)
# data

0               NaN
1               NaN
2               NaN
3               NaN
4               NaN
           ...     
5965    1955.511429
5966    1955.167143
5967    1954.858571
5968    1954.540000
5969    1954.165714
Name: 고급휘발유, Length: 5970, dtype: float64