In [9]:
import pandas as pd

# Read the onion summary CSV (cp949-encoded); text following a '#' is treated
# as a comment by the parser.
df = pd.read_csv(
    '../data/양파요약데이터_직팜정리.csv',
    comment='#',
    encoding='cp949',
)

# Parse the date column into datetimes, then order the rows by ascending date.
df['날짜(YYYY-MM-DD)'] = pd.to_datetime(df['날짜(YYYY-MM-DD)'])
df = df.sort_values(by='날짜(YYYY-MM-DD)')

# Add the unit-price column: total price divided by total volume.
# Rows with zero volume are still present here, so the quotient can be inf/NaN.
df['단가'] = df['총가격(원)'].div(df['단위총물량(kg)'])


conda create -n ml-dl-nlp python=3.10 
conda activate ml-dl-nlp
python.exe -m pip install --upgrade pip 
pip install jupyter numpy==1.23.5 pandas==1.5.3  matplotlib==3.7.0 seaborn==0.12.2 scikit-learn==1.2.1
pip install tensorflow==2.10.0
conda install ipykernel
python -m ipykernel install --user --name ml-dl-nlp --display-name "ml-dl-nlp" 
conda install -c conda-forge imbalanced-learn=0.9.1
pip install xgboost==1.7.3
pip install lightgbm
pip install catboost==1.2

In [10]:
# Keep only rows that can yield a unit price: first drop rows whose total
# volume equals zero, then drop rows missing either the total price or volume.
df = (
    df.loc[df['단위총물량(kg)'].ne(0)]
      .dropna(subset=['총가격(원)', '단위총물량(kg)'])
)


In [11]:
# Unit price = total price / total volume, computed row by row.
df['단가'] = df['총가격(원)'].div(df['단위총물량(kg)'])


In [12]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Convert +/-inf anywhere in the frame to NaN, then discard rows that have no
# unit price.
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=['단가'])

# Min-max scale the unit price. The fitted scaler is kept in `scaler` so that
# scaled values can be mapped back with inverse_transform.
scaler = MinMaxScaler()
df['단가_scaled'] = scaler.fit(df[['단가']]).transform(df[['단가']])


In [13]:
def create_sequences(data, seq_length):
    """Build sliding-window samples for next-step prediction.

    Window ``i`` is ``data[i:i + seq_length]`` and its target is
    ``data[i + seq_length]``, so ``len(data) - seq_length`` samples are
    produced.

    Args:
        data: Sequence of observations (list, ndarray, or anything
            ``np.asarray`` accepts). Windows are taken along the first axis.
        seq_length: Number of consecutive observations per input window.
            Must be non-negative.

    Returns:
        Tuple ``(X, y)`` of ndarrays. ``X`` has shape
        ``(n_samples, seq_length, ...)`` and ``y`` has shape
        ``(n_samples, ...)``. When ``data`` is too short to form a single
        window, both arrays are empty but keep those shapes, so a caller can
        still index ``X.shape[1]``.

    Raises:
        ValueError: If ``seq_length`` is negative.
    """
    if seq_length < 0:
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    # Normalise to an ndarray so slicing and indexing are always positional.
    data = np.asarray(data)
    n_samples = len(data) - seq_length

    if n_samples <= 0:
        # Not enough observations for one window: return correctly shaped
        # empty arrays instead of a rank-1 empty array.
        trailing = data.shape[1:]
        empty_X = np.empty((0, seq_length) + trailing, dtype=data.dtype)
        empty_y = np.empty((0,) + trailing, dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[i:i + seq_length] for i in range(n_samples)])
    # The targets are simply the series shifted by seq_length; copy so the
    # result does not alias the caller's array.
    y = data[seq_length:].copy()
    return X, y

# Window size: number of consecutive scaled prices fed to the model per sample.
seq_length = 30

# Scaled unit-price series as a plain NumPy array.
prices = df['단가_scaled'].to_numpy()

# Sliding windows (X) and the value that follows each window (y).
X, y = create_sequences(prices, seq_length)

# Shape the input as the LSTM expects: (samples, timesteps, features),
# with a single feature per timestep.
X = X.reshape(X.shape[0], X.shape[1], 1)


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Single-layer LSTM regressor: 64 LSTM units over (seq_length, 1) windows,
# followed by one linear output unit for the next scaled price.
# NOTE(review): activation='relu' is not the LSTM default (tanh); Keras only
# uses its fast cuDNN kernel with the default activation — confirm this
# choice is intentional.
model = Sequential([
    LSTM(64, activation='relu', input_shape=(seq_length, 1)),
    Dense(1),
])

# Train with Adam against mean squared error.
model.compile(loss='mse', optimizer='adam')


In [15]:
# Fit on every window/target pair: 20 passes over the data, 32 samples per batch.
model.fit(x=X, y=y, batch_size=32, epochs=20)


Epoch 1/20

KeyboardInterrupt: 

In [None]:
# Forecast the next, not-yet-observed unit price from the most recent
# `seq_length` scaled prices. X[-1] must not be used for this: it is the last
# *training* window, and its target y[-1] is the final price already present
# in the data, so predicting from it only reproduces a known value.
last_window = prices[-seq_length:].reshape(1, seq_length, 1)
predicted = model.predict(last_window)

# Undo the min-max scaling to get back to the original price units.
predicted_price = scaler.inverse_transform(predicted)[0][0]

print(f"예측된 1kg당 가격: {int(predicted_price):,} 원")
