In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from collections import deque
import matplotlib.pyplot as plt
import random
import gym
from gym import spaces

from sklearn.preprocessing import LabelEncoder

In [2]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        my_buffer = []
        for i in range(batch_size):
            my_buffer.append(self.buffer[-batch_size+i])
        return my_buffer

    def __len__(self):
        return len(self.buffer)

In [3]:
import csv

menu_dict = {}
first = True
with open("./menu.csv", 'r', encoding='EUC-KR') as file:
    reader = csv.reader(file)
    for row in reader:
        if first:
          first=False
          continue
        idx, menu = row
        menu_dict[int(idx)] = menu
menu_dict

{0: '1명 닭꼬치',
 1: '간장치킨',
 2: '갈비탕',
 3: '강된장보리밥',
 4: '고구마치즈돈가스',
 5: '고구마피자',
 6: '고기',
 7: '고등어회',
 8: '곱창',
 9: '국밥',
 10: '김치볶음밥',
 11: '김치전',
 12: '김치찌개',
 13: '낙곱새',
 14: '냉면',
 15: '냉소바',
 16: '닭갈비',
 17: '닭강정',
 18: '닭꼬치',
 19: '닭똥집튀김',
 20: '닭발',
 21: '닭볶음탕',
 22: '닭우동',
 23: '대창',
 24: '덮밥',
 25: '돈가스',
 26: '돼지갈비',
 27: '돼지고기김치찜',
 28: '돼지국밥',
 29: '된장찌개',
 30: '두부김치',
 31: '떡볶이',
 32: '라면',
 33: '로제떡볶이',
 34: '로제마라샹궈',
 35: '마라떡볶이',
 36: '마라탕',
 37: '마제소바',
 38: '막국수',
 39: '멘보샤',
 40: '무뼈닭발',
 41: '바게트',
 42: '바나나 한개',
 43: '밥버거',
 44: '백반',
 45: '봉구스밥버거',
 46: '부대찌개',
 47: '분짜',
 48: '불고기백반',
 49: '불고기피자',
 50: '불닭게티',
 51: '비빔국수',
 52: '사케동',
 53: '삼각김밥',
 54: '삼겹살',
 55: '새우초밥',
 56: '샌드위치',
 57: '선지해장국',
 58: '설렁탕',
 59: '소고기덮밥',
 60: '소고기무국',
 61: '소곱창',
 62: '소바',
 63: '솥밥',
 64: '순대국',
 65: '순대국밥',
 66: '순두부찌개',
 67: '스테이크',
 68: '스파게티',
 69: '쌀국수',
 70: '씨리얼',
 71: '야끼소바',
 72: '양꼬치',
 73: '양념치킨',
 74: '양송이수프',
 75: '연어덮밥',
 76: '연어초밥',
 77: '연어회',
 78: '열무국수',
 7

In [4]:
# DQN 모델 정의 (이전에 정의한 것과 동일해야 함)
class DQN(nn.Module):
    def __init__(self, state_dim, action_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_dim, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_dim)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        return self.fc3(x)

# 에이전트 정의 (이전에 정의한 것과 동일해야 함)
class DQNAgent:
    def __init__(self, state_dim, action_dim, replay_buffer):
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.replay_buffer = replay_buffer
        self.model = DQN(state_dim, action_dim)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.0005)
        self.epsilon = 0.1  # 사용자 입력에서는 낮은 탐색 확률 사용
        self.epsilon_min = 0.01

    def select_action(self, state):
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state)
        action = q_values.max(1)[1].item()
        # top_k = 3
        # _, top_k_indices = q_values.topk(top_k, dim=1, largest=True, sorted=True)

        # # top_k_indices는 (1, 3) 형태의 텐서이므로, 이를 리스트로 변환
        # top_k_actions = top_k_indices.squeeze().tolist()

        return action #q_values #top_k_actions

    def update_model(self, batch_size):
        if len(self.replay_buffer) < batch_size:
            return
        batch = self.replay_buffer.sample(batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

        states = torch.FloatTensor(states)
        next_states = torch.FloatTensor(next_states)
        actions = torch.LongTensor(actions)
        rewards = torch.FloatTensor(rewards)
        dones = torch.FloatTensor(dones)

        print(self.model(states), actions.view(-1, 1))
        current_q_values = self.model(states).gather(1, actions.view(-1, 1)).squeeze(1)
        max_next_q_values = self.model(next_states).max(1)[0]
        expected_q_values = rewards + (1 - dones) * 0.99 * max_next_q_values

        loss = nn.MSELoss()(current_q_values, expected_q_values.detach())
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

# 모델 및 에이전트 초기화
state_dim = 6  # 상태 차원 (예시)
action_dim = len(menu_dict)  # 행동 차원 (예시, 메뉴 수)
replay_buffer = ReplayBuffer(10000)
agent = DQNAgent(state_dim, action_dim, replay_buffer)

# 체크포인트에서 모델 불러오기
checkpoint_path = './model/model_90000.pth'  # 예시 체크포인트 경로
checkpoint = torch.load(checkpoint_path)
agent.model.load_state_dict(checkpoint)
agent.model.eval()


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\gsuhy\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\gsuhy\AppData\Roaming\Python\Python312\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\gsuhy\AppData\Roaming\Python\Python312\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_lo

DQN(
  (fc1): Linear(in_features=6, out_features=128, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=116, bias=True)
)

In [5]:
import os
import torch

os.makedirs('./model', exist_ok=True)  # 디렉토리 생성
dummy_model = DQN(state_dim, action_dim)
torch.save(dummy_model.state_dict(), './model/model_90000.pth')

In [6]:
def get_user_input():
    feeling = float(input("기분을 숫자로 입력하세요 (예: 1~10, 1: 최악, 10: 최고): "))
    weather = float(input("날씨를 숫자로 입력하세요 (예: 0: 맑음, 1: 구름, 2: 흐림, 3: 비, 4: 눈): "))
    temp = float(input("체감 온도를 숫자로 입력하세요 (예: 1~10, 1: 추움, 10: 더움): "))
    time = float(input("체감 식사 시간대를 입력하세요 (0: 아침, 1: 점심, 2: 저녁, 3: 야식): "))
    num_people = float(input("인원수를 숫자로 입력하세요 (예: 1, 2, 3, 4 or 5명 이상: 5): "))
    gender = float(input("성별을 입력하세요 (0: 남성, 1: 여성): "))
    state = [feeling, weather, temp, time, num_people, gender]
    return state

def get_user_feedback(action):
    recommended_menus = menu_dict[action]
    feedback = int(input(f"추천된 메뉴: {recommended_menus}. 만족하셨나요? (1: 예, 0: 아니오): "))
    return feedback

In [7]:
# 사용자 입력 받기
state = get_user_input()

# 모델을 통해 메뉴 추천 받기
action = agent.select_action(state)

# 사용자 피드백 받기
reward = get_user_feedback(action)
done = True  # 단일 스텝이므로 에피소드 종료

# 리플레이 버퍼에 경험 추가
next_state = state  # 단일 스텝이므로 상태가 변하지 않음
agent.replay_buffer.push(state, action, reward, next_state, done)

In [8]:
# 사용자 피드백을 바탕으로 모델 업데이트   아직 구현 안됨
batch_size = 1  # 단일 사용자 피드백이므로 배치 크기를 1로 설정
print(action)
agent.update_model(batch_size)

20
tensor([[-1.4122, -1.5660, -1.2281, -2.0386, -1.6604, -1.4472, -1.4020, -1.6494,
         -2.3369, -1.8981, -0.6411, -1.4296, -1.6069, -2.1603, -2.4301, -1.0928,
         -2.3253, -1.0223, -1.2555, -1.3618, -0.5871, -1.1123, -1.1898, -1.2031,
         -1.4502, -1.1365, -1.2287, -1.3831, -1.1331, -1.7496, -1.5211, -3.8733,
         -1.4520, -1.3026, -3.1416, -1.9053, -1.4878, -2.6930, -1.6315, -1.5955,
         -1.5204, -1.0321, -1.1125, -1.2198, -1.4470, -1.2533, -1.2647, -1.0715,
         -1.4154, -1.0198, -1.8268, -1.2914, -0.9636, -1.1304, -2.8836, -1.6659,
         -1.7569, -1.3234, -1.1676, -1.2117, -3.6915, -1.4619, -1.3402, -1.2731,
         -1.0949, -1.1181, -1.7502, -1.2766, -1.2915, -1.6100, -1.8751, -1.1566,
         -1.3509, -1.6176, -1.5440, -1.4688, -1.2786, -1.7951, -1.3809, -1.6352,
         -1.5459, -1.5249, -1.2734, -1.3204, -1.6438, -1.3328, -1.4230, -1.5726,
         -0.9635, -1.1192, -1.7532, -1.3060, -1.5110, -1.0371, -1.7964, -1.6511,
         -2.9423, -1.2098