In [1]:
import pandas as pd

data_1 = pd.read_csv('./input/payco_23.csv')
data_2 = pd.read_csv('./input/payco_2304.csv')
df = pd.concat([data_1,data_2])
df = df[['사원번호','사용처']].rename({'사원번호':'userid', '사용처':'itemid'}, axis=1).reset_index()

In [2]:
# Step 1: Create user-item matrix
from pandas.api.types import CategoricalDtype
from scipy.sparse import csr_matrix

# Create a new DataFrame with frequency count for each user-item pair
df_grouped = df.groupby(['userid', 'itemid']).size().reset_index(name='frequency')

user_u = list(sorted(df_grouped.userid.unique()))
item_u = list(sorted(df_grouped.itemid.unique()))

user_c = CategoricalDtype(sorted(df_grouped['userid'].unique()), ordered=True)
item_c = CategoricalDtype(sorted(df_grouped['itemid'].unique()), ordered=True)

row = df_grouped['userid'].astype(user_c).cat.codes
col = df_grouped['itemid'].astype(item_c).cat.codes
data = df_grouped['frequency'].tolist()

sparse_matrix = csr_matrix((data, (row, col)), shape=(len(user_u), len(item_u)))

df_user_item = pd.DataFrame.sparse.from_spmatrix(sparse_matrix, index=user_u, columns=item_u)

In [3]:
# Step 2: Define AutoRec model
import torch
from torch import nn

class AutoRec(nn.Module):
    def __init__(self, num_inputs, hidden_units):
        super(AutoRec, self).__init__()

        self.encoder = nn.Linear(num_inputs, hidden_units)
        self.decoder = nn.Linear(hidden_units, num_inputs)
        
    def forward(self, x):
        x = torch.sigmoid(self.encoder(x))
        x = self.decoder(x)
        return x

In [4]:
# Step 3: Train and Test AutoRec model
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
# Hyperparameters
hidden_units = 500
num_epochs = 100
batch_size = 64
learning_rate = 1e-3

model = AutoRec(df_user_item.shape[1], hidden_units).to(device)
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)


In [6]:
# Create DataLoaders
data = torch.FloatTensor(df_user_item.values).to(device)
dataset = TensorDataset(data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [7]:
# Training
model.train()
for epoch in range(num_epochs):
    for i, (inputs,) in enumerate(dataloader):
        inputs = inputs.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, inputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


Epoch 1/100, Loss: 0.17751044034957886
Epoch 2/100, Loss: 0.10845330357551575
Epoch 3/100, Loss: 0.0975332260131836
Epoch 4/100, Loss: 0.05658097192645073
Epoch 5/100, Loss: 0.031433962285518646
Epoch 6/100, Loss: 0.09726953506469727
Epoch 7/100, Loss: 0.03251245990395546
Epoch 8/100, Loss: 0.018965357914566994
Epoch 9/100, Loss: 0.015315351076424122
Epoch 10/100, Loss: 0.009196609258651733
Epoch 11/100, Loss: 0.010769040323793888
Epoch 12/100, Loss: 0.009781679138541222
Epoch 13/100, Loss: 0.005064076744019985
Epoch 14/100, Loss: 0.007658516056835651
Epoch 15/100, Loss: 0.005749749951064587
Epoch 16/100, Loss: 0.00856492854654789
Epoch 17/100, Loss: 0.0035773131530731916
Epoch 18/100, Loss: 0.0034561119973659515
Epoch 19/100, Loss: 0.0032567933667451143
Epoch 20/100, Loss: 0.007260635029524565
Epoch 21/100, Loss: 0.0023437871132045984
Epoch 22/100, Loss: 0.0031590082217007875
Epoch 23/100, Loss: 0.008594038896262646
Epoch 24/100, Loss: 0.0021084053441882133
Epoch 25/100, Loss: 0.00551

In [8]:
# Testing
model.eval()
with torch.no_grad():
    inputs = data
    outputs = model(inputs)
    print(outputs)


tensor([[ 0.0107,  0.0004, -0.0245,  ..., -0.0036, -0.0033,  0.0127],
        [ 0.0117,  0.0085, -0.0224,  ..., -0.0070, -0.0041,  0.0104],
        [ 0.0124, -0.0017, -0.0269,  ..., -0.0041, -0.0018,  0.0110],
        ...,
        [ 0.0124,  0.0393, -0.0340,  ...,  0.0030,  0.0040,  0.0100],
        [ 0.0132, -0.0047, -0.0287,  ..., -0.0029, -0.0035,  0.0104],
        [ 0.0136,  0.0089,  0.0094,  ..., -0.0087, -0.0093,  0.0099]],
       device='cuda:0')


In [18]:
# Step 4: Generate recommendations
import numpy as np

def user_free_inference(items, df_user_item, model, top_k=10):
    # Create a new user vector
    user_vector = np.zeros(df_user_item.shape[1])
    item_indices = []
    
    # Set the chosen items to the maximum value
    for item in items:
        if item in df_user_item.columns:
            item_index = df_user_item.columns.get_loc(item)
            user_vector[item_index] = df_user_item.values.max()
            item_indices.append(item_index)

    # Convert to tensor and move to the correct device
    user_vector = torch.FloatTensor([user_vector]).to(device)
    
    # Generate recommendations
    with torch.no_grad():
        outputs = model(user_vector)
        predicted_ratings = outputs.cpu().numpy()[0]

    # Remove the chosen items from the predictions
    predicted_ratings[item_indices] = -np.inf

    top_k_item_indices = np.argsort(-predicted_ratings)[:top_k]
    recommended_items = df_user_item.columns[top_k_item_indices]
    recommended_scores = predicted_ratings[top_k_item_indices]

    # Convert item and score to dictionary
    item_score_dict = dict(zip(recommended_items.tolist(), recommended_scores.tolist()))

    return item_score_dict

In [26]:
# Get recommendations for user with ID 1
print(user_free_inference(['최고야 전국5대짬뽕','버거킹(판교유스페이스)','서호돈가스'
                           ], 
                           df_user_item,
                            model))


{'킨파': 1.2579219341278076, '제주은희네해장국(판교점)': 1.2477407455444336, '(주)엔바이콘 판교순대': 1.152186393737793, '(주)엔바이콘 하이포크': 0.8855391144752502, '(주)엔바이콘 혼키라멘': 0.8552912473678589, '맥도날드(판교테크노밸리점)': 0.7202772498130798, '차이나오라': 0.6833038926124573, '하코야(판교우림점)': 0.6160076856613159, '춘업순대국과 0627 부대찌개': 0.5844380855560303, '조마루감자탕(판교점)': 0.5349248647689819}
