In [3]:
import pandas as pd

data_1 = pd.read_csv('../input/payco_23.csv')
data_2 = pd.read_csv('../input/payco_2304.csv')
df = pd.concat([data_1,data_2])
df = df[['사원번호','사용처']].rename({'사원번호':'userid', '사용처':'itemid'}, axis=1).reset_index()

In [4]:
# Step 1: Create user-item matrix
from pandas.api.types import CategoricalDtype
from scipy.sparse import csr_matrix

# Create a new DataFrame with frequency count for each user-item pair
df_grouped = df.groupby(['userid', 'itemid']).size().reset_index(name='frequency')

user_u = list(sorted(df_grouped.userid.unique()))
item_u = list(sorted(df_grouped.itemid.unique()))

user_c = CategoricalDtype(sorted(df_grouped['userid'].unique()), ordered=True)
item_c = CategoricalDtype(sorted(df_grouped['itemid'].unique()), ordered=True)

row = df_grouped['userid'].astype(user_c).cat.codes
col = df_grouped['itemid'].astype(item_c).cat.codes
data = df_grouped['frequency'].tolist()

sparse_matrix = csr_matrix((data, (row, col)), shape=(len(user_u), len(item_u)))

df_user_item = pd.DataFrame.sparse.from_spmatrix(sparse_matrix, index=user_u, columns=item_u)

In [5]:
# Step 2: Define AutoRec model
import torch
from torch import nn

class AutoRec(nn.Module):
    def __init__(self, num_inputs, hidden_units):
        super(AutoRec, self).__init__()

        self.encoder = nn.Linear(num_inputs, hidden_units)
        self.decoder = nn.Linear(hidden_units, num_inputs)
        
    def forward(self, x):
        x = torch.sigmoid(self.encoder(x))
        x = self.decoder(x)
        return x

In [6]:
# Step 3: Train and Test AutoRec model
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader, TensorDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [7]:
# Hyperparameters
hidden_units = 500
num_epochs = 100
batch_size = 64
learning_rate = 1e-3

model = AutoRec(df_user_item.shape[1], hidden_units).to(device)
criterion = nn.MSELoss()
optimizer = Adam(model.parameters(), lr=learning_rate)


In [8]:
# Create DataLoaders
data = torch.FloatTensor(df_user_item.values).to(device)
dataset = TensorDataset(data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [9]:
# Training
model.train()
for epoch in range(num_epochs):
    for i, (inputs,) in enumerate(dataloader):
        inputs = inputs.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, inputs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}")


Epoch 1/100, Loss: 0.1675979197025299
Epoch 2/100, Loss: 0.1058153510093689
Epoch 3/100, Loss: 0.06619088351726532
Epoch 4/100, Loss: 0.05609368532896042
Epoch 5/100, Loss: 0.042968522757291794
Epoch 6/100, Loss: 0.03719198331236839
Epoch 7/100, Loss: 0.017582252621650696
Epoch 8/100, Loss: 0.011145561933517456
Epoch 9/100, Loss: 0.013153713196516037
Epoch 10/100, Loss: 0.010720187798142433
Epoch 11/100, Loss: 0.0063661918975412846
Epoch 12/100, Loss: 0.007771084550768137
Epoch 13/100, Loss: 0.006385880522429943
Epoch 14/100, Loss: 0.00468917703256011
Epoch 15/100, Loss: 0.005302347242832184
Epoch 16/100, Loss: 0.004053828772157431
Epoch 17/100, Loss: 0.007856742478907108
Epoch 18/100, Loss: 0.005359386559575796
Epoch 19/100, Loss: 0.01242919359356165
Epoch 20/100, Loss: 0.0037751856725662947
Epoch 21/100, Loss: 0.008395659737288952
Epoch 22/100, Loss: 0.0026499277446419
Epoch 23/100, Loss: 0.002478748792782426
Epoch 24/100, Loss: 0.002113355090841651
Epoch 25/100, Loss: 0.004415900446

In [10]:
# Testing
model.eval()
with torch.no_grad():
    inputs = data
    outputs = model(inputs)
    print(outputs)


tensor([[ 2.8427e-03, -4.7674e-03, -1.2007e-03,  ..., -1.2878e-02,
         -1.5792e-03,  1.9722e-03],
        [ 1.3305e-03, -5.4564e-03, -6.8760e-03,  ..., -1.5616e-02,
         -3.1894e-03,  4.6495e-05],
        [ 2.8625e-03, -9.9125e-03, -1.0187e-03,  ..., -1.2099e-02,
         -3.4488e-03,  3.7934e-03],
        ...,
        [-6.1278e-04,  3.1359e-02,  1.1385e-02,  ..., -1.1529e-02,
         -4.4342e-04,  1.3491e-02],
        [ 4.0145e-03, -1.8813e-02, -6.6315e-03,  ..., -1.2514e-02,
         -1.7078e-03,  3.6300e-03],
        [ 3.6387e-03,  3.4081e-02, -7.7633e-03,  ..., -1.2935e-02,
         -9.5587e-03,  8.4596e-03]], device='cuda:0')


In [12]:
# Step 4: Generate recommendations
import numpy as np

def user_free_inference(items, df_user_item, model, top_k=10):
    # Create a new user vector
    user_vector = np.zeros(df_user_item.shape[1])
    item_indices = []

    # Set the chosen items to the maximum value
    for item in items:
        if item in df_user_item.columns:
            item_index = df_user_item.columns.get_loc(item)
            user_vector[item_index] = df_user_item.values.max()
            item_indices.append(item_index)
        else:
            raise ValueError(f"Item {item} not found in the data")

    # Convert to tensor and move to the correct device
    user_vector = torch.FloatTensor([user_vector]).to(device)

    # Generate recommendations
    with torch.no_grad():
        outputs = model(user_vector)
        predicted_ratings = outputs.cpu().numpy()[0]

    # Remove the chosen items from the predictions
    predicted_ratings[item_indices] = -np.inf

    top_k_item_indices = np.argsort(-predicted_ratings)[:top_k]
    recommended_items = df_user_item.columns[top_k_item_indices]
    recommended_scores = predicted_ratings[top_k_item_indices]

    # Convert item and score to dictionary
    item_score_dict = dict(zip(recommended_items.tolist(), recommended_scores.tolist()))

    # Print each item and it score
    for item, score in item_score_dict.items():
        print(f"{item}: {score}")

In [13]:
# Get recommendations
item_list = [
    '킨파',
    '서호돈가스',
    '버거킹(판교유스페이스)',
    '일상화식'
]

user_free_inference(item_list, df_user_item, model)


(주)엔바이콘 판교순대: 2.1938366889953613
써브웨이(판교브릿지타워점): 1.1317442655563354
봉추찜닭(판교테크노밸리점): 0.9974612593650818
제주은희네해장국(판교점): 0.9058394432067871
(주)엔바이콘 하림닭요리: 0.8551080822944641
(주)엔바이콘 왕스덕: 0.758247435092926
듬박이찌개(판교점): 0.7211755514144897
(주)오전오후: 0.48437389731407166
일류(판교점): 0.45968949794769287
쿠차라(판교카카오점): 0.409889280796051
