In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split


In [3]:
# Sample data: user_id, item_id, and rating (or quantity)
data = {
    'user_id': [1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4],
    'item_id': [101, 102, 103, 101, 104, 102, 104, 105, 103, 104, 106],
    'rating': [5, 3, 4, 2, 5, 3, 4, 5, 2, 4, 5]  # Using ratings here, but it can be quantity as well
}

In [4]:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/synthetic_supermarket_data.csv')

In [5]:
# Convert the data into a DataFrame
#df = pd.DataFrame(data)
print(df)


     user_id  item_id  rating
0         10       96       1
1         10       31       4
2          7       21       2
3         10       47       3
4          7       51       5
..       ...      ...     ...
995        4       78       1
996        8       15       2
997        5       79       4
998        7       81       3
999        2       17       4

[1000 rows x 3 columns]


In [6]:
# Map users and items to a unique ID
user_ids = df['user_id'].unique().tolist()
item_ids = df['item_id'].unique().tolist()

print(user_ids)
print(item_ids)

[10, 7, 9, 5, 8, 2, 4, 6, 1, 3]
[96, 31, 21, 47, 51, 67, 49, 80, 23, 78, 10, 77, 48, 25, 36, 20, 15, 24, 63, 94, 11, 42, 72, 19, 79, 12, 30, 84, 28, 38, 52, 16, 8, 88, 39, 83, 55, 95, 32, 92, 13, 69, 53, 7, 2, 57, 43, 74, 27, 100, 35, 87, 17, 61, 65, 34, 71, 76, 59, 18, 54, 40, 56, 4, 73, 75, 93, 90, 5, 68, 26, 1, 97, 85, 44, 98, 46, 66, 58, 64, 41, 45, 89, 37, 22, 29, 99, 9, 3, 91, 62, 50, 82, 14, 6, 33, 86, 70, 60, 81]


In [7]:
user_map = {user_id: idx for idx, user_id in enumerate(user_ids)}
item_map = {item_id: idx for idx, item_id in enumerate(item_ids)}

print(user_map)
print(item_map)

{10: 0, 7: 1, 9: 2, 5: 3, 8: 4, 2: 5, 4: 6, 6: 7, 1: 8, 3: 9}
{96: 0, 31: 1, 21: 2, 47: 3, 51: 4, 67: 5, 49: 6, 80: 7, 23: 8, 78: 9, 10: 10, 77: 11, 48: 12, 25: 13, 36: 14, 20: 15, 15: 16, 24: 17, 63: 18, 94: 19, 11: 20, 42: 21, 72: 22, 19: 23, 79: 24, 12: 25, 30: 26, 84: 27, 28: 28, 38: 29, 52: 30, 16: 31, 8: 32, 88: 33, 39: 34, 83: 35, 55: 36, 95: 37, 32: 38, 92: 39, 13: 40, 69: 41, 53: 42, 7: 43, 2: 44, 57: 45, 43: 46, 74: 47, 27: 48, 100: 49, 35: 50, 87: 51, 17: 52, 61: 53, 65: 54, 34: 55, 71: 56, 76: 57, 59: 58, 18: 59, 54: 60, 40: 61, 56: 62, 4: 63, 73: 64, 75: 65, 93: 66, 90: 67, 5: 68, 68: 69, 26: 70, 1: 71, 97: 72, 85: 73, 44: 74, 98: 75, 46: 76, 66: 77, 58: 78, 64: 79, 41: 80, 45: 81, 89: 82, 37: 83, 22: 84, 29: 85, 99: 86, 9: 87, 3: 88, 91: 89, 62: 90, 50: 91, 82: 92, 14: 93, 6: 94, 33: 95, 86: 96, 70: 97, 60: 98, 81: 99}


In [8]:
df['user_idx'] = df['user_id'].map(user_map)
df['item_idx'] = df['item_id'].map(item_map)


In [9]:
# Train-test split
train, test = train_test_split(df, test_size=0.2, random_state=42)

In [19]:
# Hyperparameters
num_users = len(user_ids)
num_items = len(item_ids)
embedding_dim = 100  # Size of latent factors
learning_rate = 0.001
batch_size = 256
epochs = 50

In [11]:
# TensorFlow model for matrix factorization
class MatrixFactorization(tf.keras.Model):
    def __init__(self, num_users, num_items, embedding_dim):
        super(MatrixFactorization, self).__init__()
        self.user_embedding = tf.keras.layers.Embedding(input_dim=num_users, output_dim=embedding_dim)
        self.item_embedding = tf.keras.layers.Embedding(input_dim=num_items, output_dim=embedding_dim)

    def call(self, inputs):
        user_vector = self.user_embedding(inputs[:, 0])
        item_vector = self.item_embedding(inputs[:, 1])
        dot_product = tf.reduce_sum(user_vector * item_vector, axis=1)
        return dot_product

In [12]:
# Prepare training data
train_data = tf.data.Dataset.from_tensor_slices((train[['user_idx', 'item_idx']].values, train['rating'].values))
train_data = train_data.shuffle(buffer_size=len(train)).batch(batch_size)

In [13]:
# Instantiate the model
model = MatrixFactorization(num_users, num_items, embedding_dim)


In [14]:
# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='mean_squared_error')

In [20]:
# Train the model
model.fit(train_data, epochs=epochs)

Epoch 1/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 7.0044
Epoch 2/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.5991
Epoch 3/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 6.2194
Epoch 4/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.5809
Epoch 5/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 5.3046
Epoch 6/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 4.8209
Epoch 7/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 4.3294 
Epoch 8/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.8886
Epoch 9/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.5452
Epoch 10/50
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 3.1829
Epoch 11/50
[1m4/4[0m [32

<keras.src.callbacks.history.History at 0x7ae5f41054e0>

In [16]:
# Function to recommend items for a given user
def recommend_items(user_id, model, num_recommendations=5):
    user_idx = user_map[user_id]

    # Predict scores for all items
    user_array = np.array([user_idx] * num_items)
    item_array = np.arange(num_items)

    predictions = model.predict(np.vstack((user_array, item_array)).T)

    # Get top N recommendations
    top_items = np.argsort(predictions)[-num_recommendations:][::-1]
    recommended_item_ids = [item_ids[i] for i in top_items]

    return recommended_item_ids


In [23]:
# Example: Recommend items for user 1
recommended_items = recommend_items(user_id=1, model=model)

print(f"Recommended items for user 1: {recommended_items}")

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
Recommended items for user 1: [57, 82, 79, 44, 38]
