### Training and Uploading Models to Supabase

In [1]:
import pickle
import lzma
import tempfile
import io
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import os
from collections import defaultdict
from sklearn.cluster import KMeans
from supabase import create_client
import sklearn
import time
import tensorflow as tf
from tensorflow import keras

In [2]:
# Loading environment variables

# Load variables via python-dotenv, then build the single Supabase client that
# every loader/uploader in this notebook shares through the module-level name
# `supabase`.
load_dotenv()

url = os.environ.get('SUPABASE_URL')
key = os.environ.get('SUPABASE_KEY')
# NOTE(review): either value is None when the variable is missing — confirm
# create_client reports that clearly.
supabase = create_client(url, key)

# Function for loading rating data

def load_rating(batch_size=1000):
    """Download the complete "Ratings" table from Supabase as a DataFrame.

    The exact row count is requested first; the table is then read in
    consecutive windows of ``batch_size`` rows and the pages are concatenated.

    Args:
        batch_size: Number of rows requested per page.

    Returns:
        pandas.DataFrame: One row per downloaded record (empty if nothing was
        fetched).
    """
    # NOTE(review): no explicit ordering is requested, so paging presumably
    # relies on the server returning rows in a stable order — confirm.
    total_rows = supabase.table("Ratings").select("user", count="exact").execute().count

    records = []
    for first in range(0, total_rows, batch_size):
        # The window's last index is inclusive and clamped to the final row.
        last = min(first + batch_size, total_rows) - 1
        page = supabase.table("Ratings").select("*").range(first, last).execute().data

        if not page:
            # An empty page means there is nothing more to read.
            break
        records.extend(page)

    return pd.DataFrame(records)

# Function for loading user model map data

def load_user_model_map_by_userid(userid):
    """Return the "User_Model_Map" rows whose ``userid`` column equals ``userid``.

    Args:
        userid: Value matched against the ``userid`` column.

    Returns:
        pandas.DataFrame: The matching rows, or an empty DataFrame when the
        query returns no data.
    """
    query = supabase.table("User_Model_Map").select("*").eq("userid", userid)
    rows = query.execute().data
    return pd.DataFrame(rows) if rows else pd.DataFrame()
    
# Function for loading course data 

def load_course():
    """Fetch the "Course_Info" table in one request and return it as a DataFrame."""
    # NOTE(review): unlike load_rating this is a single un-paged request —
    # confirm the table fits within the server's per-request row limit.
    response = supabase.table("Course_Info").select("*").execute()
    course_rows = response.data
    return pd.DataFrame(course_rows)

# Function for loading course bag of words data

def load_course_BOW(batch_size=1000):
    """Download the complete "Course_BOW" (bag-of-words) table as a DataFrame.

    Works like ``load_rating``: ask for the exact row count, then read the
    table page by page with inclusive ``range(start, end)`` windows.

    Args:
        batch_size: Number of rows requested per page.

    Returns:
        pandas.DataFrame: All downloaded rows (empty if nothing was fetched).
    """
    count_response = supabase.table("Course_BOW").select("doc_id", count="exact").execute()
    total_rows = count_response.count

    collected = []
    for offset in range(0, total_rows, batch_size):
        upper = min(offset + batch_size, total_rows) - 1  # inclusive upper bound
        chunk = supabase.table("Course_BOW").select("*").range(offset, upper).execute().data

        if not chunk:
            break
        collected += chunk

    return pd.DataFrame(collected)

# Function for loading course genre data

def load_course_genre():
    """Fetch the "Course Genres" table in one request and return it as a DataFrame."""
    # NOTE(review): single un-paged request — confirm the table fits within the
    # server's per-request row limit.
    response = supabase.table("Course Genres").select("*").execute()
    genre_rows = response.data
    return pd.DataFrame(genre_rows)

### Course Similarity Model (Initialization and Storing in Supabase Storage)

In [3]:
def course_similarity_train():
    """Build the course-to-course cosine-similarity model and store it in Supabase.

    Steps:
      1. Load the course table and the bag-of-words table.
      2. Pivot the bag-of-words rows into a document-term matrix and align its
         rows to ``course_df['COURSE_ID']``; a course without tokens gets an
         all-zero row.
      3. Compute pairwise cosine similarity between the rows.
      4. Pickle ``{"similarity_matrix", "id_idx_dict"}``, LZMA-compress it and
         upload (or overwrite) ``course_similarity_model.xz`` in the bucket.

    Returns:
        str: Status line. Failures while writing or uploading the artefact are
        reported in the string rather than raised.
    """
    bucket = "course-recommendation-models"
    file_name = "course_similarity_model.xz"

    course_df = load_course()
    bow_df = load_course_BOW()

    course_ids = course_df['COURSE_ID'].tolist()

    # Row i of the similarity matrix is course_ids[i] (see the reindex below),
    # so the id -> row lookup is derived from that very list. Building it from
    # the BOW table's doc_index order would only agree with the matrix when
    # both tables happen to list the courses in the same order.
    id_idx_dict = {course_id: row for row, course_id in enumerate(course_ids)}

    dtm = bow_df.pivot_table(index='doc_id', columns='token', values='bow', fill_value=0)
    dtm = dtm.reindex(course_ids).fillna(0)
    similarity_matrix = cosine_similarity(dtm)

    obj = {
        "similarity_matrix": similarity_matrix,
        "id_idx_dict": id_idx_dict
    }

    # Reserve a temp path and close the handle right away so the file can be
    # reopened by lzma/open below.
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xz")
    tmp.close()

    try:
        with lzma.open(tmp.name, "wb") as f:
            pickle.dump(obj, f)

        existing_files = [file["name"] for file in supabase.storage.from_(bucket).list()]

        with open(tmp.name, "rb") as f:
            if file_name in existing_files:
                supabase.storage.from_(bucket).update(file_name, f)
                status = "✅ Trained and Updated Course Similarity on Supabase Storage"
            else:
                supabase.storage.from_(bucket).upload(file_name, f)
                status = "✅ Trained and Uploaded Course Similarity to Supabase Storage"

    except Exception as e:
        status = f"❌ Error during training or upload: {e}"
    finally:
        os.remove(tmp.name)

    return status

# Run the training/upload once; the returned status string is the cell output below.
course_similarity_train()

'✅ Trained and Uploaded Course Similarity to Supabase Storage'

### User Profile Model (Initialization and Storing in Supabase Storage)

In [4]:
def user_profile_train():
    """Compute per-user genre profiles and store them in Supabase Storage.

    A user's profile is the product of their ratings row with the course-genre
    matrix (``ratings @ genres``), giving one weight per genre column.

    Returns:
        str: Status line. Failures while writing or uploading the artefact are
        reported in the string rather than raised.
    """
    bucket = "course-recommendation-models"
    file_name = "user_profile_matrix.xz"

    # Rename by column *name* (the names used by the other training functions)
    # rather than by position, so the result does not depend on the order or
    # number of columns returned by the API.
    users_df = load_rating().rename(
        columns={'user': 'User_ID', 'item': 'COURSE_ID', 'rating': 'Rating'}
    )
    course_genres_df = load_course_genre()

    user_course_rating = users_df.pivot_table(index='User_ID', columns='COURSE_ID', values='Rating', fill_value=0.0)

    course_ids = course_genres_df['COURSE_ID'].values
    # Everything from the third column onward is treated as genre columns.
    # NOTE(review): assumes the first two columns are non-genre metadata — confirm.
    genre_df = course_genres_df.iloc[:, 2:]

    # Align rating columns to the genre table's course order; courses nobody
    # rated become all-zero columns.
    user_course_rating = user_course_rating.reindex(columns=course_ids, fill_value=0.0).astype(float)
    user_profiles = np.dot(user_course_rating.values, genre_df.astype(float).values)

    profile_df = pd.DataFrame(user_profiles, columns=genre_df.columns)
    profile_df.insert(0, 'User_ID', user_course_rating.index)

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xz")
    tmp.close()

    try:
        with lzma.open(tmp.name, "wb") as f:
            pickle.dump(profile_df, f)

        existing_files = [file["name"] for file in supabase.storage.from_(bucket).list()]

        with open(tmp.name, "rb") as f:
            if file_name in existing_files:
                supabase.storage.from_(bucket).update(file_name, f)
                status = "✅ Trained and Updated User Profiles on Supabase"
            else:
                supabase.storage.from_(bucket).upload(file_name, f)
                status = "✅ Trained and Uploaded User Profiles to Supabase"

    except Exception as e:
        status = f"❌ Error during training/upload: {e}"
    finally:
        os.remove(tmp.name)

    return status

# Run the training/upload once; the returned status string is the cell output below.
user_profile_train()

'✅ Trained and Uploaded User Profiles to Supabase'

### Clustering with and without PCA Model (Initialization and Storing in Supabase Storage)

In [5]:
def do_PCA(user_features_df, expected_variance = 90):
    """Project the features onto the fewest principal components that reach
    ``expected_variance`` percent of explained variance.

    PCA is fitted once with all components; the number kept is the smallest
    ``k`` whose cumulative explained-variance ratio is >= the target. If the
    target is never reached, every component is kept. A single-feature input
    is handled and returns one component.

    Args:
        user_features_df: 2-D feature table (rows are samples).
        expected_variance: Target explained variance, in percent.

    Returns:
        pandas.DataFrame: The projected data with columns ``PC_0 .. PC_{k-1}``.
    """
    # Imported here because a bare `import sklearn` does not guarantee that the
    # `decomposition` submodule has been loaded.
    from sklearn.decomposition import PCA

    target_ratio = expected_variance / 100

    pca = PCA()
    transformed_matrix = pca.fit_transform(user_features_df)

    cumulative_ratio = np.cumsum(pca.explained_variance_ratio_)
    reached = np.flatnonzero(cumulative_ratio >= target_ratio)
    n_com = int(reached[0]) + 1 if reached.size else len(cumulative_ratio)

    # Components are ordered by explained variance, so the first n_com columns
    # are the retained projection.
    transformed_df = pd.DataFrame(transformed_matrix[:, :n_com])
    transformed_df.columns = [f"PC_{i}" for i in range(n_com)]

    return transformed_df

In [6]:
def kMeans_train(kMeans_model , n_clusters=25):
    """Cluster users by their standardised genre profile and upload the labels.

    Args:
        kMeans_model: ``'Clustering with PCA'`` applies ``do_PCA`` (90 %) to the
            scaled profiles before clustering and stores the result as
            ``kMeans_PCA_model.xz``; any other value clusters the scaled
            profiles directly and stores ``kMeans_model.xz``.
        n_clusters: Number of K-Means clusters.

    Returns:
        str: Status line. Failures while writing or uploading the artefact are
        reported in the string rather than raised.
    """
    bucket = "course-recommendation-models"
    use_pca = kMeans_model == 'Clustering with PCA'
    file_name = "kMeans_PCA_model.xz" if use_pca else "kMeans_model.xz"

    rating_df = load_rating()
    course_genres_df = load_course_genre()

    # Genre columns start at the third column of the genre table.
    genre_part = course_genres_df.iloc[:, 2:]
    genre_cols = genre_part.columns
    course_genres_matrix = genre_part.astype(float).to_numpy()
    course_ids = course_genres_df['COURSE_ID'].values

    # users x courses rating matrix, aligned to the genre table's course order.
    ratings_wide = rating_df.pivot(index='user', columns='item', values='rating')
    ratings_wide = ratings_wide.reindex(columns=course_ids, fill_value=0.0)
    user_course_rating = ratings_wide.fillna(0.0).astype(float).sort_index()

    # Raw genre profile per user, then zero-mean / unit-variance scaling.
    raw_profiles = pd.DataFrame(
        np.dot(user_course_rating.values, course_genres_matrix),
        columns=genre_cols,
    )
    user_ids2_idx = pd.DataFrame({'User_ID': user_course_rating.index.values})
    scaled_profiles = sklearn.preprocessing.StandardScaler().fit_transform(raw_profiles)
    user_features_df = pd.DataFrame(scaled_profiles, columns=genre_cols)

    if use_pca:
        user_features_df = do_PCA(user_features_df, 90)

    kmeans = KMeans(n_clusters=n_clusters)
    kmeans.fit(user_features_df)

    # Pair every user id with its cluster label (row order is shared).
    labels_df = pd.DataFrame(kmeans.labels_)
    user_cluster_label_df = pd.merge(user_ids2_idx, labels_df, left_index=True, right_index=True)
    user_cluster_label_df.columns = ['user', 'cluster']

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xz")
    tmp.close()
    try:
        with lzma.open(tmp.name, "wb") as archive:
            pickle.dump(user_cluster_label_df, archive)

        stored_names = [entry["name"] for entry in supabase.storage.from_(bucket).list()]

        with open(tmp.name, "rb") as payload:
            if file_name in stored_names:
                supabase.storage.from_(bucket).update(file_name, payload)
                status = f"✅ Trained and Updated {kMeans_model} on Supabase"
            else:
                payload.seek(0)
                supabase.storage.from_(bucket).upload(file_name, payload)
                status = f"✅ Trained and Uploaded {kMeans_model} to Supabase"

    except Exception as e:
        status = f"❌ Error during training/upload: {e}"
    finally:
        os.remove(tmp.name)

    return status

# Train the plain (no PCA) variant; the returned status string is the cell output below.
kMeans_train('Clustering')

'✅ Trained and Uploaded Clustering to Supabase'

In [7]:
# Train the PCA variant; the returned status string is the cell output below.
kMeans_train("Clustering with PCA")

'✅ Trained and Uploaded Clustering with PCA to Supabase'

### Neural Collaborative Filtering Model (Initialization and Storing in Supabase Storage)

In [8]:
def ncf_create(n_users: int, n_items: int,
               latent_dim_mf: int = 32, latent_dim_mlp: int = 32,
               reg_mf: float = 0, reg_mlp: float = 0.001,
               dense_layers: list = [16, 8, 4],
               reg_layers: list = [0.01, 0.01, 0.01],
               activation_dense: str = 'relu'
) -> keras.Model:
    """Build an (uncompiled) two-branch neural collaborative-filtering model.

    One branch multiplies user and item embeddings element-wise; the other
    concatenates a second pair of embeddings and passes them through a stack
    of dense layers. Both branches are concatenated and fed to a single
    sigmoid unit named ``interaction``.

    Args:
        n_users: ``input_dim`` of both user embeddings.
        n_items: ``input_dim`` of both item embeddings.
        latent_dim_mf: Embedding width of the multiplicative branch.
        latent_dim_mlp: Embedding width of the dense branch.
        reg_mf: L2 factor for the multiplicative-branch embeddings.
        reg_mlp: L2 factor for the dense-branch embeddings.
        dense_layers: Units per dense layer (read only, never mutated).
        reg_layers: L2 activity-regularisation factor per dense layer; must
            provide at least one value per entry of ``dense_layers``.
        activation_dense: Activation used by every dense layer.

    Returns:
        keras.Model: Inputs ``user_id`` and ``item_id`` (scalar int32 each),
        one sigmoid output.

    Raises:
        ValueError: If ``reg_layers`` is shorter than ``dense_layers``.
    """
    if len(reg_layers) < len(dense_layers):
        # Fail before any layer is built instead of with an IndexError halfway.
        raise ValueError(
            f"reg_layers has {len(reg_layers)} entries but dense_layers has {len(dense_layers)}"
        )

    def _embedding(name, input_dim, output_dim, reg):
        # All four embeddings share the initialiser and differ only in size/L2.
        return keras.layers.Embedding(input_dim = input_dim,
                                      output_dim = output_dim,
                                      name = name,
                                      embeddings_initializer = 'RandomNormal',
                                      embeddings_regularizer = keras.regularizers.l2(reg)
                                     )

    user = keras.Input(shape=(), dtype='int32', name='user_id')
    item = keras.Input(shape=(), dtype='int32', name='item_id')

    mf_user_embedding = _embedding('mf_user_embedding', n_users, latent_dim_mf, reg_mf)
    mf_item_embedding = _embedding('mf_item_embedding', n_items, latent_dim_mf, reg_mf)
    mlp_user_embedding = _embedding('mlp_user_embedding', n_users, latent_dim_mlp, reg_mlp)
    mlp_item_embedding = _embedding('mlp_item_embedding', n_items, latent_dim_mlp, reg_mlp)

    mf_user_latent = keras.layers.Flatten()(mf_user_embedding(user))
    mf_item_latent = keras.layers.Flatten()(mf_item_embedding(item))

    mlp_user_latent = keras.layers.Flatten()(mlp_user_embedding(user))
    mlp_item_latent = keras.layers.Flatten()(mlp_item_embedding(item))

    mf_cat_latent = keras.layers.Multiply()([mf_user_latent, mf_item_latent])
    mlp_vector = keras.layers.Concatenate()([mlp_user_latent, mlp_item_latent])

    # zip stops at the end of dense_layers; surplus reg_layers entries are ignored.
    for i, (units, reg) in enumerate(zip(dense_layers, reg_layers)):
        mlp_vector = keras.layers.Dense(
                      units = units,
                      activation = activation_dense,
                      activity_regularizer = keras.regularizers.l2(reg),
                      name = f'layer{i}',
                     )(mlp_vector)

    predict_layer = keras.layers.Concatenate()([mf_cat_latent, mlp_vector])
    output = keras.layers.Dense(
                   units = 1,
                   activation = 'sigmoid',
                   kernel_initializer = 'lecun_uniform',
                   name = 'interaction'
                  )(predict_layer)

    return keras.Model(inputs = [user, item], outputs = [output])

In [9]:
def ncf_data_prep(df: pd.DataFrame) -> tuple[pd.DataFrame, dict, dict]:
    """Turn a long ratings table into a dense binary interaction table.

    Every (user, item) combination becomes one row: ``interaction`` is 1.0 when
    the pair has a rating > 0 and 0.0 otherwise (missing pairs count as 0).
    Users and items are replaced by contiguous indices following the sorted
    order produced by ``DataFrame.pivot``.

    Args:
        df: Ratings with columns ``user``, ``item`` and ``rating``; each
            (user, item) pair may appear at most once (``pivot`` raises on
            duplicates).

    Returns:
        tuple: ``(df_train, user_id2idx, items_id2idx)`` where ``df_train`` has
        integer columns ``user_id`` / ``item_id`` and a float32 ``interaction``
        column, sorted by ``user_id`` then ``item_id``; the two dicts map the
        original ids to those indices.
    """
    user_item = df.pivot(index='user', columns='item', values='rating').fillna(0)

    user_id2idx = {user_id: idx for idx, user_id in enumerate(user_item.index)}
    items_id2idx = {item_id: idx for idx, item_id in enumerate(user_item.columns)}

    # Vectorised binarisation of the whole users x items grid.
    interactions = (user_item.to_numpy() > 0).astype('float32')
    n_users, n_items = interactions.shape

    # Row-major flattening yields rows already ordered by (user_id, item_id).
    df_train = pd.DataFrame({
        'user_id': np.repeat(np.arange(n_users), n_items).astype('int'),
        'item_id': np.tile(np.arange(n_items), n_users).astype('int'),
        'interaction': interactions.ravel(),
    })

    return df_train, user_id2idx, items_id2idx


def ncf_build_train_val_dataset(df: pd.DataFrame, val_split: float = 0.1, batch_size: int = 512, rs: int = 42):
    """Split an interaction table into batched train / validation ``tf.data`` sets.

    Args:
        df: Table with ``user_id``, ``item_id`` and ``interaction`` columns.
            It is not modified; casting happens on a copy.
        val_split: Fraction of rows (after the optional shuffle) used for
            validation.
        batch_size: Batch size of both datasets.
        rs: Seed for the shuffle. Pass ``None`` (or ``False``) to keep the
            incoming row order; ``0`` is a valid seed.

    Returns:
        tuple: ``(ds_train, ds_val)``; each element is
        ``({'user_id', 'item_id'}, interaction)``.
    """
    # astype returns a new frame, so the caller's DataFrame keeps its dtypes.
    df = df.astype({'user_id': 'int32', 'item_id': 'int32', 'interaction': 'float32'})

    if rs is not None and rs is not False:
        df = df.sample(frac=1, random_state=rs).reset_index(drop=True)

    n_val = round(len(df) * val_split)
    x = {
        'user_id': df['user_id'].values,
        'item_id': df['item_id'].values
    }
    y = df['interaction'].values

    ds = tf.data.Dataset.from_tensor_slices((x, y))

    # The first n_val rows form the validation set, the rest the training set.
    ds_val = ds.take(n_val).batch(batch_size)
    ds_train = ds.skip(n_val).batch(batch_size)

    return ds_train, ds_val

In [10]:
def ncf_train_model(ds_train, ds_val, n_epochs: int = 10, n_users: int = None, n_items: int = None):
    """Create, compile and fit the NCF model.

    Args:
        ds_train: Training dataset passed to ``fit``.
        ds_val: Validation dataset passed to ``fit``.
        n_epochs: Number of training epochs.
        n_users: Number of distinct users (embedding size). When omitted it is
            counted from a fresh ``load_rating()`` download.
        n_items: Number of distinct items (embedding size). When omitted it is
            counted from a fresh ``load_rating()`` download.

    Returns:
        tuple: ``(model, history)`` as returned by ``fit``.
    """
    if n_users is None or n_items is None:
        # Count ids directly. Taking the shape of the pivoted table after
        # reset_index() would include the extra 'user' column and overstate
        # the item count by one.
        ratings = load_rating()
        if n_users is None:
            n_users = ratings['user'].nunique()
        if n_items is None:
            n_items = ratings['item'].nunique()

    ncf_model = ncf_create(n_users=n_users, n_items=n_items)
    ncf_model.compile(optimizer = "adam",
                    loss = 'binary_crossentropy',
                    metrics = [
                                tf.keras.metrics.TruePositives(name="tp"),
                                tf.keras.metrics.FalsePositives(name="fp"),
                                tf.keras.metrics.TrueNegatives(name="tn"),
                                tf.keras.metrics.FalseNegatives(name="fn"),
                                tf.keras.metrics.BinaryAccuracy(name="accuracy"),
                                tf.keras.metrics.Precision(name="precision"),
                                tf.keras.metrics.Recall(name="recall"),
                                tf.keras.metrics.AUC(name="auc"),
                                ]
                    )

    # NOTE(review): writes a private attribute; whether this changes the
    # public model name depends on the Keras version — confirm.
    ncf_model._name = 'neural_collaborative_filtering'
    ncf_hist = ncf_model.fit(x=ds_train,
                             validation_data=ds_val,
                             epochs=n_epochs,
                             verbose=1
                            )
    return ncf_model, ncf_hist

In [11]:
def check_file_exists(bucket: str, file_path: str) -> bool:
    """Report whether ``file_path`` is listed in the given storage bucket.

    The path is split at its last ``/`` into folder and file name; the folder
    is listed and the file name looked up in the result. Any exception, or a
    listing that is not a list, is reported as ``False``.
    """
    try:
        # rpartition gives ("", "", name) when there is no "/", i.e. the
        # bucket root is listed.
        folder, _, file_name = file_path.rpartition("/")

        listing = supabase.storage.from_(bucket).list(path=folder)
        if isinstance(listing, list):
            names = [entry['name'] for entry in listing]
            return file_name in names
        return False

    except Exception:
        return False
    
def load_model_metadata_from_supabase(bucket, file_name):
    """Download a stored model bundle and return its training metadata.

    Returns:
        tuple: ``(trained_on, user_id2idx)`` read from the bundle's keys of the
        same names. ``(empty DataFrame, {})`` is returned when a key is
        missing or when anything goes wrong.
    """
    try:
        downloaded = supabase.storage.from_(bucket).download(file_name)
        if isinstance(downloaded, bytes):
            payload = downloaded
        else:
            payload = getattr(downloaded, "data", None)

        # SECURITY: unpickling executes arbitrary code from the payload; this
        # is only acceptable because the bucket is written by this notebook.
        with lzma.open(io.BytesIO(payload), "rb") as stream:
            bundle = pickle.load(stream)

        trained_on = bundle.get("trained_on", pd.DataFrame())
        user_id2idx = bundle.get("user_id2idx", {})
        return trained_on, user_id2idx

    except Exception:
        return pd.DataFrame(), {}

def upload_model_and_mappings_to_supabase(model, df, user_id2idx, item_id2idx, bucket, file_name):
    """Bundle a Keras model with its id mappings and upload the bundle.

    The model is saved to a temporary ``.keras`` file and read back as bytes.
    Those bytes, ``df`` and both mappings are pickled into one LZMA-compressed
    file, which is uploaded to (or overwrites) ``file_name`` in ``bucket``.

    Args:
        model: Object with a ``save(path)`` method.
        df: Data the model was trained on; stored under ``"trained_on"``.
        user_id2idx: Stored under ``"user_id2idx"``.
        item_id2idx: Stored under ``"item_id2idx"``.
        bucket: Target storage bucket.
        file_name: Object name inside the bucket.

    Returns:
        str: Status line. Any failure while saving, bundling or uploading is
        reported in the string rather than raised, and both temporary files
        are always removed.
    """
    status = ""

    # Reserve both temp paths and close the handles immediately so that
    # model.save()/lzma.open() can reopen them.
    tmp_model = tempfile.NamedTemporaryFile(suffix=".keras", delete=False)
    tmp_model.close()
    tmp_bundle = tempfile.NamedTemporaryFile(delete=False, suffix=".xz")
    tmp_bundle.close()

    try:
        model.save(tmp_model.name)

        with open(tmp_model.name, "rb") as f_model:
            model_binary = f_model.read()

        obj = {
            "model": model_binary,
            "trained_on": df,
            "user_id2idx": user_id2idx,
            "item_id2idx": item_id2idx
        }

        with lzma.open(tmp_bundle.name, "wb") as f:
            pickle.dump(obj, f)

        existing_files = [file["name"] for file in supabase.storage.from_(bucket).list()]

        with open(tmp_bundle.name, "rb") as f:
            if file_name in existing_files:
                supabase.storage.from_(bucket).update(file_name, f)
                status = f"✅ Trained and Updated NCF model to `{file_name}`"
            else:
                supabase.storage.from_(bucket).upload(file_name, f)
                status = f"✅ Trained and Uploaded NCF model to `{file_name}`"

    except Exception as e:
        status = f"❌ Error during saving or upload: {e}"
    finally:
        for path in (tmp_model.name, tmp_bundle.name):
            try:
                os.remove(path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass

    return status

In [12]:
def NCF_train():
    """Train and upload the NCF model unless the stored one is still current.

    The stored bundle counts as current when every user in the ratings table
    is already in its ``user_id2idx`` mapping and the ratings table has the
    same shape as the table it was trained on. Otherwise, or when no bundle
    exists, the model is retrained for 10 epochs and uploaded as
    ``ncf_model.xz``.

    Returns:
        str: Status line describing what was done.
    """
    bucket = "course-recommendation-models"
    file_name = "ncf_model.xz"

    # Downloaded once and reused for both the freshness check and training.
    ratings_df = load_rating()

    if check_file_exists(bucket, file_name):
        trained_on, user_map = load_model_metadata_from_supabase(bucket, file_name)

        current_users = set(ratings_df["user"].unique())
        # NOTE(review): only the user set and table shape are compared, so an
        # edited rating value would not trigger a retrain — confirm intended.
        if current_users.issubset(user_map.keys()) and ratings_df.shape == trained_on.shape:
            return f"✅ No new users. NCF model already up-to-date at `{file_name}`"

    df_train, user_id2idx, item_id2idx = ncf_data_prep(ratings_df)
    ds_train, ds_val = ncf_build_train_val_dataset(df=df_train, val_split=0.1, rs=42)

    model, _ = ncf_train_model(ds_train=ds_train, ds_val=ds_val, n_epochs=10)

    return upload_model_and_mappings_to_supabase(
        model,
        ratings_df,
        user_id2idx=user_id2idx,
        item_id2idx=item_id2idx,
        bucket=bucket,
        file_name=file_name
    )

# Run the NCF training/upload; the Keras progress log and final status string follow as cell output.
NCF_train()

Epoch 1/10
[1m7509/7509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m372s[0m 49ms/step - accuracy: 0.9433 - auc: 0.7030 - fn: 101772.8438 - fp: 2623.6624 - loss: 0.3700 - precision: 0.2831 - recall: 0.0187 - tn: 1815456.6250 - tp: 2962.7554 - val_accuracy: 0.9531 - val_auc: 0.9174 - val_fn: 17403.0000 - val_fp: 2626.0000 - val_loss: 0.1307 - val_precision: 0.6978 - val_recall: 0.2584 - val_tn: 401060.0000 - val_tp: 6064.0000
Epoch 2/10
[1m7509/7509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m323s[0m 43ms/step - accuracy: 0.9579 - auc: 0.9371 - fn: 65778.8438 - fp: 13097.3809 - loss: 0.1157 - precision: 0.7405 - recall: 0.3485 - tn: 1804982.8750 - tp: 38956.7617 - val_accuracy: 0.9568 - val_auc: 0.9336 - val_fn: 14085.0000 - val_fp: 4372.0000 - val_loss: 0.1198 - val_precision: 0.6821 - val_recall: 0.3998 - val_tn: 399314.0000 - val_tp: 9382.0000
Epoch 3/10
[1m7509/7509[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m355s[0m 47ms/step - accuracy: 0.9666 - auc: 0.9640 - fn: 492

'✅ Trained and Uploaded NCF model to `ncf_model.xz`'

### Regression and Classification with Embedding Features Model (Initialization and Storing in Supabase Storage)

In [13]:
def emb_create(n_user, n_item, n_user_latent_dim: int = 16, n_item_latent_dim: int = 16, reg_users: float = 1e-6, reg_items: float = 1e-6) -> keras.Model:
    """Build the (uncompiled) embedding-extraction rating model.

    User and item ids are embedded, combined with a normalised dot product and
    passed through dropout and a stack of linear dense layers
    (64 -> 32 -> 16 -> 8) to a single ReLU unit named ``rating``. The layers
    ``user_embedding`` and ``item_embedding`` hold the latent features that are
    read back after training.

    The bias embeddings and the stand-alone user sub-model that used to be
    created here were never connected to the output (and the item bias was
    sized with the user count), so they are no longer built. The returned
    model is unchanged by that.

    Args:
        n_user: ``input_dim`` of the user embedding.
        n_item: ``input_dim`` of the item embedding.
        n_user_latent_dim: Width of the user embedding.
        n_item_latent_dim: Width of the item embedding.
        reg_users: L2 factor for the user embedding.
        reg_items: L2 factor for the item embedding.

    Returns:
        keras.Model: Named ``embedding_extraction_model``, with inputs
        ``user`` and ``item`` (scalar int32 each).
    """
    user_input = keras.layers.Input(shape=(), dtype='int32', name='user')
    item_input = keras.layers.Input(shape=(), dtype='int32', name='item')

    user_embedding = keras.layers.Embedding(input_dim=n_user,
                    output_dim=n_user_latent_dim,
                    name='user_embedding',
                    embeddings_initializer="he_normal",
                    embeddings_regularizer=keras.regularizers.l2(reg_users)
                    )(user_input)

    item_embedding = keras.layers.Embedding(input_dim=n_item,
                    output_dim=n_item_latent_dim,
                    name='item_embedding',
                    embeddings_initializer="he_normal",
                    embeddings_regularizer=keras.regularizers.l2(reg_items)
                    )(item_input)

    merged = keras.layers.Dot(name='dot', normalize=True, axes=1)([user_embedding, item_embedding])
    hidden = keras.layers.Dropout(0.2)(merged)

    # Three dense+dropout pairs followed by a final dense layer without dropout.
    for position, units in enumerate((64, 32, 16), start=1):
        hidden = keras.layers.Dense(units=units, name=f'Dense_{position}')(hidden)
        hidden = keras.layers.Dropout(0.2, name=f'Dropout_{position}')(hidden)
    hidden = keras.layers.Dense(units=8, name='Dense_4')(hidden)

    result = keras.layers.Dense(1, name='rating', activation='relu')(hidden)

    # The name is passed through the public constructor argument instead of
    # being written to the private `_name` attribute afterwards.
    return keras.models.Model(
        inputs=[user_input, item_input],
        outputs=[result],
        name='embedding_extraction_model',
    )

In [14]:
def emb_data_prep(raw_data):
    """Replace user and item ids with contiguous integer indices.

    Indices are assigned in order of first appearance in the respective
    column. The input frame is left untouched; a copy is encoded.

    Returns:
        tuple: ``(encoded_data, user_id2idx, item_id2idx)``.
    """
    encoded = raw_data.copy()

    lookups = {}
    for column in ("user", "item"):
        distinct = encoded[column].unique()
        lookups[column] = dict(zip(distinct, range(len(distinct))))
        encoded[column] = encoded[column].map(lookups[column])

    return encoded, lookups["user"], lookups["item"]

In [15]:
def emb_ds_create(df, val_split = 0.1, batch_size = 512, rs = 42):
    """Split an encoded ratings table into batched train / validation datasets.

    Args:
        df: Table with ``user``, ``item`` and ``rating`` columns.
        val_split: Fraction of rows (after the optional shuffle) used for
            validation.
        batch_size: Batch size of both datasets.
        rs: Seed for the shuffle. Pass ``None`` (or ``False``) to keep the
            incoming row order; ``0`` is a valid seed.

    Returns:
        tuple: ``(ds_train, ds_val)``; each element is
        ``({'user', 'item'}, rating)``.
    """
    # An explicit check so that seed 0 still shuffles.
    if rs is not None and rs is not False:
        df = df.sample(frac=1, random_state=rs).reset_index(drop=True)

    n_val = round(len(df) * val_split)

    x = {
        'user': df['user'].values,
        'item': df['item'].values
    }
    y = df['rating'].values

    ds = tf.data.Dataset.from_tensor_slices((x, y))

    # The first n_val rows form the validation set, the rest the training set.
    ds_val = ds.take(n_val).batch(batch_size)
    ds_train = ds.skip(n_val).batch(batch_size)

    return ds_train, ds_val

In [16]:
def emb_train(ds_train, ds_val, num_users, num_items, epochs = 10, embedding_size = 16):
    """Build, compile (Adam, mean squared error) and fit the embedding model.

    ``embedding_size`` is used for both the user and the item embedding width.

    Returns:
        tuple: ``(model, history)`` as returned by ``fit``.
    """
    emb_model = emb_create(
        n_user=num_users,
        n_item=num_items,
        n_item_latent_dim=embedding_size,
        n_user_latent_dim=embedding_size,
    )

    mse_loss = tf.keras.losses.MeanSquaredError()
    adam = keras.optimizers.Adam()
    mse_metric = tf.keras.metrics.MeanSquaredError()
    emb_model.compile(loss=mse_loss, optimizer=adam, metrics=[mse_metric])

    emb_model_hist = emb_model.fit(
        x=ds_train,
        validation_data=ds_val,
        epochs=epochs,
        verbose=1,
    )

    return emb_model, emb_model_hist

In [17]:
def Embedding_train(model_name):
    """Train a random forest on learned user/item embeddings and upload it.

    Steps:
      1. Fit the embedding model on the ratings and read back the
         ``user_embedding`` / ``item_embedding`` weights.
      2. For every rating row, use the element-wise sum of its user and item
         embedding as the feature vector.
      3. Fit a ``RandomForestRegressor`` on the raw ratings, or a
         ``RandomForestClassifier`` on label-encoded ratings.
      4. Pickle the forest, both embedding tables and the label encoder,
         LZMA-compress the bundle and upload (or overwrite) it.

    Args:
        model_name: ``"Regression with Embedding Features"`` selects the
            regressor (stored as ``regression_emb_model.xz``); any other value
            selects the classifier (``classification_emb_model.xz``).

    Returns:
        str: Status line. Failures while writing or uploading the artefact are
        reported in the string rather than raised.
    """
    # Imported here because a bare `import sklearn` does not guarantee that
    # these submodules have been loaded.
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
    from sklearn.preprocessing import LabelEncoder

    bucket = "course-recommendation-models"
    is_regression = model_name == "Regression with Embedding Features"
    file_name = "regression_emb_model.xz" if is_regression else "classification_emb_model.xz"

    ratings_df = load_rating()

    encoded_data, user_id2idx, course_id2idx = emb_data_prep(ratings_df)
    ds_train, ds_val = emb_ds_create(encoded_data)

    emb, _ = emb_train(
        ds_train=ds_train,
        ds_val=ds_val,
        num_users=len(user_id2idx),
        num_items=len(course_id2idx),
    )

    user_latent_features = emb.get_layer('user_embedding').get_weights()[0]
    item_latent_features = emb.get_layer('item_embedding').get_weights()[0]

    u_features = [f"User_Feature_{i}" for i in range(user_latent_features.shape[1])]
    c_features = [f"Course_Feature_{i}" for i in range(item_latent_features.shape[1])]

    # Weight row i belongs to the id encoded as i; the mapping dicts keep their
    # keys in exactly that order.
    user_emb = pd.DataFrame(user_latent_features, columns=u_features)
    user_emb.insert(0, 'User_ID', list(user_id2idx.keys()))

    item_emb = pd.DataFrame(item_latent_features, columns=c_features)
    item_emb.insert(0, 'Course_ID', list(course_id2idx.keys()))

    # Attach each rating row's user and item embedding.
    merged_train_df = (
        ratings_df
        .merge(user_emb, how='left', left_on='user', right_on='User_ID').fillna(0)
        .merge(item_emb, how='left', left_on='item', right_on='Course_ID').fillna(0)
    )

    # Feature vector = user embedding + item embedding (element-wise).
    x_train = merged_train_df[u_features] + merged_train_df[c_features].values
    x_train.columns = [f"Feature_{i}" for i in range(len(c_features))]
    y_train = merged_train_df['rating']

    label_encoder = None
    if is_regression:
        rf_model = RandomForestRegressor(
            n_estimators=100,
            min_samples_split=2,
            min_samples_leaf=1,
            max_features='log2',
            max_depth=20,
            random_state=42
        )
    else:
        label_encoder = LabelEncoder()
        y_train = label_encoder.fit_transform(y_train.values.ravel())
        rf_model = RandomForestClassifier(
            n_estimators=200,
            max_depth=20,
            max_features='sqrt',
            min_samples_split=2,
            random_state=42
        )

    rf_model.fit(x_train, y_train)

    obj = {
        "rf_model": rf_model,
        "user_emb": user_emb,
        "item_emb": item_emb,
        # The classifier predicts encoded labels; the encoder is stored so
        # that consumers can map them back to ratings (None for regression).
        "label_encoder": label_encoder
    }

    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".xz")
    tmp.close()

    try:
        with lzma.open(tmp.name, "wb") as f:
            pickle.dump(obj, f)

        existing_files = [file["name"] for file in supabase.storage.from_(bucket).list()]

        with open(tmp.name, "rb") as f:
            if file_name in existing_files:
                supabase.storage.from_(bucket).update(file_name, f)
                status = f"✅ Trained and Updated {model_name} on Supabase"
            else:
                supabase.storage.from_(bucket).upload(file_name, f)
                status = f"✅ Trained and Uploaded {model_name} to Supabase"

    except Exception as e:
        status = f"❌ Error during training/upload: {e}"
    finally:
        os.remove(tmp.name)

    return status


# Train/upload the regression variant; the Keras progress log and final status string follow as cell output.
Embedding_train("Regression with Embedding Features")

Epoch 1/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 15ms/step - loss: 1.8920 - mean_squared_error: 1.8919 - val_loss: 0.0487 - val_mean_squared_error: 0.0486
Epoch 2/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 0.1912 - mean_squared_error: 0.1911 - val_loss: 0.0467 - val_mean_squared_error: 0.0465
Epoch 3/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.1173 - mean_squared_error: 0.1171 - val_loss: 0.0445 - val_mean_squared_error: 0.0444
Epoch 4/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - loss: 0.0713 - mean_squared_error: 0.0711 - val_loss: 0.0438 - val_mean_squared_error: 0.0436
Epoch 5/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - loss: 0.0517 - mean_squared_error: 0.0515 - val_loss: 0.0437 - val_mean_squared_error: 0.0436
Epoch 6/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/st

'✅ Trained and Uploaded Regression with Embedding Features to Supabase'

In [20]:
# Train/upload the classification variant; the Keras progress log and final status string follow as cell output.
Embedding_train("Classification with Embedding Features")

Epoch 1/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 13ms/step - loss: 1.8199 - mean_squared_error: 1.8198 - val_loss: 0.0462 - val_mean_squared_error: 0.0461
Epoch 2/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - loss: 0.1829 - mean_squared_error: 0.1827 - val_loss: 0.0455 - val_mean_squared_error: 0.0454
Epoch 3/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.1134 - mean_squared_error: 0.1132 - val_loss: 0.0444 - val_mean_squared_error: 0.0442
Epoch 4/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.0783 - mean_squared_error: 0.0781 - val_loss: 0.0440 - val_mean_squared_error: 0.0438
Epoch 5/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 12ms/step - loss: 0.0574 - mean_squared_error: 0.0572 - val_loss: 0.0438 - val_mean_squared_error: 0.0436
Epoch 6/10
[1m411/411[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 14ms/ste

'✅ Trained and Uploaded Classification with Embedding Features to Supabase'