In [16]:
import joblib
import pandas as pd
import numpy as np
import re
import string
from scipy.sparse import hstack
from functools import partial

In [17]:
import warnings
# Install a blanket "ignore" filter so that no warning of any category is shown
# by the cells that follow. This also hides deprecation notices from libraries.
warnings.simplefilter("ignore")

In [18]:
from functools import partial

In [19]:
from dotenv import load_dotenv

from pathlib import Path

# Dotenv file to load before the registry is used. Presumably it supplies the
# JRJ_MODEL_REGISTRY_* settings read further down -- confirm against the file.
# Uncomment one of the alternatives below to target a different group's file.
env_path = Path("../.env-live")
# env_path = Path("../../.env-live-g1")
# env_path = Path("../../.env-live-g2")
# env_path = Path("../../.env-live-g3")
# env_path = Path("../../.env-live-g4")
# env_path = Path("../../.env-live-g5")
# env_path = Path("../../.env-live-g6")

if env_path.exists():
    load_dotenv(dotenv_path=env_path)
else:
    # Say so explicitly instead of skipping silently: otherwise the first sign
    # of a wrong path is a bare KeyError on a later os.environ lookup.
    # print() is used because warnings are globally ignored in this notebook.
    print(
        f"Env file {env_path} not found; "
        "relying on variables already present in the environment."
    )

In [20]:
import os
# Display the configured bucket name as a sanity check that the environment was
# loaded; the subscript lookup raises KeyError if the variable is not set.
os.environ['JRJ_MODEL_REGISTRY_S3_BUCKET_NAME']

'273-g5'

In [21]:
from jrjModelRegistry import handleDashboard, jrjRouterModelRegistry
from jrjModelRegistry.jrjModelRegistry import registerAJrjModel

In [8]:
# Load trained models.
# NOTE: joblib.load unpickles arbitrary Python objects, so these paths must only
# ever point at artifacts produced by a trusted training run.
logreg = joblib.load('../models/logistic_regression.pkl')
rf = joblib.load('../models/random_forest.pkl')
xgb = joblib.load('../models/xgboost.pkl')

# Load TF-IDF vectorizer and stopwords (one stopword entry per line of the file).
tfidf = joblib.load('../models/tfidf_vectorizer.pkl')
# Read through a context manager so the file handle is closed deterministically,
# and pin the encoding so the result does not depend on the platform default.
with open('stopwords_en.txt', encoding='utf-8') as stopwords_file:
    stop_words = set(stopwords_file.read().splitlines())

In [9]:
def netflix_transformer(data, vectorizer=None, stopwords=None):
    """Build the sparse feature matrix from raw title records.

    Args:
        data: Anything ``pd.DataFrame`` accepts (e.g. a list of dicts or a
            DataFrame) that yields ``description``, ``type`` and
            ``release_year`` columns. The input is never modified.
        vectorizer: Object whose ``transform`` method is applied to the cleaned
            descriptions. Defaults to the module-level ``tfidf``.
        stopwords: Container of words dropped from the descriptions. Defaults
            to the module-level ``stop_words``.

    Returns:
        The ``scipy.sparse.hstack`` of the vectorized descriptions followed by
        three float32 columns: ``release_year``, ``type_Movie``, ``type_TV Show``.

    Raises:
        KeyError: If one of the required columns is missing from ``data``.
    """
    # Resolve the defaults at call time so the notebook globals are still used
    # when the function is called with a single argument.
    active_vectorizer = tfidf if vectorizer is None else vectorizer
    active_stopwords = stop_words if stopwords is None else stopwords

    # Build the punctuation-stripping table once instead of once per row.
    punctuation_table = str.maketrans('', '', string.punctuation)

    def clean_text(text):
        # str() guards against non-string descriptions (e.g. a number in JSON).
        text = str(text).lower()
        text = re.sub(r'\d+', '', text)
        text = text.translate(punctuation_table)
        return ' '.join(word for word in text.split() if word not in active_stopwords)

    # Work on an explicit copy: pd.DataFrame(frame) may share data with its
    # input, and the column assignments below must not leak back to the caller.
    df = pd.DataFrame(data).copy()
    df['description'] = df['description'].fillna("").apply(clean_text)

    X_text = active_vectorizer.transform(df['description'])

    # One-hot style indicators for the two known title types; any other value
    # yields 0.0 in both columns.
    df['type_Movie'] = (df['type'] == 'Movie').astype(np.float32)
    df['type_TV Show'] = (df['type'] == 'TV Show').astype(np.float32)
    # Unparseable or missing years are coerced to NaN and then replaced by 0.
    df['release_year'] = pd.to_numeric(df['release_year'], errors='coerce').fillna(0).astype(np.float32)

    X_other = df[['release_year', 'type_Movie', 'type_TV Show']].values
    return hstack([X_text, X_other])

In [10]:
def netflix_main_predictor(model, transformed_data):
    """Return ``model.predict`` applied to already-transformed features.

    Args:
        model: Any object exposing a ``predict`` method.
        transformed_data: Features passed to ``predict`` unchanged.

    Returns:
        Whatever ``model.predict(transformed_data)`` returns.
    """
    predictions = model.predict(transformed_data)
    return predictions

In [11]:
# Attach the same two hooks to every loaded model.
for _model in (logreg, rf, xgb):
    # Pre-bind the model as the first argument so the hook takes only the features.
    _model.mainPredictor = partial(netflix_main_predictor, _model)
    # All models share one preprocessing function.
    _model.transformer = netflix_transformer
# Drop the loop variable so no extra name lingers in the notebook namespace.
del _model

In [24]:
def register_model(model, model_name, score, version="1.0.1"):
    """Register ``model`` through ``registerAJrjModel`` with standard metadata.

    Args:
        model: The model object to register; its class name is recorded as the
            ``library`` field.
        model_name: Value stored under ``modelName``.
        score: Numeric score stored under ``score`` (converted with ``float``).
        version: Version string stored under ``version``. Defaults to
            ``"1.0.1"``, the value that was previously hard-coded, so existing
            three-argument calls behave exactly as before.

    Raises:
        TypeError, ValueError: If ``score`` cannot be converted by ``float``.
    """
    # Single example record shipped with the metadata; it carries the same
    # three fields that netflix_transformer reads.
    sample_record = {
        "description": "A young man battles dark forces in a magical realm.",
        "type": "Movie",
        "release_year": 2020
    }
    metadata = {
        "modelName": model_name,
        "version": version,
        "score": float(score),
        "library": model.__class__.__name__,
        "sampleData": {
            "dataForTransfer": [sample_record]
        }
    }
    registerAJrjModel(model, metadata)

In [25]:
# Register each model in the original order: (model, registry name, score).
# The scores are hard-coded here; the metric they come from is not shown in
# this notebook.
for _model, _name, _score in (
    (logreg, "group5_logreg", 0.78),
    (rf, "group5_rf", 0.82),
    (xgb, "group5_xgb", 0.84),
):
    register_model(_model, _name, _score)
# Drop the loop variables so no extra names linger in the notebook namespace.
del _model, _name, _score

✅ Uploaded encrypted ZIP to s3://273-g5/group5_logreg__1.0.1.pkl.zip
✅ Uploaded encrypted ZIP to s3://273-g5/group5_rf__1.0.1.pkl.zip
✅ Uploaded encrypted ZIP to s3://273-g5/group5_xgb__1.0.1.pkl.zip
