In [None]:
from google.colab import drive

# Attach Google Drive under /content/drive so the saved artifacts are readable.
# force_remount=True is passed so a previously mounted drive is mounted again.
drive.mount(mountpoint='/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
import pandas as pd
import joblib
import pickle

# 1) Artifact locations on the mounted drive – adjust if needed
FEATURE_MAPS_PATH = "/content/drive/MyDrive/Project 1. Video Game Sales Analysis/feature_maps.pkl"
MODEL_PATH = "/content/drive/MyDrive/Project 1. Video Game Sales Analysis/lightgbm_sales_classifier.pkl"

# 2) Lookup tables used to derive the engineered features
feature_maps = joblib.load(FEATURE_MAPS_PATH)

# 3) LightGBM model bundled together with its metadata.
#    NOTE(review): unpickling can execute arbitrary code stored in the file,
#    so only load artifacts that come from a trusted source.
with open(MODEL_PATH, "rb") as model_file:
    artifact = pickle.load(model_file)

# Pull the individual pieces out of the artifact dict in one step.
model, features, label_mapping, best_threshold = (
    artifact[key]
    for key in ("model", "features", "label_mapping", "best_threshold")
)

# 4) Helper function to prepare input features
def prepare_features(platform, genre, publisher, feature_maps, feature_order=None):
    """Build the single-row feature frame that is fed to the classifier.

    Parameters
    ----------
    platform, genre, publisher : str
        Raw attributes of the game to score.
    feature_maps : dict
        Lookup tables. Must contain the keys 'publisher_avg_sales_map',
        'genre_avg_sales_map', 'platform_avg_sales_map',
        'publisher_rank_map', 'genre_rank_map' and 'platform_rank_map';
        each value only needs a ``.get(key, default)`` method.
        Values missing from a map fall back to 0.
    feature_order : sequence of str, optional
        Names of the columns to return, in that order. When omitted, the
        module-level ``features`` list loaded with the model is used, which
        is what the function always did before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        One row whose columns follow ``feature_order``. The raw and
        combined text columns have ``category`` dtype.

    Raises
    ------
    KeyError
        If ``feature_maps`` lacks one of the required maps, or if
        ``feature_order`` names a column this function does not build.
    """
    # Columns holding labels rather than numbers. They are cast by name so
    # the result does not depend on how pandas infers the dtype of string
    # columns (object vs. a dedicated string dtype in newer versions).
    categorical_cols = (
        'Platform',
        'Genre',
        'Publisher',
        'Platform_Genre',
        'Platform_Publisher',
        'Genre_Publisher',
    )

    row = {
        'Platform': platform,
        'Genre': genre,
        'Publisher': publisher,
        'Publisher_avg_sales': feature_maps['publisher_avg_sales_map'].get(publisher, 0),
        'Genre_avg_sales': feature_maps['genre_avg_sales_map'].get(genre, 0),
        'Platform_avg_sales': feature_maps['platform_avg_sales_map'].get(platform, 0),
        'Platform_Genre': f"{platform}_{genre}",
        'Platform_Publisher': f"{platform}_{publisher}",
        'Genre_Publisher': f"{genre}_{publisher}",
        'Publisher_rank': feature_maps['publisher_rank_map'].get(publisher, 0),
        'Genre_rank': feature_maps['genre_rank_map'].get(genre, 0),
        'Platform_rank': feature_maps['platform_rank_map'].get(platform, 0),
    }

    # Categorical dtype mirrors the training-time preprocessing (per the
    # original notebook comment).
    df = pd.DataFrame([row]).astype({col: 'category' for col in categorical_cols})

    # Fall back to the globally loaded feature list only when the caller
    # did not supply an explicit column order.
    columns = features if feature_order is None else feature_order
    return df[list(columns)]

# 5) Build one sample input row (Wii / Sports / Nintendo)
test_df = prepare_features("Wii", "Sports", "Nintendo", feature_maps)

print("Input row going into the model:")
print(test_df)

# 6) Score the row with the LightGBM model.
#    Column 1 of predict_proba is read as the probability of "good";
#    the stored threshold then turns it into a 0/1 label.
prob = model.predict_proba(test_df)[0, 1]
pred_label = 1 if prob >= best_threshold else 0

# Invert the label mapping (name -> id becomes id -> name; per the original
# note, 0 → bad and 1 → good) to recover the readable class name.
inv_label_mapping = dict(zip(label_mapping.values(), label_mapping.keys()))
pred_name = inv_label_mapping[pred_label]

print(f"\nPredicted probability for 'good' = {prob:.3f}")
print(f"Applied threshold = {best_threshold:.2f}")
print(f"Final prediction: {pred_name.upper()}")


Input row going into the model:
  Platform   Genre Publisher  Publisher_avg_sales  Genre_avg_sales  \
0      Wii  Sports  Nintendo             2.563836         0.568247   

   Platform_avg_sales Platform_Genre Platform_Publisher  Genre_Publisher  \
0            0.705279     Wii_Sports       Wii_Nintendo  Sports_Nintendo   

   Publisher_rank  Genre_rank  Platform_rank  
0           351.5      6004.5         4096.5  

Predicted probability for 'good' = 0.881
Applied threshold = 0.28
Final prediction: GOOD
