In [1]:
import pandas as pd
import streamlit as st
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import MultiLabelBinarizer
from importlib.resources import files
import matplotlib.pyplot as plt
import seaborn as sns
from stat386_final import viz, read, preprocess, model
from contextlib import redirect_stdout
import io
import pickle

In [2]:
def load_data(filepath):
    """Read the raw dataset located at ``filepath``.

    Thin wrapper: the path is handed unchanged to ``read.read_data`` and that
    call's result is returned as-is.

    Parameters
    ----------
    filepath
        Location of the input file, forwarded to ``read.read_data``.

    Returns
    -------
    The object produced by ``read.read_data`` (presumably a pandas DataFrame;
    confirm against ``stat386_final.read``).
    """
    return read.read_data(filepath)


def clean_data(df):
    """Run the two preprocessing stages and return both results.

    ``preprocess.process_data`` is applied to ``df`` first; its output is then
    fed to ``preprocess.prepare_data``.

    Parameters
    ----------
    df
        Raw data as returned by ``load_data``.

    Returns
    -------
    tuple
        ``(combined, prepared)`` where ``combined`` is the result of
        ``process_data`` and ``prepared`` is the result of ``prepare_data``
        applied to ``combined``.
    """
    combined = preprocess.process_data(df)
    return combined, preprocess.prepare_data(combined)


def load_model(df):
    """Fit one model/scaler pair per sales area via ``model.rf_fit``.

    ``model.rf_fit(df, area=...)`` is called once for each target column, in
    the fixed order NA, EU, JP, Other, Global. Each call is expected to return
    exactly two objects (a model and a scaler).

    Parameters
    ----------
    df
        Prepared data passed unchanged to every ``model.rf_fit`` call.

    Returns
    -------
    tuple
        Flat 10-tuple:
        ``(na_mod, na_scaler, eu_mod, eu_scaler, jp_mod, jp_scaler,
        other_mod, other_scaler, global_mod, global_scaler)``.
    """
    # Order matters: callers unpack the result positionally.
    areas = ('NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales')

    fitted = []
    for area in areas:
        mod, scaler = model.rf_fit(df, area=area)
        fitted += [mod, scaler]
    return tuple(fitted)

In [4]:
# Input CSV, resolved relative to the notebook's working directory.
filepath = './game_data.csv'

# Load the raw data, then run both preprocessing stages.
df = load_data(filepath)
sales_combined, cleaned_df = clean_data(df)

# Fit every regional model on the cleaned frame; load_model returns the
# (model, scaler) pairs as one flat tuple in this order.
(
    na_mod, na_scaler,
    eu_mod, eu_scaler,
    jp_mod, jp_scaler,
    other_mod, other_scaler,
    global_mod, global_scaler,
) = load_model(cleaned_df)

# Display the cleaned frame as this cell's output.
cleaned_df

For Area: NA_Sales
Best Parameters: {'max_depth': 15, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 50}
R²: 0.3683031700027256
RMSE (log scale): 0.15796960815249547
Top 10 Feature Importances:
last_30_day_avg    0.276644
all_time_peak      0.192255
Year               0.124928
Platform_X360      0.119418
Platform_PS3       0.071990
Platform_PS2       0.036747
Platform_Wii       0.029871
Genre_Shooter      0.025555
Platform_XOne      0.024553
Platform_PC        0.020750
dtype: float64
For Area: EU_Sales
Best Parameters: {'max_depth': 15, 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 100}
R²: 0.38364081700717956
RMSE (log scale): 0.07839796928199697
Top 10 Feature Importances:
last_30_day_avg    0.253455
Platform_X360      0.203122
all_time_peak      0.182815
Platform_SAT       0.077112
Platform_PS3       0.048378
Year               0.032327
Platform_PS        0.026890
Platform_Wii       0.024358
Platform_PC        0.024172
Platform_DS        0.020848
dty

Unnamed: 0,all_time_peak,last_30_day_avg,Year,Rank,Global_Sales,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Platform_3DS,...,Genre_Fighting,Genre_Misc,Genre_Platform,Genre_Puzzle,Genre_Racing,Genre_Role-Playing,Genre_Shooter,Genre_Simulation,Genre_Sports,Genre_Strategy
0,4725,370.36,2008,1.0,15.87,9.30,4.75,0.00,1.83,0,...,0,0,0,0,0,0,1,0,0,0
1,471955,15320.19,2015,2.0,12.28,5.42,5.04,0.25,1.57,0,...,0,0,0,0,0,1,0,0,0,0
2,11836,738.31,2016,3.0,10.56,4.14,3.28,1.90,1.20,0,...,0,0,0,0,0,0,1,0,0,0
3,2653,48.72,2008,4.0,9.96,5.56,3.11,0.16,1.11,0,...,0,0,0,0,0,1,0,0,0,0
4,6444,109.63,2009,5.0,9.52,5.64,2.74,0.08,1.06,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
315,10022,78.90,2016,316.0,0.01,0.00,0.01,0.00,0.00,0,...,0,0,0,0,0,0,0,1,0,0
317,304,11.58,2001,318.0,0.01,0.01,0.00,0.00,0.00,0,...,0,0,1,0,0,0,0,0,0,0
318,238,45.84,2009,319.0,0.01,0.00,0.01,0.00,0.00,0,...,0,0,0,0,0,1,0,0,0,0
319,198,27.52,2012,320.0,0.01,0.00,0.00,0.01,0.00,0,...,0,0,0,0,0,0,0,1,0,0


In [6]:
# Every fitted model and scaler, keyed by the stem of its pickle file.
artifacts = dict(
    na_model=na_mod,
    na_scaler=na_scaler,
    eu_model=eu_mod,
    eu_scaler=eu_scaler,
    jp_model=jp_mod,
    jp_scaler=jp_scaler,
    other_model=other_mod,
    other_scaler=other_scaler,
    global_model=global_mod,
    global_scaler=global_scaler,
)

# Write each artifact to "<name>.pkl" in the current working directory.
# NOTE: pickle files must only be loaded back from a trusted location.
for name in artifacts:
    obj = artifacts[name]
    with open(f"{name}.pkl", "wb") as f:
        pickle.dump(obj, f)
    print(f"pickled {name}")

pickled na_model
pickled na_scaler
pickled eu_model
pickled eu_scaler
pickled jp_model
pickled jp_scaler
pickled other_model
pickled other_scaler
pickled global_model
pickled global_scaler
