# In [10]:
import pandas as pd
import numpy as np
import pickle

from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.impute import SimpleImputer    
from sklearn.compose import ColumnTransformer
from sklearn.tree import DecisionTreeRegressor

# Load the dataset (indexed by the 'Name' column) and discard the columns
# that are not used as model inputs.
df = pd.read_csv(
    '/home/linux/Documents/Test_App/csv_file.csv',
    index_col='Name',
)
df = df.drop(columns=['EU_Sales', 'NA_Sales', 'JP_Sales', 'Other_Sales', 'Year'])

# Features are every remaining column except the target; the target is
# 'Global_Sales'.
X = df.drop(columns='Global_Sales')
Y = df['Global_Sales']

# Split the feature columns into numeric and categorical groups: every
# non-object column is treated as numeric, the rest as categorical.
num_vars = X.select_dtypes(exclude=['object']).columns
cat_vars = X.drop(columns=num_vars).columns

# Numeric transformer: fill missing values with the column median, then
# rescale the values with MinMaxScaler.
numeric_transformer = make_pipeline(
    SimpleImputer(strategy='median'),
    MinMaxScaler(),
)

# Categorical transformer: fill missing values with the most frequent
# category, then one-hot encode. handle_unknown="ignore" keeps transform()
# from raising on categories that were not seen during fit.
categorical_transformer = make_pipeline(
    SimpleImputer(strategy='most_frequent'),
    OneHotEncoder(handle_unknown="ignore"),
)

# Combine the transformers: each column group is routed to its own
# transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_vars),
        ('cat', categorical_transformer, cat_vars),
    ]
)

# Final pipeline: preprocessing followed by a depth-limited decision tree.
# random_state is pinned because scikit-learn randomly permutes the features
# at every split, so without it the fitted (and later pickled) tree can differ
# between runs when several candidate splits are equally good.
model_regression = Pipeline(
    steps=[
        ('preprocessing', preprocessor),
        ('regression', DecisionTreeRegressor(max_depth=10, random_state=0)),
    ]
)

# Train the full pipeline (preprocessing + regressor) on the whole dataset.
model_regression.fit(X, Y)

# Report the pipeline's score on the same data it was just fitted on,
# i.e. a training-set score rather than a held-out estimate.
training_score = model_regression.score(X, Y)
print(training_score)

# Persist the fitted pipeline as a pickle file.
with open('reg.pkl', 'wb') as model_file:
    pickle.dump(model_regression, model_file)

# Export the features and the target next to the model, without the index.
for frame, csv_path in ((X, 'reg_X.csv'), (Y, 'reg_Y.csv')):
    frame.to_csv(csv_path, index=False)


# Output: 0.8912348755047276


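# Example (not executed):
#
# # Load the model from the file
# model_classification_from_joblib = joblib.load('/home/linux/Documents/Test_App/models/model_regression.pkl')
#
# # Use the loaded model to make predictions
# model_classification_from_joblib.predict(X_test)
#
# NOTE(review): joblib is not imported and X_test is not defined in the
# visible code, and the model above is written to 'reg.pkl' with pickle --
# confirm the path and loader before using this snippet.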