# In [None]:
###########################
# data libraries
###########################
import pandas as pd
import numpy as np

###########################
# plot libraries
###########################
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import rcParams


###########################
# data generation
###########################
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from tensorflow.keras.layers import Input, Dense, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

### Import and Function
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import time
from sklearn.model_selection import (
    GridSearchCV,
    StratifiedKFold,
    cross_val_score,
    train_test_split,
)

###########################
# transform classes into numbers
###########################
def categoricalToNumerical(df, cols):
    """Ordinal-encode the selected columns of ``df`` in place.

    Each listed column is replaced by integer category codes produced by a
    freshly fitted ``OrdinalEncoder``.

    Args:
        df: DataFrame to modify; the columns in ``cols`` are overwritten.
        cols: Column labels to encode.

    Returns:
        The fitted ``OrdinalEncoder``. Keep it to apply the same
        category-to-integer mapping to another frame (``transform``) or to
        turn predicted codes back into labels (``inverse_transform``).
        Callers that only want the in-place encoding can ignore it.
    """
    # NOTE(review): int16 can hold codes up to 32767 -- confirm that no encoded
    # column has more distinct categories than that.
    enc = OrdinalEncoder(dtype=np.int16)
    df[cols] = enc.fit_transform(df[cols])
    return enc


## MAIN
test = pd.read_csv("competition.csv")
train = pd.read_csv("train_with_synthetic_data.csv")


###########################
# preparing dataset: transform classes into numbers
# and separate the training features from their target
###########################

# Encode the categorical columns of the training frame in place.
categoricalToNumerical(train, ['Consumer_type', 'Consumer_number', 'Installation_zone'])

# Pull the label column out of the feature frame: `train` keeps only the
# features, `train_target` holds the encoded 'Consumer_type' values.
train_target = train.pop('Consumer_type')

# Base random forest handed to the grid search.
rfc = RandomForestClassifier(
    # Forest size and tree shape.
    n_estimators=20,
    max_depth=10,
    max_features=None,  # consider every feature at each split
    ccp_alpha=0.01,  # cost-complexity pruning strength
    # Sampling: out-of-bag scoring needs bootstrap sampling enabled.
    bootstrap=True,
    oob_score=True,
    # Reproducibility and parallelism.
    random_state=42,
    n_jobs=-1,  # use all available cores
)

# Hyperparameter search space for the grid search.
# It is currently empty, so the search evaluates a single candidate: the base
# estimator with its own settings. Candidate values are kept commented out
# below; uncomment an entry to include it in the search.
parameter_grid = {
    # "n_estimators": [10, 25, 50],
    # "max_depth": [10, 30, None],
    # "max_features": ["sqrt", "log2", None],
    # "criterion": ["gini", "entropy", "log_loss"],
    # "oob_score": [True, False],
    # "random_state": [0, 10],
    # "min_samples_leaf": [1, 4, 8],
    # "min_samples_split": [2, 6, 10],
}

# Stratified 2-fold splitter used by the grid search. The splitter is only a
# configuration object here: the actual splitting happens inside
# GridSearchCV.fit, so no separate call on the data is needed.
# NOTE(review): shuffle is left at its default (off), so folds follow the row
# order of the training file -- confirm the rows are not ordered in a way that
# biases the folds.
cross_validation = StratifiedKFold(n_splits=2)

# Metrics reported for every grid-search candidate, keyed by report label.
# The first two labels are identical to their scorer names; "f1" is a short
# label for the macro-averaged F1 scorer.
SCORING = {scorer: scorer for scorer in ("accuracy", "balanced_accuracy")}
SCORING["f1"] = "f1_macro"

# Score every candidate in the grid with all SCORING metrics on the stratified
# folds, then refit the candidate with the best balanced accuracy on the full
# training data.
grid_search = GridSearchCV(
    rfc,
    parameter_grid,
    scoring=SCORING,
    refit="balanced_accuracy",
    cv=cross_validation,
    return_train_score=True,
    verbose=3,
)
grid_search.fit(train, train_target)

# Cross-validated result of the winning configuration. The search ranks
# candidates by its refit metric, so best_score_ is the mean cross-validated
# value of that metric.
print("Best score: {}".format(grid_search.best_score_))
print("Best parameters: {}".format(grid_search.best_params_))

# The grid search has already refit the winning configuration on the full
# training data, so best_estimator_ is ready to use as-is. Fitting it again on
# the same rows with the same fixed seed would rebuild the same forest and
# only cost time.
best_dtc = grid_search.best_estimator_  # a random forest, despite the "dtc" name
my_model = best_dtc

# Accuracy on the rows the model was trained on. This is an optimistic figure,
# not an estimate of performance on unseen data; use the cross-validated score
# printed above for that.
my_model.score(train, train_target)