In [2]:
import pickle
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from imblearn.over_sampling import SMOTE

# Load the data set and pickle the feature frame, i.e. every column of the
# CSV except the target column 'Label'.
# NOTE(review): the whole frame (values included) is written to
# features.pickle, not just the column index -- confirm that is what the
# consumer of this file expects.
df = pd.read_csv("data.csv")
features = df.drop(columns=['Label'])
# Context manager so the handle is flushed and closed deterministically.
with open('features.pickle', 'wb') as features_file:
    pickle.dump(features, features_file)

# Fit a dense-output OneHotEncoder on the categorical columns and pickle it.
columns_to_fit = ['live_province', 'live_city', 'bank']
try:
    # scikit-learn >= 1.2 spells the dense-output switch `sparse_output`.
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older releases only accept the original `sparse` keyword.
    enc = OneHotEncoder(sparse=False)
enc = enc.fit(df.loc[:, columns_to_fit])
# Context manager so the handle is flushed and closed deterministically.
with open('encoder.pickle', 'wb') as encoder_file:
    pickle.dump(enc, encoder_file)

# One-hot encode the categorical columns and swap the encoded columns into
# df in place of the originals, keeping the encoder-generated column names.
# `get_feature_names` was replaced by `get_feature_names_out` in newer
# scikit-learn releases; use whichever this installation provides.
name_getter = getattr(enc, 'get_feature_names_out', None) or enc.get_feature_names
column_names = name_getter(columns_to_fit)
encoded_variables = pd.DataFrame(
    enc.transform(df.loc[:, columns_to_fit]),
    columns=column_names,
    # Reuse df's index so the concat below lines rows up explicitly instead
    # of relying on df happening to carry a default RangeIndex.
    index=df.index,
)
# Keyword form: the positional `axis` argument is rejected by pandas 2.x.
df = df.drop(columns=columns_to_fit)
df = pd.concat([df, encoded_variables], axis=1)
    
# Split target from features and oversample with SMOTE.
# sampling_strategy=1.0 requests a 1:1 minority/majority ratio after
# resampling; random_state is fixed so the synthetic samples are repeatable.
y = df['Label']
X = df.drop(columns=['Label'])
sm = SMOTE(random_state=27, sampling_strategy=1.0)
# `fit_sample` was a deprecated alias that imbalanced-learn has since
# removed; `fit_resample` is the supported name for the same operation.
X, y = sm.fit_resample(X, y)


# Fit a random forest on the resampled data and pickle the fitted model.
# The original call spelled out every constructor argument at its library
# default (a pasted estimator repr). Two of them -- min_impurity_split and
# max_features='auto' -- no longer exist in current scikit-learn and make
# the constructor fail, so only the forest size is stated explicitly and
# the rest is left to the defaults.
# NOTE(review): random_state is left unset (as in the original), so the
# fitted forest differs from run to run -- confirm that is intended.
clf = RandomForestClassifier(n_estimators=100).fit(X, y)
# Context manager so the handle is flushed and closed deterministically.
with open('model.pickle', 'wb') as model_file:
    pickle.dump(clf, model_file)

ModuleNotFoundError: No module named 'imblearn'