In [1]:
from IPython.core.display import display, HTML
display(HTML("<style>div.container { width:100% !important; }</style>"))

In [2]:
from sklearn.datasets import make_classification
X,y = make_classification(n_samples=10000, n_features=10, n_informative=5,
                          n_redundant=0, n_clusters_per_class=1, 
                          n_classes=2, weights=[0.99, 0.01], 
                          random_state=42)

In [3]:
from sklearn.model_selection import train_test_split
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.3, 
                                             stratify=y, random_state=42)

In [4]:
from imblearn.over_sampling import SMOTE
sm = SMOTE()
resampled_X, resampled_y = sm.fit_resample(train_X, train_y)

In [5]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(n_estimators=100, max_features=2, 
                                  random_state=42)
rf_model.fit(resampled_X, resampled_y)

RandomForestClassifier(max_features=2, random_state=42)

In [6]:
y_pred_rfc = rf_model.predict(test_X)
from sklearn.metrics import confusion_matrix
confusion_matrix(test_y, y_pred_rfc)

array([[2953,    5],
       [  27,   15]], dtype=int64)

In [7]:
# from sklearn.metrics import confusion_matrix
# def get_f1score(y, y_pred):
#     cm = confusion_matrix(y, y_pred)
#     precision = cm[1,1] / (cm[0,1]+cm[1,1])
#     recall = cm[1,1] / (cm[1,0]+cm[1,1])
#     f_measure = (2*precision*recall) / (precision+recall)
#     return f_measure

In [8]:
# get_f1score(test_y, y_pred_rfc)

In [9]:
from sklearn.metrics import confusion_matrix
def model_measure(model, train_X, train_y, test_X, test_y) :
    model.fit(train_X, train_y)
    y_pred = model.predict(test_X)
    cm = confusion_matrix(test_y, y_pred)
    precision = cm[1,1] / (cm[0,1]+cm[1,1])
    recall = cm[1,1] / (cm[1,0]+cm[1,1])
    f1score = (2*precision*recall) / (precision+recall)
    print(f"precision: {precision:.6f}, recall: {recall:.6f}, f1score: {f1score:.6f}")
#     return precision, recall, f_measure

In [10]:
from sklearn.ensemble import RandomForestClassifier
model_measure(RandomForestClassifier(n_estimators=100, max_features=2, 
                                     random_state=42), 
              resampled_X, resampled_y, test_X, test_y)

precision: 0.750000, recall: 0.357143, f1score: 0.483871


In [11]:
from sklearn.svm import SVC
model_measure(SVC(random_state=42), 
              resampled_X, resampled_y, test_X, test_y)

precision: 0.221154, recall: 0.547619, f1score: 0.315068


In [12]:
from sklearn.neural_network import MLPClassifier
model_measure(MLPClassifier(hidden_layer_sizes=(50,), max_iter=500),
              resampled_X, resampled_y, test_X, test_y)

precision: 0.571429, recall: 0.571429, f1score: 0.571429


In [13]:
from xgboost import XGBClassifier  # pip install xgboost
model_measure(XGBClassifier(max_depth=10, n_estimators=100, 
                            learning_rate=0.01, use_label_encoder=False),
              resampled_X, resampled_y, test_X, test_y)

precision: 0.203883, recall: 0.500000, f1score: 0.289655


In [14]:
from lightgbm import LGBMClassifier #  # pip install lightgbm
model_measure(LGBMClassifier(n_estimators=100), 
              resampled_X, resampled_y, test_X, test_y)

precision: 0.552632, recall: 0.500000, f1score: 0.525000
