In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import mean_squared_error

In [5]:
df = pd.read_csv("data_banknote_authentication.txt")
df.columns

Index(['3.6216', '8.6661', '-2.8073', '-0.44699', '0'], dtype='object')

In [7]:
df.rename(columns = {'3.6216':'para1', '8.6661':'para2', '-2.8073':'para3', '-0.44699':'para4', '0':'isCounterfeit'}, inplace = True)

In [8]:
df.columns

Index(['para1', 'para2', 'para3', 'para4', 'isCounterfeit'], dtype='object')

In [9]:
X = df.drop(['isCounterfeit'], axis=1).values
y = df['isCounterfeit'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4242)

In [10]:
rf_model = RandomForestClassifier().fit(X_train, y_train)

# Tahmin 

In [11]:
y_pred = rf_model.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

0.12060453783110545

# Model Tuning

In [15]:
rf_params = {'max_depth': list(range(1, 10)), 'max_features': [1, 2, 3], 'n_estimators': [100, 200, 300]}
rf_cv_model = GridSearchCV(rf_model, rf_params, cv=10, n_jobs=-1).fit(X_train, y_train)
rf_cv_model.best_params_

{'max_depth': 6, 'max_features': 3, 'n_estimators': 100}

In [16]:
rf_tuned = RandomForestClassifier(max_depth=6, max_features=3, n_estimators=100).fit(X_train, y_train)
y_pred = rf_tuned.predict(X_test)
np.sqrt(mean_squared_error(y_test, y_pred))

0.13483997249264842