In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

In [2]:
# Load the provided train/test CSV files and stack them into a single frame;
# the combined data is re-split further down.
train = pd.read_csv("iot_devices_train.csv")
test = pd.read_csv("iot_devices_test.csv")
# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# both inputs keep their own RangeIndex, so every label up to the length of the
# shorter frame appears twice and label-based lookups (.loc) become ambiguous.
df = pd.concat([train, test], ignore_index=True)
df.columns

Index(['ack', 'ack_A', 'ack_B', 'bytes', 'bytes_A', 'bytes_A_B_ratio',
       'bytes_B', 'ds_field_A', 'ds_field_B', 'duration',
       ...
       'suffix_is_co.il', 'suffix_is_com', 'suffix_is_com.sg',
       'suffix_is_else', 'suffix_is_empty_char_value',
       'suffix_is_googleapis.com', 'suffix_is_net', 'suffix_is_org',
       'suffix_is_unresolved', 'device_category'],
      dtype='object', length=298)

In [3]:
# Target vector: the device category of every row.
y = df['device_category'].values
# Feature matrix: every remaining column.
X = df.drop(columns=['device_category']).values

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4242,
)

In [6]:
# Learn the label -> integer mapping from every category present in the full
# data set, so the train and test targets share one encoding.
le = LabelEncoder().fit(df["device_category"].unique())
y_train_encoded, y_test_encoded = (
    le.transform(labels) for labels in (y_train, y_test)
)

In [7]:
# Baseline: XGBoost with default hyperparameters, trained on the integer-encoded labels.
xgb_model = XGBClassifier().fit(
    X=X_train,
    y=y_train_encoded,
)

# Prediction

In [8]:
# Predict encoded class ids for the held-out rows with the baseline model.
y_pred = xgb_model.predict(X_test)

In [12]:
# Score the baseline with accuracy: the fraction of held-out rows whose
# predicted class id equals the true encoded class id.
# This replaces the earlier RMSE over encoded labels (recorded as
# 1.7703701664668294). The ids assigned by LabelEncoder are arbitrary category
# codes, so the numeric distance between two ids carries no meaning and an
# RMSE over them does not measure classification quality.
np.mean(y_pred == y_test_encoded)

# Model Tuning

In [None]:
# Hyperparameter grid for the exhaustive search below.
# The former 'min_samples_split' entry was removed: it is a scikit-learn tree
# parameter that XGBoost does not use, so it tripled the number of fits without
# changing any model. XGBoost's closest analogue is 'min_child_weight'; add it
# here if that kind of regularisation should be searched.
xgb_params = {'n_estimators': [100, 250, 500, 2100],
              'subsample': [0.8, 0.9, 1.0],
              'max_depth': [3, 4, 5, 6],
              'learning_rate': [0.01, 0.1, 0.2]}
xgb_model = XGBClassifier()
# 10-fold cross-validated grid search on all available cores.
# Fit on the integer-encoded labels, matching the baseline model and the
# encoded targets used for evaluation (recent XGBoost releases also reject
# non-integer class labels).
xgb_cv_model = GridSearchCV(xgb_model, xgb_params, cv=10, n_jobs=-1).fit(X_train, y_train_encoded)

In [None]:
# Refit an XGBoost classifier with the best hyperparameters found by the grid
# search. The previous version built a RandomForestClassifier (never imported,
# so the cell raised NameError) and trained it on the raw string labels while
# scoring against the encoded ones.
xgb_tuned = XGBClassifier(**xgb_cv_model.best_params_).fit(X_train, y_train_encoded)
y_pred = xgb_tuned.predict(X_test)
# Accuracy on the held-out rows: fraction of predictions that match the true
# encoded class id. (RMSE over arbitrary class ids is not a meaningful
# classification metric.)
np.mean(y_pred == y_test_encoded)