In [1]:
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
import pandas as pd

In [2]:
train_df = pd.read_csv('./gdz-elektrik-datathon-2024/train.csv')
test_df = pd.read_csv('./gdz-elektrik-datathon-2024/test.csv')

In [3]:
label_encoder = LabelEncoder()
X_train = train_df.drop(['tarih', 'bildirimsiz_sum', 'bildirimli_sum'], axis=1)
X_train['ilce'] = label_encoder.fit_transform(X_train['ilce'])
y_train = train_df['tarih']

X_test = test_df.drop(['tarih', 'bildirimli_sum'], axis=1)
X_test['ilce'] = label_encoder.transform(X_test['ilce'])
y_test = test_df['tarih']

In [4]:
model = RandomForestClassifier()

param_grid = {
    'n_estimators': [150, 165, 180],
    'max_depth': [None, 1, 2, 3],
    'min_samples_split': [3, 4, 5],
    'min_samples_leaf': [2, 3, 4]
}
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5)

grid_search.fit(X_train, y_train)

print("En iyi hiperparametreler:", grid_search.best_params_)
print("En iyi doğruluk skoru:", grid_search.best_score_)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Test verileri üzerinde doğruluk skoru:", accuracy)

En iyi hiperparametreler: {'max_depth': 1, 'min_samples_leaf': 3, 'min_samples_split': 3, 'n_estimators': 150}
En iyi doğruluk skoru: 0.0007684578657389568
Test verileri üzerinde doğruluk skoru: 0.0
