In [None]:
import numpy as np
import pandas as pd
from imblearn.ensemble import BalancedRandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold, train_test_split
from sklearn.preprocessing import StandardScaler

# Train a BalancedRandomForestClassifier on train.csv, report hold-out metrics,
# then predict BANKR for test.csv and write the predictions to result_ML.csv.

# Load the labelled training data.
data = pd.read_csv('train.csv')

# Split the dataset into features and the target variable.
# The ID_FIRM column is excluded from the features; BANKR is the target.
X = data.drop(['ID_FIRM', 'BANKR'], axis=1)
y = data['BANKR']

# Hold out 10% of the labelled rows for evaluation.
# NOTE(review): the split is not stratified; if BANKR is strongly imbalanced,
# passing stratify=y may give a more representative hold-out set - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)

# Fit the scaler on the training rows only, then reuse the fitted transform
# for every other feature matrix handed to the classifier.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_holdout_scaled = scaler.transform(X_test)

# Use BalancedRandomForestClassifier instead of RandomForestClassifier
clf = BalancedRandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train_scaled, y_train)

# Evaluate on the hold-out rows. The classifier was fitted on scaled features,
# so it must be given the scaled hold-out features as well (previously the raw
# X_test was passed here, which made the reported metrics meaningless).
y_holdout_pred = clf.predict(X_holdout_scaled)
print("Accuracy:", accuracy_score(y_test, y_holdout_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_holdout_pred))
print("Classification Report:\n", classification_report(y_test, y_holdout_pred))


# Score the unlabelled file and write the submission.
# Note: from here on X_test, X_test_scaled and y_pred refer to the rows of
# test.csv, not to the hold-out split above (y_test still holds the hold-out
# labels). These bindings are kept as they were for any later cell using them.
test = pd.read_csv('test.csv')
X_test = test.drop(['ID_FIRM'], axis=1)
X_test_scaled = scaler.transform(X_test)
y_pred = clf.predict(X_test_scaled)
out_data = pd.DataFrame({'ID_FIRM': test['ID_FIRM'], 'BANKR': y_pred})
out_data.to_csv('result_ML.csv', index=False)


In [None]:
# Hyperparameter Tuning
# Exhaustive grid search over BalancedRandomForestClassifier settings, scored
# by F1 with 5-fold stratified cross-validation on the training split.
# Relies on X_train / y_train produced by the train_test_split cell, and on
# GridSearchCV / StratifiedKFold being imported in the notebook's import block.
# The search is run on the unscaled X_train, so `model` expects unscaled features.
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# 3 * 4 * 3 * 3 = 108 parameter combinations, each fitted once per fold.
grid_search = GridSearchCV(
    BalancedRandomForestClassifier(random_state=42),
    param_grid,
    cv=StratifiedKFold(n_splits=5),
    scoring='f1',
)
grid_search.fit(X_train, y_train)

# Best parameters
best_params = grid_search.best_params_

# Model with the best parameters.
# GridSearchCV refits by default (refit=True): after the search it trains a
# clone of the base estimator (random_state=42) with best_params on the full
# X_train / y_train. That is the same model the explicit re-training produced,
# so reuse it instead of fitting a second time.
model = grid_search.best_estimator_

Number of null values in train_df is: 0


Unnamed: 0,ID_FIRM,P1110_B,P1110_E,P1120_B,P1120_E,P1130_B,P1130_E,P1140_B,P1140_E,P1150_B,...,P2421_E,P2430_B,P2430_E,P2450_B,P2450_E,P2460_B,P2460_E,P2400_B,P2400_E,BANKR
0,1,8,7,0,0,0,0,0,0,402,...,161,0,0,0,0,0,0,1561,621,0
1,2,0,0,3318,3318,0,0,0,0,208809,...,0,0,0,0,0,0,8551,-15296,-16123,0
2,3,0,0,0,0,0,0,0,0,237,...,0,0,0,0,0,6,0,-2166,-1375,0
3,4,0,0,0,0,0,0,0,0,15428,...,-55,0,22,0,4,0,0,-3390,502,0
4,5,0,0,0,0,0,0,0,0,340249,...,-895,0,11,0,0,105,0,26131,6100,0
