In [4]:
from src.data_preprocessing import *
from src.models import *
from sklearn.model_selection import GridSearchCV

df = load_data('../data/jobfair_train.csv')

columns_to_normalize = [
    'days_active_last_28_days', 'session_count_last_28_days',
    'avg_stars_top_11_players', 'tokens_spent_last_28_days',]

columns_to_drop = [
    'morale_boosters_stash', 'rests_stash', 'tokens_stash', 'tokens_spent_last_28_days',
    'avg_training_factor_top_11_players', 'avg_age_top_11_players',
    'league_match_watched_count_last_28_days', 'global_competition_level',
    'avg_stars_top_14_players', 'days_active_last_28_days', 'session_count_last_28_days',
    'playtime_last_28_days', 'league_match_won_count_last_28_days', 'training_count_last_28_days',
    'avg_stars_top_11_players', 'global_competition_level', 'club_id','cohort_season','season','dynamic_payment_segment', 'registration_country', 'registration_platform_specific'
]

df = preprocess_data(df, columns_to_normalize, columns_to_drop)

In [6]:
X_train, X_test, y_train, y_test = league_test_split(df, test_size=0.2, random_state=42)

rf = RandomForestClassifier()
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['auto', 'sqrt'],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(X_train, y_train)
best_parameters = grid_search.best_params_
print(f"Best params: {best_parameters}")

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Najbolji parametri: {'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 300}


In [7]:
mlp = MLPClassifier(max_iter=1000, random_state=42)

param_grid = {
    'hidden_layer_sizes': [(50,), (100,), (100, 50), (50, 50)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant', 'adaptive'],
}

grid_search = GridSearchCV(estimator=mlp, param_grid=param_grid, cv=5, n_jobs=-1, verbose=2)
grid_search.fit(scale(X_train), y_train)

best_parameters = grid_search.best_params_
print(f"Best params: {best_parameters}")


Fitting 5 folds for each of 64 candidates, totalling 320 fits
Najbolji parametri: {'activation': 'relu', 'alpha': 0.05, 'hidden_layer_sizes': (50, 50), 'learning_rate': 'constant', 'solver': 'adam'}
