In [5]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import yaml
import joblib
import os

seed=456

In [11]:
##  Extracting data
with open("../config.yaml", "r") as f:
    config = yaml.safe_load(f)

model_path = config["model"]["path"]
output_path = config["output"]["path"]

train_df = pd.read_csv(config["data"]["processed_train_path"])
test_df = pd.read_csv(config["data"]["processed_test_path"])

# Define our features (X) and target (y)
features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]
X_train = train_df[features]
y_train = train_df['player_won']
#print(features)

# Scaling all the data
scaler=StandardScaler()
scaler.fit(X_train)
X_train=scaler.transform(X_train)
X_test = scaler.transform(test_df[features])



## Logistic Regression

In [None]:
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression(random_state=seed)

param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'penalty': ['l2'],
    'solver': ['liblinear', 'saga'],
    'max_iter': [5000]
}

grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)

print("Migliori iperparametri trovati:", grid_search.best_params_)
print("Best accuracy in CV:", grid_search.best_score_)


best_model = grid_search.best_estimator_

Fitting 5 folds for each of 24 candidates, totalling 120 fits
Migliori iperparametri trovati: {'C': 1, 'max_iter': 5000, 'penalty': 'l2', 'solver': 'saga'}
Best accuracy in CV: 0.8079000000000001


In [14]:
grid_search.cv_results_

{'mean_fit_time': array([ 0.10009313,  0.2866117 ,  0.32669444,  0.23551583,  0.11729546,
         0.23829184,  0.52937465,  0.39999232,  0.39947991,  1.25194774,
         1.19591441,  1.77001247,  2.05672684,  6.85165401,  2.0542717 ,
        11.50962968, 12.97672682, 38.16678443,  3.29838204, 27.45628562,
        13.29325953, 38.28513236,  3.48845105, 28.26088648]),
 'std_fit_time': array([5.48326580e-03, 3.23818355e-02, 1.44077874e-02, 2.72147685e-02,
        1.72481715e-02, 1.68609530e-02, 5.17593111e-02, 4.41244680e-02,
        5.65682963e-02, 5.00921723e-01, 2.14141507e-01, 3.10297151e-01,
        4.55768750e-01, 2.11597966e+00, 3.31101746e-01, 2.24730538e+00,
        2.38293220e+00, 9.80519663e+00, 3.58559636e-01, 5.83419240e+00,
        2.31159763e+00, 1.02811231e+01, 5.01850767e-01, 5.93260563e+00]),
 'mean_score_time': array([0.00297823, 0.00521736, 0.00312743, 0.00291934, 0.00307584,
        0.00302582, 0.00317979, 0.00372305, 0.00752645, 0.00314708,
        0.0043437 , 0.00

In [8]:
# Testing the prediction on the test set

print("Generating predictions on the test set...")
test_predictions = best_model.predict(X_test)

Generating predictions on the test set...


In [9]:

# Create the submission DataFrame
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': test_predictions
})
# Save the DataFrame to a .csv file
submission_df.to_csv(output_path, index=False)

print("\n'submission.csv' file created successfully!")


'submission.csv' file created successfully!
