In [18]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import yaml
import joblib
import os

# Single random seed for the notebook: passed as random_state to both
# train_test_split and LogisticRegression so reruns are reproducible.
seed=456

In [19]:
## Load configuration and data
# All file locations come from the project config one directory above the notebook.
with open("../config.yaml", "r") as f:
    config = yaml.safe_load(f)

model_path = config["model"]["path"]
output_path = config["output"]["path"]

train_df = pd.read_csv(config["data"]["processed_train_path"])
test_df = pd.read_csv(config["data"]["processed_test_path"])

# Features (X) are every column except the row identifier and the target (y).
features = [col for col in train_df.columns if col not in ['battle_id', 'player_won']]

## Hold out a validation split BEFORE fitting the scaler
# Splitting first keeps the validation rows out of the scaler's mean/std,
# so the validation score is not inflated by information leaking from them.
X_train, X_vali, y_train, y_vali = train_test_split(
    train_df[features], train_df['player_won'],
    test_size=0.3, shuffle=True, random_state=seed
)

## Scale the features
# Fit the scaler on the training split only, then apply the same fitted
# transform to the validation and test features.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_vali = scaler.transform(X_vali)
X_test = scaler.transform(test_df[features])



In [20]:
# Fit a logistic-regression classifier on the training split.
print("Training a simple Logistic Regression model...")
# max_iter is set very high so the solver is not cut off by the iteration cap.
model = LogisticRegression(random_state=seed, max_iter=1000000)
model.fit(X_train, y_train)
print("Model training complete.")

# Create the model directory if it is missing; joblib.dump does not create
# parent directories and would otherwise raise FileNotFoundError.
if model_path:
    os.makedirs(model_path, exist_ok=True)

# NOTE(review): ".pk1" (digit one) looks like a typo for ".pkl"; the name is
# kept unchanged because other code may load the model by this exact filename.
# NOTE(review): only the classifier is saved here. The fitted `scaler` is not
# persisted in the visible cells, so a reloaded model needs inputs scaled the same way.
joblib.dump(model, os.path.join(model_path, "logreg.pk1"))


Training a simple Logistic Regression model...
Model training complete.


['../models\\logreg.pk1']

In [21]:
### Evaluate on the validation split
print("Generating predictions on the validation set...")
vali_predict = model.predict(X_vali)
# accuracy_score expects (y_true, y_pred). Accuracy gives the same number
# either way, but the documented order keeps this line correct if the metric
# is later swapped for an asymmetric one (precision, recall, ...).
accuracy = accuracy_score(y_vali, vali_predict)
print(accuracy)

Generating predictions on the validation set...
0.7763333333333333


In [22]:
# Predict labels for the test set with the fitted model.
# X_test holds the test features after the fitted scaler's transform;
# test_predictions is consumed by the submission cell below.

print("Generating predictions on the test set...")
test_predictions = model.predict(X_test)

Generating predictions on the test set...


In [23]:

# Build the submission table: each test row's battle_id paired with its
# predicted player_won value.
submission_df = pd.DataFrame({
    'battle_id': test_df['battle_id'],
    'player_won': test_predictions
})

# Create the destination directory if needed; to_csv does not create
# missing parent directories. Skipped when output_path has no directory part.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Save without the DataFrame index so the file has only the two columns above.
submission_df.to_csv(output_path, index=False)

# Report the path actually written (it comes from the config) rather than a
# hard-coded file name.
print(f"\nSubmission file created successfully at '{output_path}'!")


'submission.csv' file created successfully!
