In [None]:
# League of Legends Match Outcome Predictor
# Dataset: games.csv (the actual row/column count is printed when the file is loaded in Task 2)

# 📌 Task 1: Import Required Libraries
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# For modeling
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler

# 📌 Task 2: Load Dataset
# Read the match records into a DataFrame, then print the table's
# dimensions and column names as a quick sanity check on the load.
dataset_path = '/datasets/_deepnote_work/20250731-171239/games.csv'
df = pd.read_csv(dataset_path)
column_names = df.columns.tolist()
print("Shape:", df.shape)
print("Columns:", column_names)

# 📌 Task 3: Visualize Winner Distribution
# Bar chart of how many matches each team won. The chart title states the
# encoding of the `winner` column: 1 = Blue, 2 = Red.
win_distribution_path = '/mnt/data/images/win_distribution.png'
sns.countplot(data=df, x='winner')
plt.title('Team Win Distribution (1 = Blue, 2 = Red)')
plt.xlabel('Winning Team')
plt.ylabel('Match Count')
# savefig does not create missing directories, so make sure the output
# folder exists first; exist_ok keeps re-running the cell harmless.
os.makedirs(os.path.dirname(win_distribution_path), exist_ok=True)
# Write the file before show() so it is saved from the figure drawn above.
plt.savefig(win_distribution_path)
plt.show()

# 📌 Task 4: Visualize Correlation with Win
# Correlate every numeric column with `winner` and draw the coefficients as
# bars, largest first. `winner` is plotted with the coding 1 = Blue, 2 = Red
# (see the Task 3 chart title), so a positive bar means the column tends to be
# higher when team 2 (Red) wins, and a negative bar when team 1 (Blue) wins.
feature_correlation_path = '/mnt/data/images/feature_correlation.png'
plt.figure(figsize=(12, 8))
correlations = df.corr(numeric_only=True)['winner'].sort_values(ascending=False)
# `winner` correlates perfectly with itself; drop it so it does not dominate
# the chart.
correlations.drop('winner').plot(kind='bar', color='skyblue')
plt.title("Feature Correlation with Winner")
plt.ylabel("Correlation")
plt.tight_layout()
# savefig does not create missing directories, so make sure the output
# folder exists first; exist_ok keeps re-running the cell harmless.
os.makedirs(os.path.dirname(feature_correlation_path), exist_ok=True)
# Write the file before show() so it is saved from the figure drawn above.
plt.savefig(feature_correlation_path)
plt.show()

# 📌 Task 5: Select Features
# Objective-control columns used as model inputs: the "first objective"
# columns plus per-team (t1_ / t2_) tower, inhibitor, baron and dragon kills.
# NOTE(review): the first* columns look like team identifiers rather than
# quantities; confirm their encoding before relying on them as plain
# numeric inputs to a linear model.
features = [
    'firstBlood', 'firstTower', 'firstBaron', 'firstDragon',
    'firstInhibitor', 't1_towerKills', 't1_inhibitorKills', 't1_baronKills', 't1_dragonKills',
    't2_towerKills', 't2_inhibitorKills', 't2_baronKills', 't2_dragonKills'
]
X = df[features]
# Binary target: 1 where `winner` equals 1, 0 for every other value.
# The comparison is vectorized instead of calling a Python lambda per row;
# the resulting 0/1 integers are the same.
y = df['winner'].eq(1).astype(int)  # 1 = blue wins, 0 = red wins

# 📌 Task 6: Train-Test Split
# Hold out 20% of the matches for evaluation. The fixed random_state makes
# the shuffle, and therefore the split, reproducible across runs.
split_options = {'test_size': 0.2, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# 📌 Task 7: Preprocess Features
# Standardize the inputs. The scaler is fitted on the training split only and
# then applied to both splits, so no statistics from the test split are used
# during fitting.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 📌 Task 8: Train Logistic Regression Model
# Fit a logistic-regression classifier on the scaled training data.
# max_iter=1000 sets the cap on solver iterations. fit() returns the
# estimator itself, so construction and fitting can be chained.
model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# 📌 Task 9: Evaluate Model
# Predict the held-out split, then print the overall accuracy followed by the
# confusion matrix and the per-class precision/recall/F1 report.
y_pred = model.predict(X_test_scaled)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)
for summarize in (confusion_matrix, classification_report):
    print(summarize(y_test, y_pred))

# 📌 Task 10: Visualize Predicted Win Distribution
# Pie chart of the share of test matches the model assigned to each class.
# This shows only the mix of predictions; it is not an accuracy measure
# (accuracy is printed in Task 9).
predicted_distribution_path = '/mnt/data/images/predicted_distribution.png'
plt.figure(figsize=(5, 5))
labels = ['Red Wins', 'Blue Wins']
# Count predictions per class with a vectorized NumPy call rather than the
# builtin sum(), which walks the boolean array one element at a time.
# The order matches `labels`: class 0 first, class 1 second.
sizes = [np.count_nonzero(y_pred == 0), np.count_nonzero(y_pred == 1)]
plt.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90, colors=['red', 'blue'])
plt.axis('equal')  # equal aspect ratio so the pie is drawn as a circle
plt.title('Predicted Win Distribution')
# savefig does not create missing directories, so make sure the output
# folder exists first; exist_ok keeps re-running the cell harmless.
os.makedirs(os.path.dirname(predicted_distribution_path), exist_ok=True)
# Write the file before show() so it is saved from the figure drawn above.
plt.savefig(predicted_distribution_path)
plt.show()


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=5020ea59-6327-4a1f-a0fe-fa14b26ec575' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>