<a href="https://colab.research.google.com/github/kafitramarna/lstm-based-mobile-legend-hero-counter-predictor/blob/main/Mobile_Legend_Draft_Pick_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Library

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import re


# Import Dataset

In [None]:
# Read the hero statistics CSV (Colab upload location) and preview the first rows.
dataset_path = "/content/data_hero_best_counter_for.csv"
df = pd.read_csv(dataset_path)
df.head()

Unnamed: 0,Hero,Pick Rate,Win Rate,Ban Rate,Best Counter For,Role,Best Lane
0,Lolita,0.69%,62.98%,6.75%,"['Granger', 'Cyclops', 'Diggie', 'Bane', 'Bruno']","['Support', 'Tank']",['roam']
1,Freya,0.99%,59.53%,57.75%,"['Faramis', 'Diggie', 'Mathilda', 'Balmond', '...",['Fighter'],['exp lane']
2,Zhask,1.94%,59.47%,63.89%,"['Lolita', 'Alice', 'Phoveus', 'Jawhead', 'Zhu...",['Mage'],['mid lane']
3,Chip,0.66%,57.62%,70.51%,"['Baxia', 'Aldous', 'Grock', 'Hayabusa', 'Vale...","['Support', 'Tank']",['roam']
4,Edith,1.52%,56.71%,2.95%,"['Alice', 'Julian', 'Belerick', 'Fredrinn', 'H...","['Tank', 'Marksman']","['exp lane', 'roam']"


# Functions to clean up the dataset

In [None]:
def clean_array_string(array_string):
    """Flatten a stringified list such as ``"['a', 'b']"`` into ``"a, b"``.

    Every quote, square bracket and whitespace character is removed first.
    This also removes whitespace and apostrophes *inside* an item, so
    ``'exp lane'`` becomes ``explane``. Each comma is then rewritten as
    ``", "`` so the result can be split on that separator.
    """
    without_noise = re.sub(r"[\'\"\[\]\s]+", "", array_string)
    return re.sub(r"\,(\s)*", ", ", without_noise)

In [None]:
def percentage_to_decimal(percentage_string):
    """Convert a percentage string such as ``"62.98%"`` into a fraction (``0.6298``)."""
    numeric_part = percentage_string.strip('%')
    return float(numeric_part) / 100

# Preprocessing data

## Clean up dataset

In [None]:
# NOTE: this cell rewrites `df` in place; the rate conversion calls `.strip` on each
# value, so it only works on the freshly loaded string columns.

# Percentage strings ("0.69%") -> fractions (0.0069).
for rate_column in ('Pick Rate', 'Win Rate', 'Ban Rate'):
    df[rate_column] = df[rate_column].apply(percentage_to_decimal)

# Stringified lists ("['Support', 'Tank']") -> Python lists of names.
for list_column in ('Best Counter For', 'Role', 'Best Lane'):
    df[list_column] = df[list_column].apply(clean_array_string).str.split(', ')


In [None]:
# Preview the cleaned frame: rates are now fractions and the list columns hold Python lists.
df.head()

Unnamed: 0,Hero,Pick Rate,Win Rate,Ban Rate,Best Counter For,Role,Best Lane
0,Lolita,0.0069,0.6298,0.0675,"[Granger, Cyclops, Diggie, Bane, Bruno]","[Support, Tank]",[roam]
1,Freya,0.0099,0.5953,0.5775,"[Faramis, Diggie, Mathilda, Balmond, Estes]",[Fighter],[explane]
2,Zhask,0.0194,0.5947,0.6389,"[Lolita, Alice, Phoveus, Jawhead, Zhuxin]",[Mage],[midlane]
3,Chip,0.0066,0.5762,0.7051,"[Baxia, Aldous, Grock, Hayabusa, Valentina]","[Support, Tank]",[roam]
4,Edith,0.0152,0.5671,0.0295,"[Alice, Julian, Belerick, Fredrinn, Hayabusa]","[Tank, Marksman]","[explane, roam]"


## Apply One Hot Encoding

In [None]:
def _multi_hot(list_column, prefix):
    """Expand a column of name lists into 0/1 indicator columns named ``prefix + name``."""
    indicators = list_column.apply(lambda names: pd.Series(1, index=names)).fillna(0)
    indicators.columns = [prefix + name for name in indicators.columns]
    return indicators


best_counter_encoded = _multi_hot(df['Best Counter For'], 'BestCounter_')
role_encoded = _multi_hot(df['Role'], 'Role_')
best_lane_encoded = _multi_hot(df['Best Lane'], 'BestLane_')

# Swap the three list columns for their indicator columns.
encoded_parts = [
    df.drop(columns=['Best Counter For', 'Role', 'Best Lane']),
    best_counter_encoded,
    role_encoded,
    best_lane_encoded,
]
df_encoded = pd.concat(encoded_parts, axis=1)

In [None]:
# Display up to the first 125 rows of the encoded frame.
df_encoded.head(125)

Unnamed: 0,Hero,Pick Rate,Win Rate,Ban Rate,BestCounter_Granger,BestCounter_Cyclops,BestCounter_Diggie,BestCounter_Bane,BestCounter_Bruno,BestCounter_Faramis,...,Role_Tank,Role_Fighter,Role_Mage,Role_Marksman,Role_Assassin,BestLane_roam,BestLane_explane,BestLane_midlane,BestLane_goldlane,BestLane_jungler
0,Lolita,0.0069,0.6298,0.0675,1.0,1.0,1.0,1.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
1,Freya,0.0099,0.5953,0.5775,0.0,0.0,1.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
2,Zhask,0.0194,0.5947,0.6389,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,Chip,0.0066,0.5762,0.7051,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,Edith,0.0152,0.5671,0.0295,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,Lancelot,0.0111,0.4225,0.0032,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
121,Nana,0.0051,0.4128,0.0049,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
122,Hanabi,0.0025,0.4087,0.0006,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
123,Esmeralda,0.0040,0.3925,0.0048,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0


## Pisah dataset

In [None]:
from sklearn.model_selection import train_test_split

# Features are every column except the hero name and the three rate columns.
non_feature_columns = ['Hero', 'Win Rate', 'Pick Rate', 'Ban Rate']
X = df_encoded.drop(columns=non_feature_columns)
y_win_rate, y_pick_rate, y_hero = (
    df_encoded[target] for target in ('Win Rate', 'Pick Rate', 'Hero')
)

# Each target is split with the same test fraction and seed.
split_options = {'test_size': 0.3, 'random_state': 42}
X_train_win, X_test_win, y_train_win, y_test_win = train_test_split(X, y_win_rate, **split_options)
X_train_pick, X_test_pick, y_train_pick, y_test_pick = train_test_split(X, y_pick_rate, **split_options)
X_train_hero, X_test_hero, y_train_hero, y_test_hero = train_test_split(X, y_hero, **split_options)

# Latih Model Menggunakan Random Forest Dan LSTM

## Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Two regressors with identical settings, one per target, both fitted on the full feature frame.
forest_options = {'n_estimators': 10, 'random_state': 0}
rf_win = RandomForestRegressor(**forest_options)
rf_pick = RandomForestRegressor(**forest_options)

rf_win.fit(X, y_win_rate)
# fit() returns the estimator itself, so the cell still ends by displaying rf_pick.
rf_pick.fit(X, y_pick_rate)

## LSTM

In [None]:
# Insert a length-1 middle axis: (samples, features) -> (samples, 1, features),
# the 3-D layout whose last two dimensions are given to the LSTM as input_shape.
# np.asarray accepts both the original DataFrame and an already-reshaped array, and
# the target shape is taken from the first and last axes, so re-running this cell is a
# no-op instead of failing because an ndarray has no `.values` attribute.
X_array = np.asarray(X)
X = X_array.reshape((X_array.shape[0], 1, X_array.shape[-1]))

In [None]:
from sklearn.preprocessing import LabelEncoder
# Imported here because this cell runs before the model cell that also imports it;
# without it a fresh kernel raises NameError on to_categorical.
from tensorflow.keras.utils import to_categorical

# Map each hero name to an integer class id.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_hero)

# One-hot targets with 125 columns, the same width as the model's Dense(125) output layer.
y_one_hot = to_categorical(y_encoded, num_classes=125)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical

# One LSTM layer over the last two axes of X, followed by a 125-way softmax.
sequence_shape = (X.shape[1], X.shape[2])
model = Sequential([
    LSTM(50, activation='relu', input_shape=sequence_shape),
    Dense(125, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y_one_hot, epochs=200, batch_size=32)

# Evaluated on the same arrays that were just used for fitting.
loss, accuracy = model.evaluate(X, y_one_hot)
print(f'Loss: {loss}, Accuracy: {accuracy}')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Give the test-split features the (samples, 1, features) layout the model expects.
# Going through np.asarray and the first/last axes keeps the cell re-runnable: the
# original `.values` call fails once X_test_hero has already been turned into an array.
X_test_array = np.asarray(X_test_hero)
X_test_hero = X_test_array.reshape((X_test_array.shape[0], 1, X_test_array.shape[-1]))

In [None]:
# Class scores per test row -> index of the highest score -> hero name.
predictions = model.predict(X_test_hero)
predicted_classes = predictions.argmax(axis=1)
predicted_labels = label_encoder.inverse_transform(predicted_classes)
y_classes = label_encoder.inverse_transform(predicted_classes)
print(predicted_labels)

['Joy' 'Gloo' 'Roger' 'Miya' 'Rafaela' 'Martis' 'Cecilion' 'Esmeralda'
 'Hanzo' 'Edith' 'Phoveus' 'Belerick' 'Barats' "Chang'e" 'Nolan' 'Julian'
 'Faramis' 'Thamuz' 'Popol and Kupa' 'Uranus' 'Alucard' 'Pharsa' 'Lolita'
 'Argus' 'Aurora' 'Alpha' 'Yve' 'Mathilda' 'Aldous' 'Selena' 'Cyclops'
 'Aulus' 'Khufra' 'Masha' 'Silvanna' 'Natan' 'Angela' 'Yin']


# Implementasi

In [None]:
import pandas as pd

def recommend_counters_with_lstm(enemy_heroes,lane_free,role_free,model_hero, df,top_n,has_pick):
    """Return the heroes ``model_hero`` scores highest for the described draft.

    A single feature row is built over the feature columns of the module-level
    ``df_encoded`` (every column except ``Hero`` and the three rate columns).
    The ``BestCounter_<enemy>``, ``BestLane_<lane>`` and ``Role_<role>`` columns
    named by the arguments are set to 1 and all other columns stay 0. Names
    with no matching column are ignored. The row is reshaped to
    ``(1, 1, n_features)`` and passed to ``model_hero.predict``; class indices
    are mapped back to hero names with the module-level ``label_encoder``.

    Parameters
    ----------
    enemy_heroes : iterable of str
        Enemy hero names (matched against ``BestCounter_<name>`` columns).
    lane_free : iterable of str
        Lane names as they appear after the ``BestLane_`` prefix, e.g. ``"explane"``.
    role_free : iterable of str
        Role names as they appear after the ``Role_`` prefix.
    model_hero : object
        Model exposing ``predict`` that returns one row of per-class scores.
    df : any
        Unused; kept so existing positional calls keep working.
    top_n : int
        Maximum number of candidates to consider. Values <= 0 yield ``[]``.
    has_pick : list of str or None
        Heroes already picked; they are excluded from the result. ``None``
        means nothing has been picked.

    Returns
    -------
    list of dict
        ``{"Hero": name, "Probability": score}`` entries ordered from lowest to
        highest score. Contains fewer than ``top_n`` entries when candidates
        were already picked or when fewer classes exist.
    """
    # Fix: the original stored this fallback in an unused variable, so passing
    # has_pick=None crashed at the membership test further down.
    already_picked = [] if has_pick is None else has_pick

    feature_columns = df_encoded.drop(
        columns=['Hero', 'Pick Rate', 'Win Rate', 'Ban Rate']
    ).columns

    row = dict.fromkeys(feature_columns, 0)
    requested_columns = (
        [f'BestCounter_{enemy_hero}' for enemy_hero in enemy_heroes]
        + [f'BestLane_{lane}' for lane in lane_free]
        + [f'Role_{role}' for role in role_free]
    )
    for column in requested_columns:
        if column in row:
            row[column] = 1

    model_input = np.reshape(
        pd.DataFrame([row], columns=feature_columns).values, (1, 1, -1)
    )
    predictions = model_hero.predict(model_input)

    # Fix: never index past the number of classes the model actually returned
    # (the original looped over range(top_n) and raised IndexError).
    keep = min(top_n, predictions.shape[1])
    if keep <= 0:
        return []

    # argsort/sort are ascending, so the last `keep` entries are the highest
    # scores, still ordered from lowest to highest.
    top_classes = np.argsort(predictions, axis=1)[:, -keep:]
    top_probabilities = np.sort(predictions, axis=1)[:, -keep:]
    top_labels = label_encoder.inverse_transform(top_classes[0])

    return [
        {"Hero": hero, "Probability": probability}
        for hero, probability in zip(top_labels, top_probabilities[0])
        if hero not in already_picked
    ]


# Example draft: five enemy picks, every lane and role still open, nothing picked yet.
enemy_heroes = ['Natalia','Lolita','Barats','Lancelot','Hayabusa']
lane_free = ["roam", "explane", "midlane", "goldlane", "jungler"]
role_free = ["Mage", "Marksman", "Assassin", "Support", "Tank","Fighter"]
has_pick = []
top_n = 10

df_recommendations_prob = recommend_counters_with_lstm(
    enemy_heroes=enemy_heroes,
    lane_free=lane_free,
    role_free=role_free,
    model_hero=model,
    df=df,
    top_n=top_n,
    has_pick=has_pick,
)



In [None]:
# Show the recommended heroes with their model scores (a list of dicts, not a DataFrame).
df_recommendations_prob

[{'Hero': 'Alice', 'Probability': 0.016204478},
 {'Hero': 'Edith', 'Probability': 0.018923158},
 {'Hero': 'Lesley', 'Probability': 0.021011049},
 {'Hero': 'Ixia', 'Probability': 0.025227353},
 {'Hero': 'Dyrroth', 'Probability': 0.027943661},
 {'Hero': 'Gatotkaca', 'Probability': 0.028170347},
 {'Hero': 'Minsitthar', 'Probability': 0.030948782},
 {'Hero': 'Minotaur', 'Probability': 0.053111967},
 {'Hero': 'Zhuxin', 'Probability': 0.07437805},
 {'Hero': 'Yin', 'Probability': 0.47351953}]

In [None]:
# Example drafts passed to calculate_team_win_rate below.
# 'Heroes' and 'Lane' are index-aligned: each hero's lane is looked up by position.
# Lane names are written with spaces here; the spaces are removed before the name is
# matched against the BestLane_* columns. 'Roles' is counted as a whole list.
team_your = {
    'Heroes': ['Dyrroth', 'Zhuxin', 'Yin', 'Edith', 'Ixia'],
    'Lane': ['exp lane', 'mid lane', 'jungler', 'roam', 'gold lane'],
    'Roles': ['Fighter', 'Mage', 'Fighter', 'Tank', 'Marksman']
}

# Opposing draft; only its 'Heroes' list is read by calculate_team_win_rate.
team_enemy = {
    'Heroes': ['Hayabusa', 'Barats', 'Natalia', 'Lancelot', 'Lolita'],
    'Lane': ['jungler', 'gold lane', 'roam', 'mid lane', 'exp lane'],
    'Roles': ['Assassin', 'Fighter', 'Assassin', 'Assassin', 'Support']
}

def calculate_team_win_rate(team, enemy_team):
    """Score ``team`` against ``enemy_team`` from the per-hero rows of ``df_encoded``.

    For every hero of ``team`` that has a row in the module-level ``df_encoded``
    an adjusted score is computed as::

        0.34 * Win Rate
        + 0.33 * sum of the hero's BestCounter_<enemy> values over the enemy heroes
        + 0.33 * the hero's BestLane_<assigned lane> value (0 if no such column)
        - role_factor

    ``role_factor`` adds ``0.1 * (distinct roles in team - occurrences of that
    role in team)`` for each role column of the hero that equals 1.

    Heroes without a row in ``df_encoded`` contribute nothing to the sums but
    are still counted in the divisor.

    Parameters
    ----------
    team : dict
        Needs ``'Heroes'``, ``'Lane'`` (index-aligned with ``'Heroes'``; spaces
        in lane names are removed before the column lookup) and ``'Roles'``.
    enemy_team : dict
        Only ``'Heroes'`` is read.

    Returns
    -------
    tuple of (float, float)
        Average adjusted score and average counter term, each clamped to
        ``[0.0, 1.0]``. ``(0.0, 0.0)`` when ``team`` has no heroes.
    """
    heroes = team['Heroes']
    total_heroes = len(heroes)
    if total_heroes == 0:
        # Fix: the original divided by zero for an empty team.
        return 0.0, 0.0

    role_names = ['Support', 'Tank', 'Fighter', 'Mage', 'Marksman', 'Assassin']
    distinct_roles = len(set(team['Roles']))

    total_win_rate = 0
    counter_index = 0

    # Fix: enumerate gives each hero its own position; the original used
    # list.index(hero), which always returns the first occurrence and therefore
    # assigned the wrong lane to a hero listed more than once.
    for position, hero in enumerate(heroes):
        hero_data = df_encoded[df_encoded['Hero'] == hero]
        if hero_data.empty:
            continue

        hero_row = hero_data.iloc[0]
        base_win_rate = hero_row['Win Rate']*0.34

        # Adjustment for the enemy heroes this hero counters.
        counter_factor = 0
        for enemy in enemy_team['Heroes']:
            counter_factor += hero_row.get("BestCounter_" + enemy, 0) * 0.33

        # Adjustment for playing in one of the hero's best lanes.
        hero_lane = team['Lane'][position]
        lane_column = f'BestLane_{hero_lane.replace(" ", "")}'
        lane_factor = hero_row.get(lane_column, 0) * 0.33

        # Adjustment based on how the hero's roles are distributed in the team.
        role_factor = 0
        for role_name in role_names:
            # .get tolerates a role column that is absent from df_encoded.
            if hero_row.get(f'Role_{role_name}', 0) == 1:
                role_count = team['Roles'].count(role_name)
                role_factor += 0.1 * (distinct_roles - role_count)

        # Adjusted score for this hero.
        adjusted_win_rate = base_win_rate + counter_factor + lane_factor - role_factor
        total_win_rate += adjusted_win_rate

        counter_index += counter_factor

    # Team averages, clamped to [0, 1].
    avg_win_rate = total_win_rate / total_heroes
    avg_counter = counter_index / total_heroes

    return min(max(avg_win_rate, 0.0), 1.0), min(max(avg_counter, 0.0), 1.0)

# Compute the win score and counter index for your team against the enemy draft
win_rate_team_your, counter_index = calculate_team_win_rate(team_your, team_enemy)
print(f"Probabilitas Win Tim Anda: {win_rate_team_your:.2%}")
print(f"Counter Index Tim Anda: {counter_index:.2%}")

Probabilitas Win Tim Anda: 56.76%
Counter Index Tim Anda: 46.20%
