In [13]:
import pandas as pd

# Read the two input tables: the per-match records (hero picks and result)
# and the per-hero attribute table. Forward-slash paths need no escaping.
cleaned_hero_data, corrected_heroes_cleaned = (
    pd.read_csv(csv_path)
    for csv_path in (
        r'D:/mobile_legends_prediction/data/cleaned_hero_data.csv',
        r'D:/mobile_legends_prediction/data/corrected_heroes_cleaned.csv',
    )
)

# Guard: the 'Match Result' column is needed to build the training target.
# If it is missing, only a message is printed and the entire training branch
# below is skipped.
# NOTE(review): in that case `model` is never assigned in this cell, so on a
# fresh kernel the later cell that saves `model` would fail with NameError —
# consider raising here instead of printing.
if 'Match Result' not in cleaned_hero_data.columns:
    print("Error: 'Match Result' column not found in cleaned_hero_data")
else:
    # Convert specific columns to numeric values
    def convert_to_numeric(df, columns):
        """Return a copy of ``df`` with the given columns coerced to numeric dtype.

        Parameters
        ----------
        df : pandas.DataFrame
            Frame holding the columns to convert. It is left untouched.
        columns : iterable of str
            Names of the columns to convert. Every name must exist in ``df``.

        Returns
        -------
        pandas.DataFrame
            A new frame in which each listed column went through
            ``pd.to_numeric(..., errors='coerce')``; values that cannot be
            parsed as numbers become NaN.

        Raises
        ------
        KeyError
            If a listed column is missing from ``df``.
        """
        # Work on a copy: mutating the caller's frame made the cell
        # non-idempotent and left the "raw" frame silently altered.
        converted = df.copy()
        for column in columns:
            converted[column] = pd.to_numeric(converted[column], errors='coerce')
        return converted

    # Hero attribute columns that are averaged per team further down; they
    # are coerced to numbers first. ('Difficulity' is spelled as it is used
    # throughout this notebook — presumably matching the CSV header; verify.)
    columns_to_convert = [
        'Winrate',
        'Durability',
        'Offense',
        'Ability Effects',
        'Difficulity',
    ]
    corrected_heroes_cleaned = convert_to_numeric(
        df=corrected_heroes_cleaned,
        columns=columns_to_convert,
    )

    # Calculate team features
    def calculate_team_features(row, hero_stats=None):
        """Average the hero attributes over the five heroes picked in one match.

        Parameters
        ----------
        row : pandas.Series or mapping
            One match record; must provide the keys 'Hero_1' ... 'Hero_5'.
        hero_stats : pandas.DataFrame, optional
            Per-hero lookup table with a 'Hero' column plus one column per
            feature. Defaults to the notebook-level ``corrected_heroes_cleaned``.

        Returns
        -------
        pandas.Series
            Mean of each feature, indexed by feature name. Heroes that are
            not found in ``hero_stats`` are skipped. A null attribute value
            is left out of that feature's mean only. A feature with no
            usable value at all is reported as 0.

        Raises
        ------
        KeyError
            If ``row`` lacks one of the 'Hero_N' keys.
        """
        if hero_stats is None:
            hero_stats = corrected_heroes_cleaned

        feature_names = ['Winrate', 'Durability', 'Offense', 'Ability Effects', 'Difficulity']
        hero_slots = ['Hero_1', 'Hero_2', 'Hero_3', 'Hero_4', 'Hero_5']

        totals = dict.fromkeys(feature_names, 0.0)
        counts = dict.fromkeys(feature_names, 0)

        for slot in hero_slots:
            matches = hero_stats[hero_stats['Hero'] == row[slot]]
            if matches.empty:
                continue
            # If a hero name appears more than once, the first record wins.
            hero_record = matches.iloc[0]
            for name in feature_names:
                value = hero_record[name]
                if pd.notnull(value):
                    totals[name] += value
                    counts[name] += 1

        # Divide each sum by the number of values that actually went into it.
        # Dividing by the number of matched heroes (as before) dragged the
        # mean toward zero whenever a matched hero had a missing attribute.
        return pd.Series({
            name: (totals[name] / counts[name]) if counts[name] else 0.0
            for name in feature_names
        })

    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    # One row of averaged hero attributes per match record.
    team_features = cleaned_hero_data.apply(calculate_team_features, axis=1)

    # Feature matrix and binary target: 1 for 'Victory', 0 for anything else.
    X = team_features
    y = cleaned_hero_data['Match Result'].eq('Victory').astype('int64')

    # Hold out 20% of the matches; the fixed seed keeps the split repeatable.
    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=0.2,
        random_state=42,
    )

    # Fit a Gaussian Naive Bayes classifier on the training portion.
    model = GaussianNB()
    model.fit(X_train, y_train)

    # Report the mean accuracy on the held-out matches.
    accuracy = model.score(X_test, y_test)
    print(f'Model Accuracy: {accuracy * 100:.2f}%')


Model Accuracy: 63.88%


In [14]:
import joblib

# Persist the fitted classifier to disk (path is relative to the current
# working directory); the returned list of written files is the cell output.
joblib.dump(value=model, filename='naive_bayes_model.pkl')


['naive_bayes_model.pkl']