Exp-04 (Naive Bayes Algorithm)

In [None]:
import pandas as pd

def calculate_probability(df, class_label):
    """Return the prior probability of *class_label* in ``df['Class']``.

    The prior is the fraction of rows whose ``'Class'`` value equals
    ``class_label``. The result is also printed with four decimals.

    Args:
        df: DataFrame that contains a ``'Class'`` column.
        class_label: Class value whose relative frequency is wanted.

    Returns:
        The relative frequency of ``class_label``; ``0`` when ``df`` has no
        rows or the label never occurs.
    """
    total_count = len(df)
    class_count = df['Class'].value_counts().get(class_label, 0)
    # Guard the denominator so an empty DataFrame yields 0 rather than
    # raising ZeroDivisionError.
    probability = class_count / total_count if total_count > 0 else 0
    print(f"Probability of '{class_label}': {probability:.4f}")
    return probability

def calculate_conditional_probability(df, feature, value, class_label):
    """Return P(feature == value | Class == class_label) estimated from *df*.

    Feature values are compared as stripped, lower-cased strings, so
    ``True``, ``"true"`` and ``" TRUE "`` all match a boolean ``True`` cell,
    and surrounding whitespace in either the data or the query is ignored.
    The result is also printed with four decimals.

    Args:
        df: DataFrame containing ``feature`` and ``'Class'`` columns.
        feature: Name of the feature column to inspect.
        value: Feature value to match; any type accepted by ``str()``.
        class_label: Class value to condition on (compared exactly).

    Returns:
        Matching-row count divided by the class row count, or ``0`` when
        no row belongs to ``class_label``.
    """
    # Normalise the query the same way as the column. Going through str()
    # handles Python bools, NumPy bools and plain strings uniformly, with
    # no type-specific branching that could fail on an unexpected type.
    target = str(value).strip().lower()
    column = df[feature].astype(str).str.strip().str.lower()

    in_class = df['Class'] == class_label
    class_count = int(in_class.sum())
    matching_count = int((in_class & (column == target)).sum())

    probability = matching_count / class_count if class_count > 0 else 0
    print(f"Conditional probability of '{feature}' = '{value}' given '{class_label}': {probability:.4f}")
    return probability

def predict_class(df, outlook, temperature, humidity, windy):
    """Classify one observation as "Play" or "No Play" from training data.

    For each label the score is the label's prior multiplied by the
    conditional probability of each of the four observed feature values.
    All intermediate probabilities and both final scores are printed.

    Args:
        df: Training DataFrame with 'Outlook', 'Temperature', 'Humidity',
            'Windy' and 'Class' columns.
        outlook: Observed value for the 'Outlook' feature.
        temperature: Observed value for the 'Temperature' feature.
        humidity: Observed value for the 'Humidity' feature.
        windy: Observed value for the 'Windy' feature.

    Returns:
        "Play" when its score is strictly greater than the "No Play"
        score; otherwise "No Play" (so ties resolve to "No Play").
    """
    print("\nCalculating probabilities for prediction...")

    labels = ("Play", "No Play")
    observation = (
        ('Outlook', outlook),
        ('Temperature', temperature),
        ('Humidity', humidity),
        ('Windy', windy),
    )

    # Priors for both labels are computed before any conditional.
    scores = {label: calculate_probability(df, label) for label in labels}

    # Fold each label's conditionals into its running product, one
    # feature at a time in the order listed above.
    for label in labels:
        for feature, observed_value in observation:
            likelihood = calculate_conditional_probability(df, feature, observed_value, label)
            scores[label] = scores[label] * likelihood

    play_score = scores["Play"]
    noplay_score = scores["No Play"]

    print(f"Final probability of 'Play': {play_score:.4f}")
    print(f"Final probability of 'No Play': {noplay_score:.4f}")

    if play_score > noplay_score:
        return "Play"
    return "No Play"

def main():
    """Train on one CSV, predict every row of another, and report accuracy.

    Reads "navie_weather.csv" as training data and "navie_test.csv" as the
    test set. Each test row's 'Outlook', 'Temperature', 'Humidity' and
    'Windy' values are passed to ``predict_class``; the prediction is
    compared with the row's 'Class' value. The accuracy percentage is
    printed, and the test data with an added 'Predicted Class' column is
    written to "test_with_predictions.csv".
    """
    df_train = pd.read_csv("navie_weather.csv")
    df_test = pd.read_csv("navie_test.csv")

    predictions = []
    correct_predictions = 0

    for _, row in df_test.iterrows():
        predicted_class = predict_class(
            df_train,
            row['Outlook'],
            row['Temperature'],
            row['Humidity'],
            row['Windy'],
        )
        predictions.append(predicted_class)

        if predicted_class == row['Class']:
            correct_predictions += 1

    # Assign the whole column at once instead of growing it cell by cell.
    df_test['Predicted Class'] = predictions

    # An empty test file would otherwise divide by zero; report 0% instead.
    total_rows = len(df_test)
    accuracy = (correct_predictions / total_rows) * 100 if total_rows else 0.0
    print(f"Accuracy: {accuracy:.2f}%")

    df_test.to_csv("test_with_predictions.csv", index=False)

# Run the train/predict/evaluate pipeline only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()
