In [11]:
import openpyxl
import math
import random
from collections import Counter, defaultdict
from datetime import datetime
import matplotlib.pyplot as plt
from MyNaiveBayesClassifier import MyNaiveBayesClassifier

def read_excel(file_path):
    """Return the data rows of the active worksheet in *file_path*.

    Every worksheet row is converted to a list of cell values (not cell
    objects). The first row is treated as the header and dropped.

    Args:
        file_path: Path of the workbook, passed to ``openpyxl.load_workbook``.

    Returns:
        A list of rows, each a list of cell values, excluding the first row.
    """
    active_sheet = openpyxl.load_workbook(file_path).active
    all_rows = [list(cells) for cells in active_sheet.iter_rows(values_only=True)]
    # Row 0 is the header line; callers only want the data beneath it.
    return all_rows[1:]

def normalize_units(row, indices):
    """Pick the cells at *indices* from *row* and normalize their units.

    Args:
        row: Sequence of cell values for one spreadsheet row.
        indices: Positions in *row* to extract, in output order.

    Returns:
        A new list with one entry per index:
        * ``0`` where the cell is ``None`` (missing value),
        * ``value / 100`` for the cell at index 14 (the "Humidity (%) Avg"
          column), turning a percentage into a fraction,
        * the unchanged value otherwise.
    """
    humidity_avg_index = 14  # Humidity (%) Avg column

    normalized_row = []
    for i in indices:
        value = row[i]
        if value is None:
            # Missing values are replaced by 0 rather than dropped so the
            # output always has len(indices) entries.
            normalized_row.append(0)
        elif i == humidity_avg_index:
            normalized_row.append(value / 100)
        else:
            normalized_row.append(value)
    return normalized_row

def load_filtered_dataset(file_path):
    """Load the spreadsheet at *file_path* and return cleaned features and labels.

    A row is kept only when:
    * it is wide enough to contain every feature column,
    * none of the feature cells nor the label cell (last column) is ``None``,
    * every feature cell is an ``int`` or ``float``,
    * a string label is "yes" or "no" (case/whitespace-insensitive); these
      are mapped to ``1`` and ``0``. Non-string labels are kept unchanged.

    Args:
        file_path: Path of the workbook, forwarded to ``read_excel``.

    Returns:
        A ``(features, labels)`` tuple of parallel lists. Each feature row
        is the output of ``normalize_units`` for the feature columns.
    """
    feature_indices = [11, 14, 17, 20, 23, 26]  # Avg value columns
    min_row_width = max(feature_indices) + 1

    features = []
    labels = []
    for row in read_excel(file_path):
        # Rows narrower than the widest feature index would raise IndexError
        # below; treat them like any other unusable row and skip them.
        if len(row) < min_row_width:
            continue

        label = row[-1]
        if label is None or any(row[i] is None for i in feature_indices):
            continue

        if isinstance(label, str):
            label = label.strip().lower()
            label = 1 if label == "yes" else 0 if label == "no" else None
            if label is None:
                # Unrecognized textual label.
                continue

        if not all(isinstance(row[i], (int, float)) for i in feature_indices):
            continue

        features.append(normalize_units(row, feature_indices))
        labels.append(label)

    return features, labels

def calculate_metrics(y_true, y_pred):
    """Compute accuracy, precision, recall and F1 for binary labels (1 = positive).

    Pairs are formed with ``zip``, so extra elements in the longer sequence
    are ignored; accuracy is still divided by ``len(y_true)``.

    Args:
        y_true: Sequence of actual labels (0 or 1).
        y_pred: Sequence of predicted labels (0 or 1).

    Returns:
        A tuple ``(accuracy, precision, recall, f1_score)``. Any metric whose
        denominator is zero (including accuracy for empty input) is ``0``.
    """
    true_positive = 0
    false_positive = 0
    false_negative = 0
    correct = 0

    for true, pred in zip(y_true, y_pred):
        if true == pred:
            correct += 1
            if true == 1:
                true_positive += 1
        elif true == 0 and pred == 1:
            false_positive += 1
        elif true == 1 and pred == 0:
            false_negative += 1

    predicted_positive = true_positive + false_positive
    actual_positive = true_positive + false_negative
    total = len(y_true)

    precision = true_positive / predicted_positive if predicted_positive > 0 else 0
    recall = true_positive / actual_positive if actual_positive > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

    # Guard the empty case like the other metrics instead of raising
    # ZeroDivisionError.
    accuracy = correct / total if total > 0 else 0
    return accuracy, precision, recall, f1_score

def run_prediction_interface():
    """Prompt for six weather values, train the classifier and print a prediction.

    Each value is read with ``input`` and re-prompted until it parses as a
    finite float. The humidity value is divided by 100 to match the
    normalization applied to the training data by ``normalize_units``.
    The dataset is then loaded from 'merged_weather_ufo.xlsx', split, and a
    ``MyNaiveBayesClassifier`` is fitted on the training part before
    predicting on the single user-supplied row.

    Any exception raised while loading, training or predicting is caught
    and reported as a one-line "Error: ..." message; nothing is returned.
    """
    print("Welcome to the Naive Bayes Weather-UFO Prediction App")
    print("Enter the average weather data values below:")

    feature_names = [
        "Temperature (°F) Avg", "Dew Point (°F) Avg",
        "Humidity (%) Avg", "Wind Speed (mph) Avg", "Pressure (in) Avg", "Precipitation (in) Total"
    ]

    user_input = []
    for feature in feature_names:
        while True:
            try:
                value = float(input(f"Enter {feature}: "))
                # float() also accepts "nan", "inf" and "infinity"; those are
                # not usable measurements, so treat them as invalid input.
                if not math.isfinite(value):
                    raise ValueError("non-finite value")
            except ValueError:
                print("Invalid input. Please enter a numerical value.")
                continue

            if "Humidity" in feature:
                value /= 100  # Normalize humidity input
            user_input.append(value)
            break

    print("\nProcessing your input...")

    try:
        # Load dataset and train model
        file_path = 'merged_weather_ufo.xlsx'
        features, labels = load_filtered_dataset(file_path)
        # NOTE(review): split_data is not defined in this section; it is
        # assumed to return the training features and labels first — confirm.
        X_train, y_train, _, _ = split_data(features, labels)

        nb_classifier = MyNaiveBayesClassifier()
        nb_classifier.fit(X_train, y_train)

        # Predict on a single-row batch and take its only result
        prediction = nb_classifier.predict([user_input])[0]
        result = "likely" if prediction == 1 else "unlikely"
        print(f"\nPrediction: It is {result} that a UFO sighting will occur based on the provided weather data.")

    except Exception as e:
        # Top-level boundary of the interactive app: report instead of
        # surfacing a traceback to the user.
        print(f"Error: {e}")

# Start the interactive prediction prompt only when this code is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_prediction_interface()


Welcome to the Naive Bayes Weather-UFO Prediction App
Enter the average weather data values below:

Processing your input...
Error: list index out of range
