In [13]:
# Import required libraries
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Read the spreadsheet into a DataFrame
file_path = 'Home_assign(data).xlsx'  # Replace with your actual file path
df = pd.read_excel(file_path)

# Show the raw table under a heading, before any clean-up is applied
print("Original Dataset:", df, sep="\n")

# Trim whitespace from both ends of every column name so that later lookups
# by name are not tripped up by stray spaces in the sheet's header row
df.columns = df.columns.str.strip()

# Encode the categorical columns as integer codes.
#
# One LabelEncoder is fitted per column and kept in `encoders`, so the
# code -> category mapping of every column stays available afterwards
# (e.g. encoders['Sleep Disorder'].inverse_transform(predicted)).
# Re-fitting a single shared encoder would overwrite its mapping on every
# iteration and leave only the last column decodable.

# List of columns that are categorical and need encoding
# NOTE(review): 'Physical Activity Level' and 'Stress Level' look numeric in
# the printed dataset; label-encoding replaces their values with the rank of
# each distinct value -- confirm that this is intended.
categorical_columns = ['Gender', 'Physical Activity Level', 'Stress Level', 'Alcohol Consumption',
                       'Smoking Habit', 'Sleep Disorder']

# Column name -> the LabelEncoder fitted on that column
encoders = {}
for column in categorical_columns:
    encoders[column] = preprocessing.LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound for any code that still refers to it: as before, it is the
# encoder fitted on the last column in the list.
le = encoders[categorical_columns[-1]]

# Separate the target column from the predictor columns
labels = df['Sleep Disorder']
features = df.drop(columns='Sleep Disorder')

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs
features_train, features_test, label_train, label_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# Fit a Gaussian Naive Bayes classifier on the training rows
model = GaussianNB().fit(features_train, label_train)

# Predict the label of each held-out row and show the result
predicted = model.predict(features_test)
print("\nPredictions:", predicted)

# Confusion matrix (rows = true label, columns = predicted label).
#
# The class list is taken from the full label column rather than inferred from
# the held-out rows: with a small test split, the test labels and predictions
# may contain only one class, which would otherwise shrink the matrix to 1x1
# and skip the TP/FP/TN/FN breakdown below.
class_ids = sorted(labels.unique())
conf_mat = confusion_matrix(label_test, predicted, labels=class_ids)

# The TP/FP/TN/FN interpretation only applies to a two-class problem
if conf_mat.shape == (2, 2):
    # Row-major order of a 2x2 matrix, with class 1 treated as the positive class
    TN, FP, FN, TP = conf_mat.ravel()

    print("\nConfusion Matrix Interpretation:")
    print(f"True Positives (TP): {TP}")
    print(f"False Positives (FP): {FP}")
    print(f"True Negatives (TN): {TN}")
    print(f"False Negatives (FN): {FN}")

    # Accuracy Calculation, spelled out step by step
    print("\nAccuracy Calculation:")
    print(f"Accuracy = (TP + TN) / (TP + TN + FP + FN) = ({TP} + {TN}) / ({TP} + {TN} + {FP} + {FN})")
    accuracy_manual = (TP + TN) / (TP + TN + FP + FN)
    print(f"Calculated Accuracy: {accuracy_manual:.2f} or {accuracy_manual * 100:.2f}%")
else:
    # Reached when the label column holds one class or more than two classes
    print("\nConfusion Matrix is not 2x2 (the label does not have exactly two classes). Here are the details:")
    print(conf_mat)

# Accuracy as computed by scikit-learn, for any number of classes
accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)


Original Dataset:
   Gender  Age  Physical Activity Level  Stress Level Alcohol Consumption  \
0    Male   25                        3             2                 Low   
1  Female   30                        2             3            Moderate   
2    Male   45                        1             4                High   
3  Female   28                        3             1                 Low   
4    Male   35                        2             2            Moderate   
5  Female   40                        1             4                High   
6    Male   50                        1             3                High   
7  Female   32                        2             2                 Low   
8    Male   29                        3             2            Moderate   
9  Female   48                        1             4                High   

  Smoking Habit  Work Hours Sleep Disorder  
0            No           8            Yes  
1            No           9             No  