In [1]:
# Import the required libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Sample dataset: ten toy observations, every column categorical.
# 'Play' is the target; 'Weather' and 'Temperature' are the features.
data = {
    'Weather': ['Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Rainy'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Play': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes']
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display the dataset
print("Original Dataset:")
print(df)

# Label-encode every categorical column (features and target).
# Each column gets its OWN fitted LabelEncoder: re-fitting a single shared
# encoder overwrites its classes_, which makes it impossible to map the
# integer codes of earlier columns back to their original strings.
encoders = {}
for column in ['Weather', 'Temperature', 'Play']:
    encoders[column] = preprocessing.LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound to the encoder fitted on the target column, which is the
# state the name had after the encoding step previously.
le = encoders['Play']

# Display the encoded dataset
print("\nEncoded Dataset:")
print(df)

# Assign features and label variables
features = df[['Weather', 'Temperature']]
labels = df['Play']

# Split dataset into training and testing sets (fixed seed for reproducibility)
features_train, features_test, label_train, label_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Generate a model using Naive Bayes classifier (GaussianNB) and fit it.
# NOTE(review): GaussianNB treats the integer codes as continuous values;
# sklearn's CategoricalNB may be a better fit for categorical features.
# Left unchanged here so the recorded results stay the same.
model = GaussianNB()
model.fit(features_train, label_train)

# Perform prediction
predicted = model.predict(features_test)

# Print prediction
print("\nPredictions:", predicted)

# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 (row = actual, column = predicted,
# order 0='No', 1='Yes'). Without it, a tiny test split whose actual and
# predicted values contain a single class yields a 1x1 matrix and the
# TP/FP/TN/FN lookups below fail with IndexError.
conf_mat = confusion_matrix(label_test, predicted, labels=[0, 1])
print("\nConfusion Matrix:")
print(conf_mat)

# Accuracy
accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)

# Confusion Matrix Interpretation
# Row-major flattening of [[TN, FP], [FN, TP]].
TN, FP, FN, TP = conf_mat.ravel()

print("\nConfusion Matrix Interpretation:")
print(f"True Positives (TP): {TP}")
print(f"False Positives (FP): {FP}")
print(f"True Negatives (TN): {TN}")
print(f"False Negatives (FN): {FN}")

# Accuracy Calculation (should agree with accuracy_score above)
print("\nAccuracy Calculation:")
print(f"Accuracy = (TP + TN) / (TP + TN + FP + FN) = ({TP} + {TN}) / ({TP} + {TN} + {FP} + {FN})")
accuracy_manual = (TP + TN) / (TP + TN + FP + FN)
print(f"Calculated Accuracy: {accuracy_manual:.2f} or {accuracy_manual * 100:.2f}%")


Original Dataset:
    Weather Temperature Play
0     Sunny         Hot   No
1     Sunny         Hot   No
2  Overcast         Hot  Yes
3     Rainy        Mild  Yes
4     Rainy        Cool  Yes
5     Rainy        Cool   No
6  Overcast        Mild  Yes
7     Sunny        Mild   No
8     Sunny         Hot  Yes
9     Rainy        Mild  Yes

Encoded Dataset:
   Weather  Temperature  Play
0        2            1     0
1        2            1     0
2        0            1     1
3        1            2     1
4        1            0     1
5        1            0     0
6        0            2     1
7        2            2     0
8        2            1     1
9        1            2     1

Predictions: [0 0]

Confusion Matrix:
[[1 0]
 [1 0]]
Accuracy: 0.5

Confusion Matrix Interpretation:
True Positives (TP): 0
False Positives (FP): 0
True Negatives (TN): 1
False Negatives (FN): 1

Accuracy Calculation:
Accuracy = (TP + TN) / (TP + TN + FP + FN) = (0 + 1) / (0 + 1 + 0 + 1)
Calculated Accuracy: 0.50 or 50.00%

In [2]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split

# Create the dataset: fourteen toy customer records, every column categorical.
# 'Buys_Computer' is the target; the other four columns are the features.
data = {
    'Age': ['youth', 'youth', 'middle_aged', 'senior', 'senior', 'senior', 'middle_aged', 'youth', 'youth', 'senior',
            'youth', 'middle_aged', 'middle_aged', 'senior'],
    'Income': ['high', 'high', 'high', 'medium', 'low', 'low', 'low', 'medium', 'low', 'medium',
               'medium', 'medium', 'high', 'medium'],
    'Student': ['no', 'no', 'no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'yes',
                'yes', 'no', 'yes', 'no'],
    'Credit_Rating': ['fair', 'excellent', 'fair', 'fair', 'fair', 'excellent', 'excellent', 'fair',
                      'fair', 'fair', 'excellent', 'excellent', 'fair', 'excellent'],
    'Buys_Computer': ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes',
                      'yes', 'yes', 'yes', 'no']
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Display the original dataset
print("Original Dataset:")
print(df)

# Apply label encoding to all columns, including the target 'Buys_Computer'.
# Each column gets its OWN fitted LabelEncoder: re-fitting a single shared
# encoder overwrites its classes_, which makes it impossible to map the
# integer codes of earlier columns back to their original strings.
encoders = {}
for column in ['Age', 'Income', 'Student', 'Credit_Rating', 'Buys_Computer']:
    encoders[column] = preprocessing.LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound to the encoder fitted on the target column, which is the
# state the name had after the encoding step previously.
le = encoders['Buys_Computer']

# Display the encoded dataset
print("\nEncoded Dataset:")
print(df)

# Assign features and label variables
features = df[['Age', 'Income', 'Student', 'Credit_Rating']]
labels = df['Buys_Computer']

# Split dataset into training and testing sets (fixed seed for reproducibility)
features_train, features_test, label_train, label_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Create and train the Naive Bayes model
# NOTE(review): GaussianNB treats the integer codes as continuous values;
# sklearn's CategoricalNB may be a better fit for categorical features.
# Left unchanged here so the results stay the same.
model = GaussianNB()
model.fit(features_train, label_train)

# Make predictions
predicted = model.predict(features_test)

# Print predictions
print("\nPredictions:", predicted)

# Confusion Matrix
# labels=[0, 1] pins the matrix to 2x2 (row = actual, column = predicted,
# order 0='no', 1='yes'). Without it, a tiny test split whose actual and
# predicted values contain a single class yields a 1x1 matrix and the
# TP/FP/TN/FN lookups below fail with IndexError.
conf_mat = confusion_matrix(label_test, predicted, labels=[0, 1])
print("\nConfusion Matrix:")
print(conf_mat)

# Accuracy
accuracy = accuracy_score(label_test, predicted)
print("Accuracy:", accuracy)

# Confusion Matrix Interpretation
# Row-major flattening of [[TN, FP], [FN, TP]].
TN, FP, FN, TP = conf_mat.ravel()

print("\nConfusion Matrix Interpretation:")
print(f"True Positives (TP): {TP}")
print(f"False Positives (FP): {FP}")
print(f"True Negatives (TN): {TN}")
print(f"False Negatives (FN): {FN}")

# Accuracy Calculation (should agree with accuracy_score above)
print("\nAccuracy Calculation:")
print(f"Accuracy = (TP + TN) / (TP + TN + FP + FN) = ({TP} + {TN}) / ({TP} + {TN} + {FP} + {FN})")
accuracy_manual = (TP + TN) / (TP + TN + FP + FN)
print(f"Calculated Accuracy: {accuracy_manual:.2f} or {accuracy_manual * 100:.2f}%")


Original Dataset:
            Age  Income Student Credit_Rating Buys_Computer
0         youth    high      no          fair            no
1         youth    high      no     excellent            no
2   middle_aged    high      no          fair           yes
3        senior  medium      no          fair           yes
4        senior     low     yes          fair           yes
5        senior     low     yes     excellent            no
6   middle_aged     low     yes     excellent           yes
7         youth  medium      no          fair            no
8         youth     low     yes          fair           yes
9        senior  medium     yes          fair           yes
10        youth  medium     yes     excellent           yes
11  middle_aged  medium      no     excellent           yes
12  middle_aged    high     yes          fair           yes
13       senior  medium      no     excellent            no

Encoded Dataset:
    Age  Income  Student  Credit_Rating  Buys_Computer
0     2  