In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

# Algorithm
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

import warnings
warnings.filterwarnings ("ignore")


In [2]:
df = pd.read_excel("weather.xlsx")
df

Unnamed: 0,outlook,temperature,humidity,windy,play
0,overcast,mild,high,0,yes
1,sunny,cool,high,1,yes
2,sunny,hot,high,1,yes
3,rainy,hot,high,0,yes
4,sunny,mild,high,0,no
5,overcast,mild,normal,0,yes
6,overcast,hot,high,1,yes
7,rainy,cool,normal,0,yes
8,rainy,mild,high,1,no
9,overcast,cool,normal,1,no


In [3]:
df1 =df.dropna()
df1

Unnamed: 0,outlook,temperature,humidity,windy,play
0,overcast,mild,high,0,yes
1,sunny,cool,high,1,yes
2,sunny,hot,high,1,yes
3,rainy,hot,high,0,yes
4,sunny,mild,high,0,no
5,overcast,mild,normal,0,yes
6,overcast,hot,high,1,yes
7,rainy,cool,normal,0,yes
8,rainy,mild,high,1,no
9,overcast,cool,normal,1,no


In [4]:
df1["outlook"].replace({"overcast":1,"rainy": 2, "sunny":3}, inplace=True)
df1["temperature"].replace({"mild": 0, "hot":1,"cool": 2}, inplace=True)
df1["humidity"].replace({"high": 0, "normal":1}, inplace=True)
df1["play"].replace({"yes":1,"no":0}, inplace=True)

In [5]:
x=df1.drop('play', axis=1) 
y=df1["play"]

x_train, x_test, y_train, y_test=train_test_split(x,y, test_size=0.2, random_state=11, stratify=y)


In [6]:
#guassianNB
gnb_model = GaussianNB()
gnb_model.fit(x_train, y_train)

In [7]:

y_pred=gnb_model.predict(x_test)

cnf_matrix = confusion_matrix(y_pred, y_test) 
print("Confusion Matrix:\n",cnf_matrix) 
print("-"*60)

accuracy = accuracy_score(y_pred, y_test) 
print("Accuracy:", accuracy) 
print("-"*60)

clf_report = classification_report(y_pred, y_test) 
print("Classification Report:\n", clf_report)

Confusion Matrix:
 [[0 0]
 [1 2]]
------------------------------------------------------------
Accuracy: 0.6666666666666666
------------------------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.67      0.80         3

    accuracy                           0.67         3
   macro avg       0.50      0.33      0.40         3
weighted avg       1.00      0.67      0.80         3



In [8]:
# Training Model Evaluation
y_pred_train = gnb_model.predict(x_train)

cnf_matrix = confusion_matrix(y_pred_train, y_train) 
print("Confusion Matrix:\n",cnf_matrix) 
print("-"*60)

accuracy = accuracy_score(y_pred_train, y_train) 
print("Accuracy:", accuracy)
print("-"*60)

clf_report = classification_report(y_pred_train, y_train) 
print("Classification Report:\n",clf_report)


Confusion Matrix:
 [[3 1]
 [1 6]]
------------------------------------------------------------
Accuracy: 0.8181818181818182
------------------------------------------------------------
Classification Report:
               precision    recall  f1-score   support

           0       0.75      0.75      0.75         4
           1       0.86      0.86      0.86         7

    accuracy                           0.82        11
   macro avg       0.80      0.80      0.80        11
weighted avg       0.82      0.82      0.82        11



In [9]:
column_names=x.columns
column_names

Index(['outlook', 'temperature', 'humidity', 'windy'], dtype='object')

In [10]:
outlook_value={"overcast": 1, "rainy": 2, "sunny":3}
temperature_value={"mild": 0, "hot": 1, "cool":2} 
humidity_value={"high":0,"normal":1}

In [11]:
json_data={"outlook": outlook_value, 
           "temperature":temperature_value, 
           "humidity": humidity_value, 
           "columns":list(column_names)}
json_data

{'outlook': {'overcast': 1, 'rainy': 2, 'sunny': 3},
 'temperature': {'mild': 0, 'hot': 1, 'cool': 2},
 'humidity': {'high': 0, 'normal': 1},
 'columns': ['outlook', 'temperature', 'humidity', 'windy']}

In [12]:
import json 
with open("Project_data_NB.json", "w") as f:
    json.dump(json_data, f)

In [13]:
outlook="rainy" 
temperature="mild" 
humidity="high" 
windy=1.0


In [14]:
test_array = np.zeros(len(column_names))
test_array[0] = json_data['outlook'] [outlook] 
test_array[1] = json_data['temperature'] [temperature] 
test_array[2] = json_data['humidity'] [humidity]
test_array[3] = windy

test_array


array([2., 0., 0., 1.])

In [15]:
play=gnb_model.predict([test_array])[0]

if play==1:
      print("yes, Play is happening") 
else:
    print("No, Play is not happening")

yes, Play is happening
