In [1]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the apple-quality CSV (path is relative to the working directory)
# into a DataFrame.
df = pd.read_csv(filepath_or_buffer='apple_quality.csv')
# Preview the top of the frame; head() returns 5 rows by default.
df.head()

Unnamed: 0,A_id,Size,Weight,Sweetness,Crunchiness,Juiciness,Ripeness,Acidity,Quality
0,0.0,-3.970049,-2.512336,5.34633,-1.012009,1.8449,0.32984,-0.491590483,good
1,1.0,-1.195217,-2.839257,3.664059,1.588232,0.853286,0.86753,-0.722809367,good
2,2.0,-0.292024,-1.351282,-1.738429,-0.342616,2.838636,-0.038033,2.621636473,bad
3,3.0,-0.657196,-2.271627,1.324874,-0.097875,3.63797,-3.413761,0.790723217,good
4,4.0,1.364217,-1.296612,-0.384658,-0.553006,3.030874,-1.303849,0.501984036,good


In [3]:
# Preprocessing: discard every row that contains at least one missing value.
# axis=0 and how='any' are the pandas defaults, spelled out here for clarity.
df.dropna(axis=0, how='any', inplace=True)
# Show the bottom of the cleaned frame; tail() returns 5 rows by default.
df.tail()

Unnamed: 0,A_id,Size,Weight,Sweetness,Crunchiness,Juiciness,Ripeness,Acidity,Quality
3995,3995.0,0.059386,-1.067408,-3.714549,0.473052,1.697986,2.244055,0.137784369,bad
3996,3996.0,-0.293118,1.949253,-0.20402,-0.640196,0.024523,-1.0879,1.854235285,good
3997,3997.0,-2.634515,-2.138247,-2.440461,0.657223,2.199709,4.763859,-1.334611391,bad
3998,3998.0,-4.008004,-1.779337,2.366397,-0.200329,2.161435,0.214488,-2.229719806,good
3999,3999.0,0.27854,-1.715505,0.121217,-1.154075,1.266677,-0.776571,1.599796456,good


In [4]:
# Label encoder: converts categorical labels into integer codes.
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()  # created unfitted; fit_transform is called on it later
le  # bare expression so the notebook displays the encoder's repr

In [5]:
# Fit the encoder on the Quality labels, then replace the column with the
# resulting integer codes.
quality_codes = le.fit_transform(df['Quality'])
df['Quality'] = quality_codes
df.head(4)  # encoding visible in the output: bad -> 0, good -> 1

Unnamed: 0,A_id,Size,Weight,Sweetness,Crunchiness,Juiciness,Ripeness,Acidity,Quality
0,0.0,-3.970049,-2.512336,5.34633,-1.012009,1.8449,0.32984,-0.491590483,1
1,1.0,-1.195217,-2.839257,3.664059,1.588232,0.853286,0.86753,-0.722809367,1
2,2.0,-0.292024,-1.351282,-1.738429,-0.342616,2.838636,-0.038033,2.621636473,0
3,3.0,-0.657196,-2.271627,1.324874,-0.097875,3.63797,-3.413761,0.790723217,1


In [6]:
# Display the DataFrame's column labels.
df.columns

Index(['A_id', 'Size', 'Weight', 'Sweetness', 'Crunchiness', 'Juiciness',
       'Ripeness', 'Acidity', 'Quality'],
      dtype='object')

In [7]:
# Split the dataset into training and test partitions.
from sklearn.model_selection import train_test_split

# The measured attributes are the features; A_id is left out and Quality is
# the prediction target.
feature_columns = ['Size', 'Weight', 'Sweetness', 'Crunchiness', 'Juiciness',
                   'Ripeness', 'Acidity']
x = df[feature_columns]
y = df['Quality']

# Hold out 33% of the rows for testing; the fixed random_state makes the
# split repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.33, random_state=42
)

# Report the shape of each partition.
for caption, part in (
    ('shape of x_train: ', x_train),
    ('shape of x_test: ', x_test),
    ('shape of y_train: ', y_train),
    ('shape of y_test: ', y_test),
):
    print(caption, part.shape)

shape of x_train:  (2680, 7)
shape of x_test:  (1320, 7)
shape of y_train:  (2680,)
shape of y_test:  (1320,)


In [8]:
# Naive Bayes classifier (Gaussian variant)
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()  # constructed with default settings; not trained yet

In [9]:
nb.fit(x_train,y_train)  # train the classifier on the training partition

In [10]:
pred_y = nb.predict(x_test)  # predict a class label for each row of the test partition
print(pred_y.shape)  # number of predictions produced

(1320,)


In [11]:
pred_y  # display the predicted integer labels (bad -> 0, good -> 1)

array([0, 1, 1, ..., 0, 1, 1])

In [12]:
# Evaluation metrics computed on the held-out test partition.
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

# Every scorer is called as scorer(true_labels, predicted_labels).
accuracy = accuracy_score(y_test, pred_y)
# Store the result in `f1`, not `f1_score`: rebinding `f1_score` would replace
# the imported function with a float, so any later call to f1_score(...)
# would fail with "object is not callable".
f1 = f1_score(y_test, pred_y)
precision = precision_score(y_test, pred_y)
recall = recall_score(y_test, pred_y)

print('accuracy: ',accuracy)
print('f1_score: ',f1)
print('precision: ',precision)
print('recall: ',recall)

accuracy:  0.7454545454545455
f1_score:  0.7462235649546829
precision:  0.7553516819571865
recall:  0.7373134328358208


In [13]:
# Prediction on unseen data: a single sample made of 7 feature values.
# NOTE(review): the values are assumed to follow the training column order
# (Size, Weight, Sweetness, Crunchiness, Juiciness, Ripeness, Acidity).
value = np.array([3.2,-2.1,4,-5.5,2.4,-1.1,-0.35])
print("shape of array before reshaping: ",value.shape)
# predict() expects a 2-D input of shape (n_samples, n_features), so the 1-D
# vector of shape (7,) is turned into one row of shape (1, 7). Passing -1
# lets NumPy infer the feature count from the vector's length.
value = value.reshape(1, -1)
print("shape after reshaping:",value.shape)

shape of array before reshaping:  (7,)
shape after reshaping: (1, 7)


In [14]:
# Predict the class of the unseen sample.
# The model was fitted on a DataFrame with named columns, so the sample is
# given the same column labels; passing a bare ndarray makes scikit-learn warn
# that X does not have valid feature names.
output = nb.predict(pd.DataFrame(value, columns=x.columns))
print(output)  # prints a NumPy array holding one label, e.g. [1]
# output[0] gives the bare label when only the number is wanted.

[1]




In [15]:
# Translate the numeric prediction back into a human-readable label.
d = {
    0: 'Bad',
    1: "Good",
}
predicted_code = output[0]
print(d[predicted_code])

Good
