07 feb 2024

In [2]:
## import the necessary libraries
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix


In [3]:
## read the food nutrition table from the CSV next to the notebook and preview the first five rows
data = pd.read_csv(filepath_or_buffer='food_data.csv')
data.head(n=5)

Unnamed: 0,Food,Calories,Calories from Fat,Total Fat,Total Fat.1,Sodium,Sodium.1,Potassium,Potassium.1,Total Carbo-hydrate,Total Carbo-hydrate.1,Protein,Vitamin A,Vitamin C,Calcium,Iron,Food Type
0,Asparagus,20,0,0.0,0,0,0,230,7,4,1,2,10,15,2,2,Vegetables
1,Bell Pepper,25,0,0.0,0,40,2,220,6,6,2,1,4,190,2,4,Vegetables
2,Broccoli,45,0,0.5,1,80,3,460,13,8,3,4,6,220,6,6,Vegetables
3,Carrot,30,0,0.0,0,60,3,250,7,7,2,1,110,10,2,2,Vegetables
4,Cauliflower,25,0,0.0,0,30,1,270,8,5,2,2,0,100,2,2,Vegetables


In [4]:
## select the independent and dependent variables by column position:
##   position 0       -> becomes the row index (labelled "Food" in the result table further down)
##   positions 1..15  -> independent variables (features)
##   position 16      -> dependent variable (target)
FOOD_NAME_POS, FIRST_FEATURE_POS, TARGET_POS = 0, 1, 16

# NOTE: this replaces the index of `data` in place; the column at position 0 stays in the frame
data.index = data.iloc[:, FOOD_NAME_POS]
data_to_use = data.iloc[:, FIRST_FEATURE_POS:TARGET_POS]
data_to_target = data.iloc[:, TARGET_POS]

In [5]:
## data splitting
## an integer test_size is an absolute row count: 20 rows are held out for testing;
## random_state pins the shuffle so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(
    data_to_use,
    data_to_target,
    test_size=20,
    random_state=32,
)


In [6]:
## Gaussian naive Bayes classifier with default settings
gaussian_model = GaussianNB()

## fit it on the training split (left as the last expression so the cell displays the fitted estimator)
gaussian_model.fit(X=X_train, y=y_train)

In [7]:
## predict a label for every held-out row
prediction = gaussian_model.predict(X=X_test)


In [8]:
## put the true and predicted labels side by side in a dataframe
## (rows take their index from y_test)
result = {
    'Actual_food_type': y_test,
    'Predicted_food_type': prediction,
}
result_df = pd.DataFrame(data=result)
result_df

Unnamed: 0_level_0,Actual_food_type,Predicted_food_type
Food,Unnamed: 1_level_1,Unnamed: 2_level_1
Halibut,Seafood,Seafood
Mushrooms,Vegetables,Vegetables
Tuna,Seafood,Seafood
Rockfish,Seafood,Seafood
Avocado,Fruits,Seafood
Shrimp,Seafood,Seafood
"Salmon, Pink",Seafood,Seafood
Flounder/Sole,Seafood,Seafood
Lemon,Fruits,Fruits
Banana,Fruits,Fruits


In [9]:
## accuracy: fraction of held-out rows whose predicted label matches the true label
food_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=prediction)
print('Accuracy:', food_accuracy)

Accuracy: 0.8


In [10]:
## read the candy table from the CSV next to the notebook and preview the first five rows
candy_data = pd.read_csv(filepath_or_buffer='candy-data.csv')
candy_data.head(n=5)

Unnamed: 0,competitorname,chocolate,fruity,caramel,peanutyalmondy,nougat,crispedricewafer,hard,bar,pluribus,sugarpercent,pricepercent,winpercent
0,100 Grand,1,0,1,0,0,1,0,1,0,0.732,0.86,66.971725
1,3 Musketeers,1,0,0,0,1,0,0,1,0,0.604,0.511,67.602936
2,One dime,0,0,0,0,0,0,0,0,0,0.011,0.116,32.261086
3,One quarter,0,0,0,0,0,0,0,0,0,0.011,0.511,46.116505
4,Air Heads,0,1,0,0,0,0,0,0,0,0.906,0.511,52.341465


In [11]:
## select the independent and dependent variables by column position
## NOTE(review): if the leading unnamed column in the preview above is just the row index,
## positions 3 and 12 are `caramel` and `winpercent`, and position 1 is `chocolate` -- confirm
FEATURE_POSITIONS = [3, 12]
TARGET_POSITION = 1

X = candy_data.iloc[:, FEATURE_POSITIONS]
y = candy_data.iloc[:, TARGET_POSITION]

In [12]:
## split our dataset
## an integer test_size is an absolute row count: 25 rows are held out for testing.
## NOTE: this rebinds X_train / X_test / y_train / y_test, replacing the food splits made earlier
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=25,
    random_state=0,
)

In [13]:
## STANDARD SCALER
## fit the scaler on the training split only, then apply that same fitted scaling
## to both splits so the test rows do not influence the scaling
scaler_x = StandardScaler().fit(X_train)
X_train = scaler_x.transform(X_train)
X_test = scaler_x.transform(X_test)

In [14]:
## logistic regression classifier with default settings, fitted on the scaled training split
## (fit is left as the last expression so the cell displays the fitted estimator)
log_reg = LogisticRegression()
log_reg.fit(X=X_train, y=y_train)

In [15]:
## predict a label for every row of the scaled test split
y_pred = log_reg.predict(X=X_test)

In [16]:
## put the true and predicted labels side by side in a dataframe
## (rows take their index from y_test)
result_log = {
    'Actual_candy_type': y_test,
    'Predicted_candy_type': y_pred,
}
output = pd.DataFrame(data=result_log)
output


Unnamed: 0,Actual_candy_type,Predicted_candy_type
2,0,0
13,0,0
63,0,0
41,0,1
48,0,0
30,0,0
50,0,0
76,1,0
55,0,0
43,1,1


In [17]:
## accuracy: fraction of held-out rows whose predicted label matches the true label
candy_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', candy_accuracy)

Accuracy: 0.76


In [18]:
## confusion matrix: rows are the true labels, columns are the predicted labels
conf_mat = confusion_matrix(y_true=y_test, y_pred=y_pred)
conf_mat

array([[12,  3],
       [ 3,  7]])

In [19]:
## calculate the accuracy based on the confusion matrix
## correct predictions sit on the diagonal, so accuracy = diagonal total / grand total.
## Dividing by the matrix total (instead of a hard-coded 25) keeps the result right
## if the test-set size in the split cell is ever changed.
accuracy = conf_mat.trace() / conf_mat.sum()
accuracy




0.76