In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Load the wine dataset from a CSV file located in the working directory.
wine_data = pd.read_csv("wine_dataset.csv")

In [4]:
# Preview the first five rows to sanity-check the columns and value formats.
wine_data.head(n=5)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality,style
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5,red
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5,red
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6,red
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5,red


In [3]:
# Print the frame summary: index range, per-column non-null counts and dtypes,
# and memory usage. Useful here to confirm there are no missing values.
wine_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6497 entries, 0 to 6496
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed_acidity         6497 non-null   float64
 1   volatile_acidity      6497 non-null   float64
 2   citric_acid           6497 non-null   float64
 3   residual_sugar        6497 non-null   float64
 4   chlorides             6497 non-null   float64
 5   free_sulfur_dioxide   6497 non-null   float64
 6   total_sulfur_dioxide  6497 non-null   float64
 7   density               6497 non-null   float64
 8   pH                    6497 non-null   float64
 9   sulphates             6497 non-null   float64
 10  alcohol               6497 non-null   float64
 11  quality               6497 non-null   int64  
 12  style                 6497 non-null   object 
dtypes: float64(11), int64(1), object(1)
memory usage: 660.0+ KB


In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
wine_data.describe()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
count,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0,6497.0
mean,7.215307,0.339666,0.318633,5.443235,0.056034,30.525319,115.744574,0.994697,3.218501,0.531268,10.491801,5.818378
std,1.296434,0.164636,0.145318,4.757804,0.035034,17.7494,56.521855,0.002999,0.160787,0.148806,1.192712,0.873255
min,3.8,0.08,0.0,0.6,0.009,1.0,6.0,0.98711,2.72,0.22,8.0,3.0
25%,6.4,0.23,0.25,1.8,0.038,17.0,77.0,0.99234,3.11,0.43,9.5,5.0
50%,7.0,0.29,0.31,3.0,0.047,29.0,118.0,0.99489,3.21,0.51,10.3,6.0
75%,7.7,0.4,0.39,8.1,0.065,41.0,156.0,0.99699,3.32,0.6,11.3,6.0
max,15.9,1.58,1.66,65.8,0.611,289.0,440.0,1.03898,4.01,2.0,14.9,9.0


In [6]:
# Split the frame into the target label (y) and the predictor columns (X).
y = wine_data['style']  # target: the wine style label
X = wine_data.drop('style', axis=1)  # features: every column except the target

In [7]:
# Hold out 20% of the rows as a test set; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [8]:
# Instantiate a Gaussian Naive Bayes classifier and fit it on the training split.
model = GaussianNB()
model.fit(X=X_train, y=y_train)

In [9]:
# Predict the wine style for every row of the held-out test set.
y_pred = model.predict(X=X_test)

In [10]:
# Evaluate the predictions against the true test labels.
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
# Counts of true vs. predicted labels, one row per true class.
confusion = confusion_matrix(y_true=y_test, y_pred=y_pred)
# Text report with per-class precision, recall, f1-score and support.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)

In [11]:
# Print each evaluation result under its label, in a fixed order.
for label, value in (
    ("Acurácia:", accuracy),
    ("Matriz de Confusão:\n", confusion),
    ("Relatório de Classificação:\n", classification_rep),
):
    print(label, value)

Acurácia: 0.9684615384615385
Matriz de Confusão:
 [[330  11]
 [ 30 929]]
Relatório de Classificação:
               precision    recall  f1-score   support

         red       0.92      0.97      0.94       341
       white       0.99      0.97      0.98       959

    accuracy                           0.97      1300
   macro avg       0.95      0.97      0.96      1300
weighted avg       0.97      0.97      0.97      1300



In [17]:
# Try the trained model on one hand-written sample to predict its wine style.
# The frame is built from a single record; the keys follow the same column
# order as the training features.
wine_features = pd.DataFrame([
    {
        'fixed_acidity': 7.2,
        'volatile_acidity': 0.25,
        'citric_acid': 0.32,
        'residual_sugar': 6.0,
        'chlorides': 0.041,
        'free_sulfur_dioxide': 50,
        'total_sulfur_dioxide': 150,
        'density': 0.993,
        'pH': 3.2,
        'sulphates': 0.55,
        'alcohol': 10.5,
        'quality': 5,  # quality is one of the training features, so it must be supplied
    }
])

# Run the prediction for the sample row.
predicted_style = model.predict(wine_features)

# Show the predicted label for the single sample.
print("Estilo do Vinho:", predicted_style[0])

Estilo do Vinho: white
