In [2]:
# Environment setup:
# pipenv install pandas plotly scikit-learn lightgbm bayesian-optimization ipykernel ipywidgets nbformat matplotlib
# On macOS, also install the libomp library: brew install libomp

# Core data libraries
import pandas as pd
import numpy as np

# Visualization
import plotly.express as px
import matplotlib.pyplot as plt

# Machine learning
from lightgbm import LGBMClassifier, early_stopping, plot_tree, plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report, \
                            ConfusionMatrixDisplay, log_loss

# Bayesian optimization
from bayes_opt import BayesianOptimization

### Carregar os dados

In [3]:
# Load the wine dataset from a CSV file; the path is relative to the
# notebook's working directory, so the ./datasets folder must exist there.
df_vinhos = pd.read_csv('./datasets/wine_data.csv')

### Visualizar Estrutura

In [12]:
# Print a structural summary of the DataFrame: index range, column names,
# non-null counts, dtypes and memory usage.
df_vinhos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21000 entries, 0 to 20999
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   fixed_acidity         21000 non-null  float64
 1   volatile_acidity      21000 non-null  float64
 2   citric_acid           21000 non-null  float64
 3   residual_sugar        21000 non-null  float64
 4   chlorides             21000 non-null  float64
 5   free_sulfur_dioxide   21000 non-null  float64
 6   total_sulfur_dioxide  21000 non-null  float64
 7   density               21000 non-null  float64
 8   pH                    21000 non-null  float64
 9   sulphates             21000 non-null  float64
 10  alcohol               21000 non-null  float64
 11  quality               21000 non-null  int64  
dtypes: float64(11), int64(1)
memory usage: 1.9 MB


In [13]:
# Show the first 10 rows
df_vinhos.head(10)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
0,11.6,0.58,0.66,2.2,0.074,10.0,47.0,1.0008,3.25,0.57,9.0,3
1,10.4,0.61,0.49,2.1,0.2,5.0,16.0,0.9994,3.16,0.63,8.4,3
2,7.4,1.185,0.0,4.25,0.097,5.0,14.0,0.9966,3.63,0.54,10.7,3
3,10.4,0.44,0.42,1.5,0.145,34.0,48.0,0.99832,3.38,0.86,9.9,3
4,8.3,1.02,0.02,3.4,0.084,6.0,11.0,0.99892,3.48,0.49,11.0,3
5,7.6,1.58,0.0,2.1,0.137,5.0,9.0,0.99476,3.5,0.4,10.9,3
6,6.8,0.815,0.0,1.2,0.267,16.0,29.0,0.99471,3.32,0.51,9.8,3
7,7.3,0.98,0.05,2.1,0.061,20.0,49.0,0.99705,3.31,0.55,9.7,3
8,7.1,0.875,0.05,5.7,0.082,3.0,14.0,0.99808,3.4,0.52,10.2,3
9,6.7,0.76,0.02,1.8,0.078,6.0,12.0,0.996,3.55,0.63,9.95,3


In [14]:
# Show the last 10 rows
df_vinhos.tail(10)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
20990,12.3,1.21,0.68,59.1,0.328,232.0,396.0,1.03428,3.12,0.59,12.2,9
20991,10.2,0.61,0.88,53.8,0.25,62.4,204.7,1.02776,3.52,1.14,9.7,9
20992,12.9,0.9,1.16,51.2,0.309,196.9,172.3,1.03,3.1,0.82,12.4,9
20993,13.0,0.58,1.22,52.8,0.247,93.0,190.7,1.01922,3.38,1.24,9.9,9
20994,12.8,0.85,1.12,44.9,0.188,215.3,253.3,1.02192,3.13,0.81,11.4,9
20995,9.7,1.02,0.91,50.0,0.412,114.6,181.7,1.02085,3.3,0.89,12.0,9
20996,10.2,0.61,0.88,53.8,0.25,62.4,204.7,1.02776,3.52,1.14,9.7,9
20997,13.4,0.46,1.04,52.1,0.449,63.0,273.5,1.02618,2.89,1.76,9.3,9
20998,6.6,1.03,1.09,25.3,0.138,179.8,295.0,1.02476,2.94,1.54,12.9,9
20999,9.3,0.93,1.32,33.6,0.412,128.7,290.1,1.02182,3.16,1.42,13.0,9


In [15]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
df_vinhos.describe()

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,alcohol,quality
count,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0,21000.0
mean,9.797079,0.774796,0.79387,31.289348,0.200245,129.442333,229.008762,1.009972,3.158712,1.020641,11.291716,6.0
std,2.413919,0.365015,0.384833,19.015391,0.124933,77.167262,100.183265,0.012032,0.171371,0.408304,1.182198,2.000048
min,3.8,0.08,0.0,0.6,0.009,1.0,6.0,0.98711,2.72,0.22,8.0,3.0
25%,7.6,0.43,0.41,9.8,0.072,45.0,150.0,0.997417,3.03,0.62,10.4,4.0
50%,10.0,0.83,0.87,37.6,0.205,145.8,240.5,1.0122,3.15,1.08,11.3,6.0
75%,11.8,1.08,1.11,46.8,0.298,194.325,311.625,1.01984,3.27,1.36,12.2,8.0
max,15.9,1.58,1.66,65.8,0.611,289.0,440.0,1.03898,4.01,2.0,14.9,9.0
