Análise Exploratória - Quantidade

Importação das bibliotecas necessárias

In [21]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from pmdarima import auto_arima


In [22]:
# Load the quantity CSV into the DataFrame used by every cell below.
df = pd.read_csv(filepath_or_buffer="../data/quantidade.csv")

In [None]:
# Preview the first five rows (the default for head) to check the loaded columns.
df.head(n=5)

In [None]:
# Line chart of the 'leite' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='leite',
    marker='o',
    color='#1199d4',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Leite', xlabel='Mês', ylabel='Litros de leite')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Line chart of the 'queijo' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='queijo',
    marker='o',
    color='#c9aa0c',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Queijo', xlabel='Mês', ylabel='Queijo')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Line chart of the 'doce_leite' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='doce_leite',
    marker='o',
    color='#7d6a0c',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Doce de Leite', xlabel='Mês', ylabel='Potes')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Line chart of the 'ovos' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='ovos',
    marker='o',
    color='#d69a00',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Ovos', xlabel='Mês', ylabel='Cartelas')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Line chart of the 'suinos' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='suinos',
    marker='o',
    color='#d6006b',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Suínos', xlabel='Mês', ylabel='Porcos')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Line chart of the 'hortalicas' column over the 'data' column.
plt.figure(figsize=(16, 6))
ax = sns.lineplot(
    data=df,
    x='data',
    y='hortalicas',
    marker='o',
    color='#00c403',
)
# seaborn returns the Axes it drew on, so title and axis labels are set in one call.
ax.set(title='Vendas - Hortaliças', xlabel='Mês', ylabel='Mudas')
plt.xticks(rotation=45)  # rotate the x tick labels by 45 degrees
plt.show()

In [None]:
# Product columns analysed throughout the notebook (reused by later cells).
colunas = ['leite', 'queijo', 'doce_leite', 'ovos', 'suinos', 'hortalicas']

# Summary table: one row per product, one column per statistic, rounded to 2 decimals.
produtos = df[colunas]
resumo_df = pd.DataFrame({
    rotulo: calcular()
    for rotulo, calcular in (
        ('Média', produtos.mean),
        ('Mínimo', produtos.min),
        ('Máximo', produtos.max),
        ('Soma', produtos.sum),
    )
}).round(2)
resumo_df

In [None]:
# Standardise the product columns, then assign each row to one of 3 KMeans clusters.
# The labels are stored in df['cluster'], so df itself is modified by this cell.
scaler = StandardScaler()
scaled_data = scaler.fit_transform(df[colunas])
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster'] = kmeans.fit_predict(scaled_data)

# Two side-by-side scatter plots of 'leite' against another product, coloured by cluster.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
paineis = (
    ('queijo', 'Leite vs Queijo', 'Queijo'),
    ('doce_leite', 'Leite vs Doce de Leite', 'Doce de Leite'),
)
for ax, (coluna_y, titulo, rotulo_y) in zip(axes, paineis):
    sns.scatterplot(data=df, x='leite', y=coluna_y, hue='cluster', palette='Set1', ax=ax)
    ax.set_title(titulo)
    ax.set_xlabel('Leite')
    ax.set_ylabel(rotulo_y)

plt.tight_layout()
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlation between the product columns.
correlacoes = df[colunas].corr()
plt.figure(figsize=(8, 6))
ax = sns.heatmap(correlacoes, annot=True, cmap='coolwarm')
ax.set_title('Correlação entre produtos')
plt.show()

In [None]:
# Mean of each product per cluster, displayed with two decimal places.
medias_por_cluster = df.groupby('cluster')[colunas].mean()
medias_por_cluster.style.format("{:.2f}")
 

In [None]:
# Reshape to long format (one row per date/product pair) so that a single
# lineplot call can draw every product, with one hue per product.
df_melted = pd.melt(
    df,
    id_vars='data',
    value_vars=colunas,
    var_name='produto',
    value_name='quantidade',
)
sns.lineplot(data=df_melted, x='data', y='quantidade', hue='produto')


In [35]:
# Target: cheese quantities. Regressors: every other column except the date.
y = df['queijo']
X = df.drop(columns=['queijo', 'data'])

# The KMeans cell adds a 'cluster' column to df. Those labels were fitted on all
# rows and on the target column itself, so using them as a regressor would leak
# target and test-period information into the model. errors='ignore' keeps this
# cell working when the clustering cell has not been run.
X = X.drop(columns=['cluster'], errors='ignore')

# Chronological split: first 80% of the rows for training, the rest for testing
# (no shuffling, so the row order is preserved).
split_index = int(len(df) * 0.8)

training_X, test_X = X.iloc[:split_index], X.iloc[split_index:]
training_y, test_y = y.iloc[:split_index], y.iloc[split_index:]


In [36]:
# NOTE(review): disabled alternative that would split the data with
# train_test_split (shuffled by default) instead of the chronological split;
# presumably kept only for reference — confirm before removing.
# X = df.drop(columns=['queijo', 'data'])
# y = df['queijo'] 
# training_X, test_X, training_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit auto_arima on the training target with the training regressors as
# exogenous variables; m=12 is the seasonal period passed to the search.
model = auto_arima(training_y, X=training_X, m=12)

# Forecast one step per test row, supplying the test regressors.
n_periods = test_y.shape[0]
forecast = model.predict(X=test_X, n_periods=n_periods)

forecast

In [None]:
# Plot the training data, the held-out test data and the forecast on one Axes.
# Explicit labels are used because training_y and test_y share the same Series
# name, which produced two identical legend entries, and the forecast line was
# missing from the legend altogether.
ax = training_y.plot(figsize=(16, 6), label='Treino')
test_y.plot(ax=ax, label='Teste')
forecast.plot(ax=ax, label='Previsão')
ax.legend()
plt.show()

In [44]:
# Target: milk quantities. Regressors: every other column except the date.
y = df['leite']
X = df.drop(columns=['leite', 'data'])

# The KMeans cell adds a 'cluster' column to df. Those labels were fitted on all
# rows and on the target column itself, so using them as a regressor would leak
# target and test-period information into the model. errors='ignore' keeps this
# cell working when the clustering cell has not been run.
X = X.drop(columns=['cluster'], errors='ignore')

# Chronological split: first 80% of the rows for training, the rest for testing
# (no shuffling, so the row order is preserved).
divisor_index = int(len(df) * 0.8)

training_y, test_y = y.iloc[:divisor_index], y.iloc[divisor_index:]
training_X, test_X = X.iloc[:divisor_index], X.iloc[divisor_index:]


In [None]:
# Fit auto_arima on the training target with the training regressors as
# exogenous variables; m=12 is the seasonal period passed to the search.
# The regressors must be passed as `X` (the original keyword was misspelled
# `exogerous`), matching the predict call below, which supplies X=test_X.
model = auto_arima(y=training_y,
                   X=training_X,
                   m=12)

# Recompute the horizon from the current test set instead of relying on the
# value left behind by an earlier cell.
n_periods = len(test_y)
forecast = model.predict(n_periods=n_periods, X=test_X)

forecast

In [None]:
# Plot the training data, the held-out test data and the forecast on one Axes,
# each with its own legend label.
ax = training_y.plot(figsize=(16, 6), label='Treino')
test_y.plot(ax=ax, label='Teste')
forecast.plot(ax=ax, label='Previsão')
ax.legend()
plt.show()
