# **Predizione Consumi**

In [57]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

print(tf.__version__)

2.9.2


## Caricamento del Dataset

In [52]:
dataset = pd.read_csv('/content/consumi-storage.csv')
dataset.head()

# Identifico il feature set e il label set
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values
print(X)
print(y)

[[1 ' Pane' 3000 0 1000 2000]
 [2 ' Pane' 2000 500 500 3000]
 [3 ' Pane' 500 500 1000 2000]
 [4 ' Pane' 500 0 500 2000]
 [5 ' Pane' 1000 200 500 2300]
 [6 ' Pane' 0 300 500 1500]
 [7 ' Pane' 0 200 500 800]
 [8 ' Pane' 500 100 450 750]
 [9 ' Pane' 500 200 400 650]
 [10 ' Pane' 0 100 500 50]
 [11 ' Pane' 100 0 150 0]
 [12 ' Pane' 500 0 450 50]
 [13 ' Pane' 300 50 250 50]
 [14 ' Pane' 350 50 300 50]
 [1 ' Mortadella' 500 0 100 400]
 [2 ' Mortadella' 100 300 50 150]
 [3 ' Mortadella' 0 0 150 0]
 [4 ' Mortadella' 200 0 150 50]
 [5 ' Mortadella' 100 50 100 0]
 [6 ' Mortadella' 100 0 50 50]
 [7 ' Mortadella' 100 50 100 0]
 [8 ' Mortadella' 100 50 50 0]
 [9 ' Mortadella' 100 0 50 50]
 [10 ' Mortadella' 200 50 150 50]
 [11 ' Mortadella' 150 50 150 0]
 [12 ' Mortadella' 200 0 150 50]
 [13 ' Mortadella' 150 50 150 0]
 [14 ' Mortadella' 100 0 80 20]
 [1 ' Pasta' 1000 0 500 500]
 [2 ' Pasta' 500 0 600 400]
 [3 ' Pasta' 1000 0 700 700]
 [4 ' Pasta' 500 0 500 700]
 [5 ' Pasta' 0 0 500 200]
 [6 ' Past

## Preprocessamento

In [53]:
# one-encode sulla colona del nome del prodotto
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))
print(X)

[[0.0 1.0 0.0 1 3000 0 1000 2000]
 [0.0 1.0 0.0 2 2000 500 500 3000]
 [0.0 1.0 0.0 3 500 500 1000 2000]
 [0.0 1.0 0.0 4 500 0 500 2000]
 [0.0 1.0 0.0 5 1000 200 500 2300]
 [0.0 1.0 0.0 6 0 300 500 1500]
 [0.0 1.0 0.0 7 0 200 500 800]
 [0.0 1.0 0.0 8 500 100 450 750]
 [0.0 1.0 0.0 9 500 200 400 650]
 [0.0 1.0 0.0 10 0 100 500 50]
 [0.0 1.0 0.0 11 100 0 150 0]
 [0.0 1.0 0.0 12 500 0 450 50]
 [0.0 1.0 0.0 13 300 50 250 50]
 [0.0 1.0 0.0 14 350 50 300 50]
 [1.0 0.0 0.0 1 500 0 100 400]
 [1.0 0.0 0.0 2 100 300 50 150]
 [1.0 0.0 0.0 3 0 0 150 0]
 [1.0 0.0 0.0 4 200 0 150 50]
 [1.0 0.0 0.0 5 100 50 100 0]
 [1.0 0.0 0.0 6 100 0 50 50]
 [1.0 0.0 0.0 7 100 50 100 0]
 [1.0 0.0 0.0 8 100 50 50 0]
 [1.0 0.0 0.0 9 100 0 50 50]
 [1.0 0.0 0.0 10 200 50 150 50]
 [1.0 0.0 0.0 11 150 50 150 0]
 [1.0 0.0 0.0 12 200 0 150 50]
 [1.0 0.0 0.0 13 150 50 150 0]
 [1.0 0.0 0.0 14 100 0 80 20]
 [0.0 0.0 1.0 1 1000 0 500 500]
 [0.0 0.0 1.0 2 500 0 600 400]
 [0.0 0.0 1.0 3 1000 0 700 700]
 [0.0 0.0 1.0 4 500 0 500 7

In [54]:
# split the dataset into train and test set
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train)

[[-0.75592895 -0.70710678  1.51657509 -0.81907636  0.07593889 -0.62147885
   0.46047559  0.21421954]
 [-0.75592895 -0.70710678  1.51657509 -1.32906729  0.07593889 -0.62147885
   0.85516895 -0.1936215 ]
 [ 1.32287566 -0.70710678 -0.65938047 -0.05408995 -0.5707665  -0.24859154
  -1.11829785 -0.73740956]
 [-0.75592895 -0.70710678  1.51657509  1.2208874   0.07593889 -0.62147885
   0.65782227 -0.39754202]
 [-0.75592895  1.41421356 -0.65938047 -1.07407182  0.07593889  3.10739423
   2.43394238  1.98153071]
 [-0.75592895 -0.70710678  1.51657509  1.73087834  0.88432063 -0.62147885
   1.05251563  0.07827252]
 [ 1.32287566 -0.70710678 -0.65938047 -0.56408089 -0.5707665  -0.24859154
  -1.11829785 -0.73740956]
 [ 1.32287566 -0.70710678 -0.65938047 -1.32906729 -0.5707665   1.615845
  -1.31564453 -0.53348904]
 [ 1.32287566 -0.70710678 -0.65938047  0.45590099 -0.5707665  -0.62147885
  -1.31564453 -0.66943605]
 [ 1.32287566 -0.70710678 -0.65938047 -1.07407182 -0.73244285 -0.62147885
  -0.92095117 -0.73

## Costruzione del modello

In [55]:
model = LinearRegression().fit(X_train, y_train)
print(f"intercept: {model.intercept_}")
print(f"slope: {model.coef_}")

intercept: 0.521969696969697
slope: [ 0.03629394 -0.01031849 -0.02740586  0.04215307 -0.01071407 -0.05129493
  0.10938234 -0.14524711]


Proviamo invece a cedere le prestazioni della regressione Polinomiale (cerco di rappresentare le features su uno spazio di dimensionalità maggiore di modo che si possano tenere in considerazione anche eventuali fluttuazioni). Prima di tutto, è necessario modificare le features in modo da essere rappresentate nello spazio di dimensionalità maggiore:

In [93]:
from sklearn.preprocessing import PolynomialFeatures

poly = PolynomialFeatures(degree=5, include_bias=False)
X_poly = poly.fit_transform(X)

# split poly features
X_poly_train, X_poly_test, y_poly_train, y_poly_test = train_test_split(X_poly, y, test_size=0.2, random_state=0)

# scale poly features
X_poly_train = sc.fit_transform(X_poly_train)
X_poly_test = sc.fit_transform(X_poly_test)
print(X_poly_train)

[[-0.75592895 -0.70710678  1.51657509 ... -0.32642455 -0.31171221
  -0.24688175]
 [-0.75592895 -0.70710678  1.51657509 ... -0.35514403 -0.32513262
  -0.2506304 ]
 [ 1.32287566 -0.70710678 -0.65938047 ... -0.36569573 -0.32710164
  -0.25087362]
 ...
 [-0.75592895 -0.70710678  1.51657509 ... -0.36390677 -0.32685127
  -0.25085042]
 [-0.75592895  1.41421356 -0.65938047 ...  0.55025011  0.69843329
   0.50916542]
 [-0.75592895  1.41421356 -0.65938047 ...  3.29808761  1.72396823
   0.50916542]]


Dopodiché è possibile procedere con la regressione:

In [94]:
poly_reg_model = LinearRegression().fit(X_poly_train, y_train)
print(f"intercept: {poly_reg_model.intercept_}")
print(f"slope: {poly_reg_model.coef_}")

intercept: 0.521969696969697
slope: [-5.49151575e-03 -2.86916702e-03  8.69121978e-03 ... -2.65480262e-05
 -6.82288174e-04 -1.18044579e-03]


## Possibili metriche per la regressione:
1) **R quadro**: Ha valore compreso tra 0 e 1. Se il suo valore è 0, allora significa che il modello è tale per cui le sue variabili predittive non spiegano per nulla la variabilità della y intorno alla sua media. Se invece vale 1 significa che le variabili indipendenti riescono a spiegare completamente la variabilità della y intorno alla sua media. Ovvero, conoscendo i valori delle variabili indipendenti puoi predire esattamente quale sarà il valore della y (molto bene!)

In [91]:
from sklearn.metrics import mean_absolute_error,r2_score,mean_squared_error

y_pred = model.predict(X_test)
print("y_pred:", y_pred)
print("y_test:", y_test)
print("R^2 : ", r2_score(y_test, y_pred))
print("MeanAbsErr :", mean_absolute_error(y_test,y_pred))
print("MeanSquaresError:",np.sqrt(mean_squared_error(y_test, y_pred)))

y_pred: [0.542994   0.61062712 0.67850591 0.12344833 0.58309246 0.67957298
 0.47464317 0.70655695 0.59978972]
y_test: [0.5  0.57 0.8  0.16 1.   0.75 0.5  0.9  0.44]
R^2 :  0.5212846320123092
MeanAbsErr : 0.12306567221212432
MeanSquaresError: 0.1706079610888832


Per la **Regressione Polinomiale**:

In [95]:
y_poly_pred = poly_reg_model.predict(X_poly_test)
print("y_pred:", y_poly_pred)
print("y_test:", y_test)
print("R^2 : ", r2_score(y_test, y_poly_pred))
print("MeanAbsErr :", mean_absolute_error(y_test,y_poly_pred))
print("MeanSquaresError:",np.sqrt(mean_squared_error(y_test, y_poly_pred)))

y_pred: [ 0.41736608  0.62494617  0.54402247 -0.01235393  0.6399371   0.62910059
  0.55306275  0.67937115  0.6222749 ]
y_test: [0.5  0.57 0.8  0.16 1.   0.75 0.5  0.9  0.44]
R^2 :  0.389541116227468
MeanAbsErr : 0.16698226305208502
MeanSquaresError: 0.19265878500409944
