# Objective of Notebook
I want to make an example of streaming machine learning. The dataset used is wine dataset even if that is not very suitable to a SML approach

In [8]:
pip install river

Note: you may need to restart the kernel to use updated packages.


In [9]:
from sklearn import datasets as skdatasets
from sklearn.naive_bayes import GaussianNB as GaussianNBSKL
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score , confusion_matrix


In [10]:
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('classic')
sns.set(style= 'ticks', color_codes= True)

In [24]:
wine = skdatasets.load_wine()
x= wine.data
y=wine.target

In [25]:
X_train, X_test, y_train, y_test  = train_test_split(wine.data, 
                                                     wine.target, 
                                                     test_size=0.25,
                                                     stratify= wine.target, 
                                                     random_state=42)
X_train.shape , y_train.shape, X_test.shape, y_test.shape

((133, 13), (133,), (45, 13), (45,))

In [26]:
#Test with gaussianNb but not sequential
model = GaussianNBSKL()
model.fit( X=X_train , y=y_train)

GaussianNB()

In [28]:
#I predict
y_pred = model.predict(X_test)


In [29]:
#I see accuracy

print('Model accuracy is ' + str(accuracy_score(y_test, y_pred)))

Model accuracy is 0.9777777777777777


In [30]:
print(confusion_matrix(y_test, y_pred))

[[15  0  0]
 [ 1 17  0]
 [ 0  0 12]]


## Streaming approach

In [39]:
from river.naive_bayes import GaussianNB
from river.stream import iter_sklearn_dataset

y_pred=[]
y_true=[]

model1 = GaussianNB()

sample= 0

for x, y in iter_sklearn_dataset(wine, shuffle=True, seed=42):
    
    
    yp = model1.predict_one(x)
    model1.learn_one(x,y)
    
    if yp is None:
        continue
    y_pred.append(yp)
    y_true.append(y)
    
    #accuracy after each sample
    
    print('Accuracy after '+ str(sample) + ' samples is: ' + str(accuracy_score(y_true,y_pred)))
    sample+=1

    

Accuracy after 0 samples is: 0.0
Accuracy after 1 samples is: 0.0
Accuracy after 2 samples is: 0.0
Accuracy after 3 samples is: 0.0
Accuracy after 4 samples is: 0.2
Accuracy after 5 samples is: 0.3333333333333333
Accuracy after 6 samples is: 0.42857142857142855
Accuracy after 7 samples is: 0.375
Accuracy after 8 samples is: 0.4444444444444444
Accuracy after 9 samples is: 0.4
Accuracy after 10 samples is: 0.45454545454545453
Accuracy after 11 samples is: 0.5
Accuracy after 12 samples is: 0.5384615384615384
Accuracy after 13 samples is: 0.5714285714285714
Accuracy after 14 samples is: 0.6
Accuracy after 15 samples is: 0.625
Accuracy after 16 samples is: 0.6470588235294118
Accuracy after 17 samples is: 0.6666666666666666
Accuracy after 18 samples is: 0.6842105263157895
Accuracy after 19 samples is: 0.7
Accuracy after 20 samples is: 0.7142857142857143
Accuracy after 21 samples is: 0.7272727272727273
Accuracy after 22 samples is: 0.7391304347826086
Accuracy after 23 samples is: 0.75
Accurac

In [34]:
print('Streaming model accuracy is: ' + str(accuracy_score(y_true, y_pred)))

Streaming model accuracy is: 0.943502824858757


In [36]:
print(confusion_matrix(y_true, y_pred))

[[56  1  2]
 [ 3 66  2]
 [ 0  2 45]]
