                          *** Implement Incremental/Online Learning using SGDClassifier ***


In [None]:
import numpy as np
from sklearn import linear_model

# Toy training set: six 2-D points carrying the class labels 1, 2 and 3.
X = np.array([[-1, -1], [-2, -1], [-2, -1], [-2, -1], [-2, -1], [2, 1]])
Y = np.array([1, 3, 3, 3, 3, 2])

# SGD reshuffles the samples on every epoch, so random_state is pinned to keep
# the fitted weights (and the predictions in the following cells) reproducible.
# tol=None switches off the convergence check, so all max_iter epochs are run.
clf = linear_model.SGDClassifier(max_iter=15, tol=None, random_state=42)
clf.fit(X, Y)


In [None]:
# Query the freshly fitted model: three points taken from the training data
# plus one point ([4, -5]) it has never seen.
query_points = [[-1, -1], [-2, -1], [2, 1], [4, -5]]
print(clf.predict(query_points))

In [None]:
# Partially fit the pretrained model on one additional labelled sample.
# `classes` and `sample_weight` are left at their None defaults.

X = np.array([[4, 5]])
Y = np.array([2])

clf.partial_fit(X, Y)

In [None]:
# Predict again after the incremental update; the last point is the sample
# the model was just partially fitted on.
points_after_update = [[-1, -1], [-2, -1], [2, 1], [4, 5]]
print(clf.predict(points_after_update))

*** Putting all blocks together ***

In [None]:
# Save pretrained model --> load it back --> partially train the loaded copy
import pickle

# Train the model with the initial data
X = np.array([[-1, -1], [-2, -1], [2, 1]])
Y = np.array([1, 3, 2])

# random_state pins SGD's per-epoch shuffling so the run is reproducible
clf = linear_model.SGDClassifier(max_iter=15, tol=None, random_state=42)
clf.fit(X, Y)

# Persist the fitted model to disk
with open('SGDClassifier_1.pkl', 'wb') as f:
    pickle.dump(clf, f)

# Predictions of the model before the incremental update
print(clf.predict([[-1, -1], [-2, -1], [2, 1],[-4, -5]]))

# New labelled sample used for the incremental update
X = np.array([[-4, -5]])
Y = np.array([2])

# Load SGDClassifier_1 back from disk.
# NOTE: only unpickle files you created yourself - pickle.load can execute
# arbitrary code embedded in the file.
with open('SGDClassifier_1.pkl', 'rb') as f:
    clf1 = pickle.load(f)

# Continue training and predict with the *loaded* model (clf1). The original
# in-memory `clf` is left untouched, so the save/load round trip is actually
# exercised instead of being bypassed.
clf1.partial_fit(X, Y)
print(clf1.predict([[-1, -1], [-2, -1], [2, 1],[-4, -5]]))

                                            *** Using Iris Dataset ***

In [None]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load scikit-learn's bundled Iris dataset and pull out the feature matrix (X)
# and the target labels (Y).
iris = datasets.load_iris()
X, Y = iris.data, iris.target

In [None]:
# Show the Python type of the dataset container, the features and the labels,
# one per line.
print(*map(type, (iris, X, Y)), sep="\n")

In [None]:
# Hold out 20% of the data for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.20, random_state=42
)
print(type(X_train))
# Shapes of the four pieces, one per line.
print(*(part.shape for part in (X_train, X_test, y_train, y_test)), sep="\n")

In [None]:
# Cut the training split in half to simulate two batches arriving over time.
X_train_1, X_train_2, y_train_1, y_train_2 = train_test_split(
    X_train, y_train, test_size=0.50, random_state=42
)
# Shapes of the two feature batches followed by the two label batches.
print(*(part.shape for part in (X_train_1, X_train_2, y_train_1, y_train_2)), sep="\n")

In [None]:
# Full model training: fit on the complete training split in a single call.

# random_state pins SGD's per-epoch shuffling so the reported accuracy is
# reproducible and can be compared fairly with the incremental runs.
clf_full = linear_model.SGDClassifier(max_iter=100, tol=None, random_state=42)
clf_full.fit(X_train, y_train)

# Accuracy on the held-out test split (shown as the cell output).
y_pred = clf_full.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Partial model training - batch 1
import pickle

# random_state pins SGD's per-epoch shuffling so the reported accuracy is
# reproducible from run to run.
clf_Batch1 = linear_model.SGDClassifier(max_iter=100, tol=None, random_state=42)
clf_Batch1.fit(X_train_1, y_train_1)

# Accuracy of the batch-1-only model on the held-out test split.
y_pred = clf_Batch1.predict(X_test)
print(accuracy_score(y_test, y_pred))

# Save the batch-1 model to a file so a later cell can continue training it.
with open('SGDClassifier_Iris_Batch_1.pkl', 'wb') as f:
    pickle.dump(clf_Batch1, f)

In [None]:
# Partial model training - batch 2
# Restore the batch-1 model that was pickled to SGDClassifier_Iris_Batch_1.pkl
with open('SGDClassifier_Iris_Batch_1.pkl', 'rb') as model_file:
    clf_Batch2 = pickle.load(model_file)

# Incrementally update the restored model with the second batch;
# `classes` and `sample_weight` stay at their None defaults.
clf_Batch2.partial_fit(X_train_2, y_train_2)

# Accuracy on the held-out test split (shown as the cell output).
y_pred = clf_Batch2.predict(X_test)
accuracy_score(y_test, y_pred)

*** Load Iris Data and Try ***

In [None]:
import pandas as pd
# Read the full Iris CSV and its two batch files into DataFrames
# (same order as the paths listed below).
df_Iris_Full, df_Iris_Batch1, df_Iris_Batch2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        ".//Iris Dataset//Iris.csv",
        ".//Iris Dataset//Iris_Batch1.csv",
        ".//Iris Dataset//Iris_Batch2.csv",
    )
)

In [None]:
# Quick inspection of the three frames, always in the order Full, Batch1, Batch2.
iris_frames = (df_Iris_Full, df_Iris_Batch1, df_Iris_Batch2)

# Row/column counts
print(*(frame.shape for frame in iris_frames), sep="\n")

# Container types
print(*(type(frame) for frame in iris_frames), sep="\n")

# Distinct labels found in each frame's "class" column
print(*(np.unique(frame[["class"]]) for frame in iris_frames), sep="\n")

# First rows of each frame
print(*(frame.head() for frame in iris_frames), sep="\n")

In [None]:
# The four measurement columns used as model inputs.
feature_columns = ["sepal_length", "sepal_width", "petal_length", "petal_width"]

# The full CSV is used as the evaluation set.
X_test = df_Iris_Full[feature_columns]
y_test = df_Iris_Full[["class"]]

# First training batch.
X_train_1 = df_Iris_Batch1[feature_columns]
y_train_1 = df_Iris_Batch1[["class"]]

# Second training batch.
X_train_2 = df_Iris_Batch2[feature_columns]
y_train_2 = df_Iris_Batch2[["class"]]

In [None]:
# Shapes of each features/labels pair: evaluation set, batch 1, batch 2.
print(
    *(
        part.shape
        for part in (X_test, y_test, X_train_1, y_train_1, X_train_2, y_train_2)
    ),
    sep="\n",
)

In [None]:
# Full model training on the complete CSV data.
# NOTE: the model is fitted on X_test/y_test and scored on the same rows, so
# the value below is a training accuracy, not a held-out estimate.

# random_state pins SGD's per-epoch shuffling so the reported accuracy is
# reproducible and comparable with the incremental runs.
clf_full = linear_model.SGDClassifier(max_iter=100, tol=None, random_state=42)
# y_test is a single-column DataFrame; ravel() flattens it to the 1-D label array.
clf_full.fit(X_test, y_test.values.ravel())

y_pred = clf_full.predict(X_test)
accuracy_score(y_test, y_pred)

In [None]:
# Partial model training - batch 1
import pickle

# loss="log_loss" selects logistic regression. The former spelling "log" was
# deprecated in scikit-learn 1.1 and removed in 1.3, where it raises an error.
# random_state pins SGD's per-epoch shuffling so the accuracy is reproducible.
clf_Batch1 = linear_model.SGDClassifier(
    max_iter=100, tol=None, loss="log_loss", random_state=42
)
# y_train_1 is a single-column DataFrame; ravel() flattens it to 1-D labels.
clf_Batch1.fit(X_train_1, y_train_1.values.ravel())

# Accuracy of the batch-1-only model on the full CSV data.
y_pred = clf_Batch1.predict(X_test)
print(accuracy_score(y_test, y_pred))

# Save the batch-1 model to a file so a later cell can continue training it.
with open('SGDClassifier_Iris_Batch_1.pkl', 'wb') as f:
    pickle.dump(clf_Batch1, f)

print(y_pred)

In [None]:
# Partial model training - batch 2
# Restore the batch-1 model that was pickled to SGDClassifier_Iris_Batch_1.pkl
with open('SGDClassifier_Iris_Batch_1.pkl', 'rb') as model_file:
    clf_Batch2 = pickle.load(model_file)

# Labels of the second batch as a flat array, plus the label set found in the
# full data, which is handed to partial_fit as `classes`.
batch2_labels = y_train_2.values.ravel()
known_classes = np.unique(y_test)
clf_Batch2.partial_fit(X_train_2, batch2_labels, classes=known_classes)

# Accuracy of the incrementally updated model on the full CSV data.
y_pred = clf_Batch2.predict(X_test)
print(accuracy_score(y_test, y_pred))

print(y_pred)