<a href="https://colab.research.google.com/github/swarnava-96/Spam-Classifier-NLP-ML/blob/main/Spam_Model_Retraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Incremental Model Retraining**

In [10]:
# Lets install creme
!pip install creme

Collecting creme
  Downloading creme-0.6.1-cp37-cp37m-manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 5.3 MB/s 
Collecting mmh3==2.5.1
  Downloading mmh3-2.5.1.tar.gz (9.8 kB)
Building wheels for collected packages: mmh3
  Building wheel for mmh3 (setup.py) ... [?25l[?25hdone
  Created wheel for mmh3: filename=mmh3-2.5.1-cp37-cp37m-linux_x86_64.whl size=39690 sha256=e47820c2b06cb6f009833c7a280d60557298c3eb720769f8891762912b436a25
  Stored in directory: /root/.cache/pip/wheels/ae/45/25/90e097a519143b2dca74cd93a056894a965f27908103e01799
Successfully built mmh3
Installing collected packages: mmh3, creme
Successfully installed creme-0.6.1 mmh3-2.5.1


In [2]:
# Loading the data
import pandas as pd

# The file is tab-separated; the two column names are supplied explicitly.
messages = pd.read_csv(
    '/content/SMSSpamCollection',
    sep='\t',
    names=["label", "message"],
)

In [3]:
# Let's check the shape of the data as (rows, columns)
messages.shape

(5572, 2)

In [4]:
# Train test split
from sklearn.model_selection import train_test_split
# A fixed seed keeps the same rows in each split on every run; without it the
# split, and every accuracy computed from it, changes on each execution.
message_train, message_test = train_test_split(messages, random_state=42)

In [6]:
# Let's see the training split (a DataFrame with the label and message columns)
message_train

Unnamed: 0,label,message
2415,ham,O was not into fps then.
1340,ham,Every monday..nxt week vl be completing..
1855,ham,They did't play one day last year know even th...
2312,spam,Congratulations! Thanks to a good friend U hav...
4600,ham,Have you laid your airtel line to rest?
...,...,...
714,ham,Save yourself the stress. If the person has a ...
1930,spam,Free 1st week entry 2 TEXTPOD 4 a chance 2 win...
3631,spam,Get the official ENGLAND poly ringtone or colo...
1565,ham,Tmrw. Im finishing 9 doors


In [7]:
# Turn each split into a record array of (label, message) rows; the DataFrame
# index is dropped so that only the two columns remain in every record.
messages_train, messages_test = (
    split.to_records(index = False) for split in (message_train, message_test)
)

In [8]:
# Let's see the record array built from the training split
messages_train

rec.array([('ham', 'O was not into fps then.'),
           ('ham', 'Every monday..nxt week vl be completing..'),
           ('ham', "They did't play one day last year know even though they have very good team.. Like india."),
           ...,
           ('spam', 'Get the official ENGLAND poly ringtone or colour flag on yer mobile for tonights game! Text TONE or FLAG to 84199. Optout txt ENG STOP Box39822 W111WX £1.50'),
           ('ham', 'Tmrw. Im finishing 9 doors'),
           ('ham', "Dip's cell dead. So i m coming with him. U better respond else we shall come back.")],
          dtype=[('label', 'O'), ('message', 'O')])

In [13]:
# Creating the pipeline
# 1st step ("tokenize") builds the TF-IDF features for each message
# 2nd step ("nb") is the multinomial naive Bayes classifier

import creme
import math
from creme import compose
from creme import feature_extraction
from creme import naive_bayes

# TF-IDF feature extraction; the original casing of each message is kept.
tfidf_step = ("tokenize", feature_extraction.TFIDF(lowercase = False))
# Multinomial naive Bayes classifier with alpha set to 1.
nb_step = ("nb", naive_bayes.MultinomialNB(alpha = 1))

# Chain the two steps so one fit_one/predict_one call runs both in order.
model = compose.Pipeline(tfidf_step, nb_step)

In [14]:
from creme import metrics
# Running accuracy over the training records.
metric = metrics.Accuracy()

# Training the model one record at a time. Each message is learned first and
# then immediately predicted, so this accuracy is measured on messages the
# model has already seen.
for record in messages_train:
  label, sentence = record
  model = model.fit_one(sentence, label)
  y_pred = model.predict_one(sentence)
  metric = metric.update(label, y_pred)

In [15]:
# Let's see the accuracy accumulated while looping over the training records
metric

Accuracy: 95.93%

In [16]:
# Test data accuracy
test_metric = metrics.Accuracy()
for label,sentence in messages_test:
  y_pred = model.predict_one(sentence)
  test_metric = metric.update(label,y_pred)

In [17]:
# Lets see the test metric
metric

Accuracy: 95.76%

In [19]:
# New data
# Incrementally update the existing pipeline with two more messages labelled
# "ham"; only these two fit_one calls run, the earlier training loop is not
# repeated. The last call's return value is what the cell displays.
model.fit_one("This guy is neutral","ham")
model.fit_one("Everybody is neutral","ham")

Pipeline (
  TFIDF (
    normalize=True
    on=None
    strip_accents=True
    lowercase=False
    preprocessor=None
    tokenizer=<built-in method findall of re.Pattern object at 0x7fdc72a0f850>
    ngram_range=(1, 1)
  ),
  MultinomialNB (
    alpha=1
  )
)