# Model Deployment

In this module, we explore the process of deploying applications for Natural Language Processing.

NOTE: this notebook is to explain what is happening in the code.

To actually execute this code, copy only the code portions into a .py file and run that file from an Anaconda prompt.

Assuming the name of your .py file is `nlp.py`, the code to run this file is: `streamlit run nlp.py`.

In [1]:
! pip install streamlit scikit-learn numpy
# required libraries
import os
import glob
import streamlit as st
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB # Naive Bayes
from sklearn.linear_model import SGDClassifier # SVM
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.pipeline import Pipeline
import numpy as np

Collecting streamlit
  Downloading streamlit-1.50.0-py3-none-any.whl.metadata (9.5 kB)
Collecting altair!=5.4.0,!=5.4.1,<6,>=4.0 (from streamlit)
  Downloading altair-5.5.0-py3-none-any.whl.metadata (11 kB)
Collecting blinker<2,>=1.5.0 (from streamlit)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Collecting cachetools<7,>=4.0 (from streamlit)
  Downloading cachetools-6.2.1-py3-none-any.whl.metadata (5.5 kB)
Collecting pyarrow>=7.0 (from streamlit)
  Using cached pyarrow-21.0.0-cp311-cp311-macosx_12_0_arm64.whl.metadata (3.3 kB)
Collecting tenacity<10,>=8.1.0 (from streamlit)
  Using cached tenacity-9.1.2-py3-none-any.whl.metadata (1.2 kB)
Collecting toml<2,>=0.10.1 (from streamlit)
  Downloading toml-0.10.2-py2.py3-none-any.whl.metadata (7.1 kB)
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading gitpython-3.1.45-py3-none-any.whl.metadata (13 kB)
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata 

### Create a dropdown / selectbox.
We'll do this by creating a selectbox() object and assigning it a title and a list of options.

In [4]:
# Options offered in the dropdown; each entry names one classifier.
cs = ["Naive Bayes","SVM"]

# Render the dropdown in the sidebar. The value stored here is later compared
# against the entries of `cs` to decide which classifier to run.
classification_space = st.sidebar.selectbox("Pick a classification method:", cs)

# Static text shown in the main area of the app.
st.write("Results")
st.write('Dataset details here:')
# The dataset consists of newsgroup posts (not newspaper articles); the
# scikit-learn documentation describes it as roughly 18,000 posts on 20 topics.
st.write("Twenty Newsgroup dataset chosen. It contains around 18,000 newsgroup posts on 20 different topics.")




### Next, create a button for users to interact with.
When pressed, the button will run a classification algorithm, based on the user's selection.

In [8]:
# Fixed seed for the SGD-based classifier so that repeated clicks on the
# button report the same accuracy instead of a slightly different one each run.
RANDOM_SEED = 42


def _make_classifier(method):
    """Return an unfitted classifier for the given selectbox option.

    Parameters
    ----------
    method : str
        Either "Naive Bayes" or "SVM".

    Returns
    -------
    A scikit-learn estimator: MultinomialNB for "Naive Bayes",
    SGDClassifier with hinge loss for "SVM".

    Raises
    ------
    ValueError
        If `method` is neither of the two supported options.
    """
    if method == "Naive Bayes":
        return MultinomialNB()
    if method == "SVM":
        # NOTE(review): scikit-learn documents l1_ratio as only used when
        # penalty='elasticnet', so with penalty='l1' it appears to have no
        # effect. Kept unchanged; confirm whether 'elasticnet' was intended.
        return SGDClassifier(loss='hinge',
                             penalty='l1',
                             alpha=0.0005,
                             l1_ratio=0.17,
                             random_state=RANDOM_SEED)
    raise ValueError(f"Unknown classification method: {method!r}")


def _run_classification(method):
    """Train the chosen classifier on 20 Newsgroups and write its test accuracy.

    Fetches the 'train' subset, fits a bag-of-words -> TF-IDF -> classifier
    pipeline on it, predicts on the 'test' subset, and writes the fraction of
    correct predictions to the app.

    Parameters
    ----------
    method : str
        Either "Naive Bayes" or "SVM"; also used in the on-screen messages.
    """
    classifier = _make_classifier(method)

    trainData = fetch_20newsgroups(subset='train', shuffle=True)
    st.write(f"{method} selected")

    # Both methods share the same text featurisation; only the last step differs.
    classificationPipeline = Pipeline([('bow', CountVectorizer()),
                                       ('vector', TfidfTransformer()),
                                       ('classifier', classifier)])
    classificationPipeline = classificationPipeline.fit(trainData.data, trainData.target)

    test_set = fetch_20newsgroups(subset='test', shuffle=True)
    dataPrediction = classificationPipeline.predict(test_set.data)

    # Accuracy = share of test posts whose predicted label equals the true label.
    st.write(f"Accuracy of {method} Classification:")
    st.write(np.mean(dataPrediction == test_set.target))


# Create a button object; the classification runs only when it is pressed.
if st.sidebar.button('Classify'):
    _run_classification(classification_space)

