<a href="https://colab.research.google.com/github/kungfumas/aplikasi-deep-learning/blob/master/streamlitclasslime.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Download the labelled ham/spam message dataset from the tutorial repository.
spam_csv_url = 'https://raw.githubusercontent.com/AmolMavuduru/StreamlitTutorial/master/data/spam_data.csv'
data = pd.read_csv(spam_csv_url)
# Preview the first ten rows (columns: Unnamed: 0, Category, Message).
data.head(10)

Unnamed: 0,Category,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
5,spam,FreeMsg Hey there darling it's been 3 week's n...
6,ham,Even my brother is not like to speak with me. ...
7,ham,As per your request 'Melle Melle (Oru Minnamin...
8,spam,WINNER!! As a valued network customer you have...
9,spam,Had your mobile 11 months or more? U R entitle...


In [3]:
import re # regex library
def preprocessor(text):
    """Clean a raw message for vectorisation.

    Steps, in order:
      1. Remove anything that looks like an HTML tag.
      2. Collect emoticons such as ``:)``, ``;-(``, ``=D`` from the tag-free text.
      3. Lower-case the text and collapse every run of non-word characters
         into a single space.
      4. Append the collected emoticons (space-separated, with the ``-`` nose
         removed) directly to the end of the cleaned text; no separator is
         inserted between the text and the first emoticon.

    Args:
        text: The raw message string.

    Returns:
        The cleaned string.
    """
    # Raw strings keep the regex escapes out of Python's own string-escape
    # handling, which warns about unknown escape sequences in plain literals.
    text = re.sub(r'<[^>]*>', '', text)  # Effectively removes HTML markup tags
    # Emoticons must be captured before punctuation is stripped below.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub(r'[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    return text

In [4]:
from sklearn.model_selection import train_test_split
# Features are the raw message texts; targets are the Category labels.
X, y = data['Message'], data['Category']

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [5]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score

In [6]:
# TF-IDF over unigrams, limited to 700 features; text cleaning is delegated
# to preprocessor(), which lower-cases the words itself.
tfidf = TfidfVectorizer(
    strip_accents=None,
    lowercase=False,
    max_features=700,
    preprocessor=preprocessor,
    ngram_range=(1, 1),
)
# Two hidden layers of 700 units each. random_state is fixed (matching the
# seed used for the train/test split) so that weight initialisation, and
# therefore the reported metrics, are repeatable across runs.
neural_net_pipeline = Pipeline([
    ('vectorizer', tfidf),
    ('nn', MLPClassifier(hidden_layer_sizes=(700, 700), random_state=42)),
])
neural_net_pipeline.fit(X_train, y_train)

In [7]:
# Score the fitted pipeline on the held-out messages.
y_pred = neural_net_pipeline.predict(X_test)

# Per-class report first, then overall accuracy expressed as a percentage.
print(classification_report(y_test, y_pred))
accuracy_pct = 100 * accuracy_score(y_test, y_pred)
print('Accuracy: {} %'.format(accuracy_pct))

              precision    recall  f1-score   support

         ham       0.99      0.99      0.99      1448
        spam       0.95      0.92      0.94       224

    accuracy                           0.98      1672
   macro avg       0.97      0.96      0.96      1672
weighted avg       0.98      0.98      0.98      1672

Accuracy: 98.32535885167464 %


In [12]:
from joblib import dump
# Persist the fitted pipeline to disk; app.py loads this same file name with joblib.load.
# NOTE(review): the pipeline holds a reference to the notebook-level preprocessor function;
# presumably the loading script must define a function with the same name — confirm.
dump(neural_net_pipeline, 'spam_classifier.joblib')

['spam_classifier.joblib']

In [14]:
pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m275.7/275.7 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: lime
  Building wheel for lime (setup.py) ... [?25l[?25hdone
  Created wheel for lime: filename=lime-0.2.0.1-py3-none-any.whl size=283835 sha256=4d678fb38872a04b4b72a7942c2a0c15e3ee3292394b92280e525f05bbc71a28
  Stored in directory: /root/.cache/pip/wheels/fd/a2/af/9ac0a1a85a27f314a06b39e1f492bee1547d52549a4606ed89
Successfully built lime
Installing collected packages: lime
Successfully installed lime-0.2.0.1


In [8]:
%%writefile app.py
import joblib
import re
from sklearn.neural_network import MLPClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
import streamlit as st
from lime.lime_text import LimeTextExplainer
import streamlit.components.v1 as components

# Page heading.
st.write("# Spam Detection Engine")

# Text box for the message to evaluate; its value is compared against ''
# further down to decide whether to run the classifier.
message_text = st.text_input("Enter a message for spam evaluation")

def preprocessor(text):
    """Clean a raw message exactly as was done when the model was trained.

    Steps, in order:
      1. Remove anything that looks like an HTML tag.
      2. Collect emoticons such as ``:)``, ``;-(``, ``=D`` from the tag-free text.
      3. Lower-case the text and collapse every run of non-word characters
         into a single space.
      4. Append the collected emoticons (space-separated, with the ``-`` nose
         removed) directly to the end of the cleaned text; no separator is
         inserted between the text and the first emoticon.

    Args:
        text: The raw message string.

    Returns:
        The cleaned string.
    """
    # Raw strings keep the regex escapes out of Python's own string-escape
    # handling, which warns about unknown escape sequences in plain literals.
    text = re.sub(r'<[^>]*>', '', text)  # Effectively removes HTML markup tags
    # Emoticons must be captured before punctuation is stripped below.
    emoticons = re.findall(r'(?::|;|=)(?:-)?(?:\)|\(|D|P)', text)
    text = re.sub(r'[\W]+', ' ', text.lower()) + ' '.join(emoticons).replace('-', '')
    return text

# Load the fitted pipeline from the file the notebook saved with joblib's dump().
# NOTE(review): joblib.load unpickles the file, and unpickling can execute arbitrary code —
# only load artifacts you produced or trust.
# preprocessor is defined above, before this call; presumably unpickling has to resolve it by name — confirm.
model = joblib.load('spam_classifier.joblib')

def classify_message(model, message):
    """Classify one message and report the probability that it is spam.

    Args:
        model: Fitted classifier exposing ``predict`` and ``predict_proba``,
            both called with a one-element list containing ``message``. If it
            also exposes ``classes_``, that is used to find the spam column.
        message: The raw message text.

    Returns:
        dict with two keys: ``'label'`` (the predicted class for the message)
        and ``'spam_probability'`` (the predicted probability of the
        ``'spam'`` class).
    """
    label = model.predict([message])[0]
    probabilities = model.predict_proba([message])[0]

    # Find the 'spam' column by name instead of assuming it is column 1, so
    # the result stays correct if the class order ever differs. Fall back to
    # column 1 (the original behaviour) when the model has no classes_ or no
    # class literally named 'spam'.
    classes = list(getattr(model, 'classes_', []))
    spam_index = classes.index('spam') if 'spam' in classes else 1

    return {'label': label, 'spam_probability': probabilities[spam_index]}

# Nothing is classified until the user has typed a message.
if message_text != '':
    # Show the predicted label and spam probability.
    st.write(classify_message(model, message_text))

    # The LIME explanation is only computed when the button is pressed.
    if st.button('Explain Predictions'):
        with st.spinner('Generating explanations'):
            explainer = LimeTextExplainer(class_names=['ham', 'spam'])
            explanation = explainer.explain_instance(
                message_text,
                model.predict_proba,
                num_features=10,
            )
            # Embed LIME's HTML report in the page.
            components.html(explanation.as_html(), height=800)






Writing app.py


In [9]:

# Install Streamlit, which app.py imports and which the `streamlit run` cell invokes; -qqqq silences pip's log output.
!pip install streamlit -qqqq


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m24.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.7/62.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h

In [10]:
# NOTE(review): pyngrok is installed here but no visible cell imports or calls it —
# the launch cell tunnels with `npx localtunnel` instead. Confirm whether this install is still needed.
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.0.3-py3-none-any.whl (21 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.0.3


In [15]:
# Start the Streamlit app in the background (`&`), then run localtunnel in the
# foreground to expose port 8501 — the port the Streamlit URLs in the output use — through a public URL.
!streamlit run app.py & npx localtunnel --port 8501

[##................] - fetchMetadata: sill resolveWithNewModule ms@2.1.2 checki[0m[K
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to False.
[0m
[K[?25hnpx: installed 22 in 3.64s
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.80.34.194:8501[0m
[0m
your url is: https://rotten-teeth-doubt.loca.lt
[34m  Stopping...[0m
^C
