In [1]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Set the column width to view the text message data.
pd.set_option('max_colwidth', 200)

! pip install --upgrade gradio
# Import Gradio
import gradio as gr



In [2]:
# Read the SMS spam collection CSV into a DataFrame (the path is relative to the
# notebook's working directory).
df = pd.read_csv(filepath_or_buffer='Resources/SMSSpamCollection.csv')

In [3]:
# Preview ten randomly chosen rows; no seed is passed, so the rows differ between runs.
df.sample(10)

Unnamed: 0,label,text_message
4738,ham,"Nt only for driving even for many reasons she is called BBD..thts it chikku, then hw abt dvg cold..heard tht vinobanagar violence hw is the condition..and hw ru ? Any problem?"
609,ham,Neva mind it's ok..
3634,ham,Hui xin is in da lib.
1657,ham,Yes I posted a couple of pics on fb. There's still snow outside too. I'm just waking up :)
3137,ham,Can you let me know details of fri when u find out cos I'm not in tom or fri. mentionned chinese. Thanks
3749,ham,"A bit of Ur smile is my hppnss, a drop of Ur tear is my sorrow, a part of Ur heart is my life, a heart like mine wil care for U, forevr as my GOODFRIEND"
2262,ham,Did you show him and wot did he say or could u not c him 4 dust?
2440,ham,Rightio. 11.48 it is then. Well arent we all up bright and early this morning.
5408,ham,... Are you in the pub?
2510,ham,"U wake up already? Wat u doing? U picking us up later rite? I'm taking sq825, reaching ard 7 smth 8 like dat. U can check e arrival time. C ya soon..."


In [4]:
def sms_classification(sms_text_df):
    """
    Train a TF-IDF + LinearSVC spam classifier and score it on both data splits.

    Parameters:
    - sms_text_df (pd.DataFrame): Data with a 'text_message' column (features) and a
      'label' column (target).

    Returns:
    - dict with three entries:
        - "fit": the fitted Pipeline, ready for `.predict(...)` calls.
        - "training_score": result of `Pipeline.score` on the training split.
        - "testing_score": result of `Pipeline.score` on the held-out split.

    The rows are split into training and testing sets (test_size=0.33, random_state=42).
    Only the training split is used to fit the pipeline.
    """

    # Features are the raw message texts; the target is the label of each message.
    messages = sms_text_df['text_message']
    labels = sms_text_df['label']

    # Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
    messages_train, messages_test, labels_train, labels_test = train_test_split(
        messages, labels, test_size=0.33, random_state=42
    )

    # Pipeline: TF-IDF vectorization (with the built-in English stop-word list) feeding
    # a linear support vector classifier. verbose=True makes the pipeline report each
    # step as it is fitted.
    steps = [
        ('vector', TfidfVectorizer(stop_words='english')),
        ('svc', LinearSVC()),
    ]
    text_clf = Pipeline(steps, verbose=True)

    # Fit first: the two scores below must be computed on the already-fitted pipeline.
    fitted = text_clf.fit(messages_train, labels_train)
    training_score = text_clf.score(messages_train, labels_train)
    testing_score = text_clf.score(messages_test, labels_test)

    return {
        "fit": fitted,
        "training_score": training_score,
        "testing_score": testing_score,
    }

In [5]:
# Train the classifier on the DataFrame. Despite its name, "text_clf" holds the
# dictionary returned by sms_classification: the fitted pipeline under 'fit', plus
# 'training_score' and 'testing_score'.
text_clf = sms_classification(sms_text_df=df)

[Pipeline] ............ (step 1 of 2) Processing vector, total=   0.0s
[Pipeline] ............... (step 2 of 2) Processing svc, total=   0.0s


In [6]:
# Print the two scores stored alongside the fitted pipeline, one per line.
print("\n".join(f"{metric} {text_clf[metric]}" for metric in ("training_score", "testing_score")))

training_score 1.0
testing_score 0.9885807504078303


In [7]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# The function should return the SMS message and say whether the text is "not spam" or "spam".
def sms_prediction(text):
    """
    Classify a single text message as spam or not spam.

    Parameters:
    - text (str): The text message to be classified.

    Returns:
    - str: 'The text message: "<text>", is not spam.' when the predicted label is
      'ham'; otherwise 'The text message: "<text>", is spam.'

    The fitted pipeline is not passed in: it is read from the notebook-level
    `text_clf` dictionary (under the 'fit' key), so the cell that builds `text_clf`
    must have run before this function is called.
    """

    # predict() is given a one-item list of documents and returns one label per
    # document, so take the first (only) label.
    predicted_label = text_clf['fit'].predict([text])[0]

    # Compare the scalar label rather than the whole result: truth-testing the
    # array comparison only works while the array has exactly one element.
    if predicted_label == 'ham':
        return f'The text message: "{text}", is not spam.'
    return f'The text message: "{text}", is spam.'


In [8]:
# Sample messages used to spot-check the classifier.
text_1 = "You are a lucky winner of $5000!!"
text_2 = "You won 2 free tickets to the Super Bowl."
text_3 = "You won 2 free tickets to the Super Bowl text us to claim your prize"
text_4 = "Thanks for registering. Text 4343 to receive free updates on medicare"

In [9]:
# Run each sample message through the fitted pipeline (vectorize, then classify)
# and print the predicted label array for each, one per line.
print(
    *(text_clf['fit'].predict([message]) for message in (text_1, text_2, text_3, text_4)),
    sep="\n",
)

['ham']
['ham']
['spam']
['spam']


In [10]:
# Create the Gradio interface around `sms_prediction`, with a labelled textbox for the
# input message and a labelled textbox for the classification result.
app = gr.Interface(
    fn=sms_prediction,
    inputs=gr.Textbox(label="Text message to classify"),
    outputs=gr.Textbox(label="Classification result"),
)

# Launch the app.
app.launch()

* Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.