In [1]:
!pip install gradio



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
import gradio as gr

# Widen the column display so long text messages are not truncated when printed.
# The full option name is used because shorthand names rely on pattern matching
# against all pandas options and can become ambiguous.
pd.set_option('display.max_colwidth', 200)

# Load the dataset into a DataFrame, dropping rows with no value in 'text_message'.
csv_file_path = "Resources/SMSSpamCollection.csv"
sms_text_df = pd.read_csv(csv_file_path).dropna(subset=['text_message'])

# Verify the first few rows and the class distribution
print("First few rows of the dataset:")
print(sms_text_df.head())

print("\nClass distribution:")
print(sms_text_df['label'].value_counts())

First few rows of the dataset:
  label  \
0   ham   
1   ham   
2  spam   
3   ham   
4   ham   

                                                                                                                                                  text_message  
0                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
1                                                                                                                                Ok lar... Joking wif u oni...  
2  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
3                                                                                                            U dun say so early hor... U c already then say...  
4                                                                                                

In [3]:
def sms_classification(sms_text_df):
    """
    Train a spam/ham SMS classifier using TF-IDF features and a linear SVM.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing a 'text_message' column (the SMS
      text) and a 'label' column (the class of each message).

    Returns:
    - text_clf (Pipeline): Pipeline of TfidfVectorizer followed by LinearSVC, fitted on
      the training portion of the data.

    Raises:
    - KeyError: If 'text_message' or 'label' is missing from the DataFrame.
    - ValueError: If no row has both a text message and a label.

    Rows with a missing text message or label are dropped first, so the function can be
    called with a freshly loaded, uncleaned DataFrame. The remaining rows are split into
    training and testing sets (test_size=0.33, random_state=42) and the pipeline is
    fitted on the training set only. The fitted pipeline is returned to make future
    predictions.
    """
    required_columns = ['text_message', 'label']
    missing_columns = [col for col in required_columns if col not in sms_text_df.columns]
    if missing_columns:
        raise KeyError(f"sms_text_df is missing required column(s): {missing_columns}")

    # Drop incomplete rows here rather than trusting the caller: the vectorizer
    # cannot process missing (NaN) documents.
    labelled_sms = sms_text_df.dropna(subset=required_columns)
    if labelled_sms.empty:
        raise ValueError("sms_text_df has no rows with both a text message and a label.")

    # Set the features variable to the text message column.
    features = labelled_sms['text_message']
    # Set the target variable to the "label" column.
    target = labelled_sms['label']

    # Split data into training and testing and set the test_size = 33%.
    # The held-out portion is not used in this function; the split is kept so the
    # model is trained on the same subset as before.
    X_train, _X_test, y_train, _y_test = train_test_split(
        features, target, test_size=0.33, random_state=42
    )

    # Build a pipeline that converts raw text to TF-IDF vectors and feeds them to the
    # classifier, so the same transformation is applied at fit and predict time.
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer()),
        ('clf', LinearSVC()),
    ])

    # Fit the model to the training data and return it.
    text_clf.fit(X_train, y_train)

    return text_clf

In [4]:
# Load the dataset into a DataFrame, dropping rows with no message text so the
# training data receives the same cleaning as the frame inspected earlier
# (the TF-IDF vectorizer cannot process missing documents).
csv_file_path = "./Resources/SMSSpamCollection.csv"
df = pd.read_csv(csv_file_path).dropna(subset=['text_message'])


In [5]:
# Train the classifier on the loaded DataFrame; the fitted pipeline is kept in the
# module-level "text_clf" variable, which `sms_prediction` reads when predicting.
text_clf = sms_classification(sms_text_df=df)



In [6]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# The function should return the SMS message, and say whether the text is "not spam" or "spam".
def sms_prediction(text):
    """
    Classify a single text message and describe the result in a sentence.

    Parameters:
    - text (str): The text message to be classified.

    Returns:
    - str: A sentence quoting the message and stating whether it is spam or not spam.

    The prediction is made with the module-level `text_clf` pipeline, which must have
    been fitted before this function is called. A predicted label of 'ham' is reported
    as "not spam"; any other label is reported as "spam".
    """
    # The pipeline expects a collection of documents, so wrap the single message in a
    # list and take the first (only) predicted label.
    predicted_label = text_clf.predict([text])[0]
    is_ham = predicted_label == 'ham'
    return (
        f'The text message: "{text}", is not spam.'
        if is_ham
        else f'The text message: "{text}", is spam.'
    )

In [7]:
# Custom CSS to style the interface (dark backgrounds with white text).
# Each `#id` selector below corresponds to an `elem_id` given to a Gradio component
# in the Blocks layout; the string is passed to `gr.Blocks(css=...)`.
custom_css = """
#app-container {
    background-color: #1d1d1d;
    color: white;
    font-family: Arial, sans-serif;
    padding: 20px;
    border-radius: 10px;
    box-shadow: 0px 0px 10px rgba(0,0,0,0.5);
    max-width: 800px;
    margin: auto;
}
#app-title {
    text-align: center;
    font-size: 24px;
    margin-bottom: 10px;
}
#app-description {
    text-align: center;
    font-size: 16px;
    margin-bottom: 20px;
}
#input-textbox, #output-textbox {
    background-color: #2e2e2e;
    color: white;
    border: 1px solid #444;
    padding: 10px;
    border-radius: 5px;
    width: 100%;
    box-sizing: border-box;
    margin-bottom: 10px;
}
#submit-btn {
    background-color: #ff4500;
    color: white;
    border: none;
    padding: 10px 20px;
    font-size: 16px;
    border-radius: 5px;
    cursor: pointer;
    width: 100%;
    margin-bottom: 10px;
}
#submit-btn:hover {
    background-color: #ff0000;
}
#clear-btn, #flag-btn {
    background-color: #666;
    color: white;
    border: none;
    padding: 10px 20px;
    font-size: 16px;
    border-radius: 5px;
    cursor: pointer;
    width: 100%;
    margin-bottom: 10px;
}
#clear-btn:hover, #flag-btn:hover {
    background-color: #444;
}
"""

def clear_inputs():
    """Return a pair of empty strings, one for each textbox that is being reset."""
    blank = ""
    return blank, blank

# Logger that persists flagged examples to a CSV file (Gradio's built-in flagging callback).
flag_logger = gr.CSVLogger()

# Create the Gradio interface using Blocks API
with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_id="app-container"):
        gr.Markdown("# SMS Spam Classifier", elem_id="app-title")
        gr.Markdown("Enter an SMS message to classify it as spam or not spam.", elem_id="app-description")

        input_text = gr.Textbox(lines=2, placeholder="Enter your SMS message here...", elem_id="input-textbox")

        with gr.Row():
            submit_btn = gr.Button("Submit", elem_id="submit-btn")
            clear_btn = gr.Button("Clear", elem_id="clear-btn")

        output_text = gr.Textbox(label="Our app has determined:", interactive=False, elem_id="output-textbox")

        with gr.Row():
            flag_btn = gr.Button("Flag", elem_id="flag-btn")

        # Register the components whose values are stored for each flagged example,
        # and the directory the log is written to.
        flag_logger.setup([input_text, output_text], "flagged_data_points")

        # Submit classifies the message; Clear resets both textboxes.
        submit_btn.click(fn=sms_prediction, inputs=input_text, outputs=output_text)
        clear_btn.click(fn=clear_inputs, inputs=[], outputs=[input_text, output_text])
        # Flag saves the current message and verdict so questionable predictions can
        # be reviewed later. preprocess=False passes the raw component values through.
        flag_btn.click(
            fn=lambda *values: flag_logger.flag(list(values)),
            inputs=[input_text, output_text],
            outputs=None,
            preprocess=False,
        )

# Launch the application. share=True additionally requests a temporary public
# gradio.live URL, so anyone with the link can reach the app while it is running.
demo.launch(share=True)


Running on local URL:  http://127.0.0.1:7862
IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
--------
Running on public URL: https://141f9f902dd8358532.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




## Test the following text messages. 

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.