In [65]:
#!pip install gradio

In [66]:
# Import pandas
import pandas as pd
# Import the required dependencies from sklearn
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC

# Widen the column display so long text messages are not truncated in DataFrame output.
# The fully qualified option name is used because abbreviated names are resolved by
# pattern matching and can become ambiguous if pandas adds similarly named options.
pd.set_option('display.max_colwidth', 200)

# Import Gradio
import gradio as gr

In [67]:
def sms_classification(sms_text_df):
    """
    Train an SMS spam classifier from labelled text messages.

    Parameters:
    - sms_text_df (pd.DataFrame): DataFrame containing 'text_message' and 'label' columns for SMS classification.

    Returns:
    - text_clf (Pipeline): Pipeline (TF-IDF vectorization followed by Linear Support Vector
      Classification) fitted on the training portion of the data.

    Raises:
    - KeyError: If 'text_message' or 'label' is missing from the DataFrame.

    Rows with a missing message or label are dropped first, since they cannot be used as
    training examples. The remaining rows are split 67/33 with a fixed seed, stratified on
    the label, and only the training partition is used to fit the pipeline. The held-out
    partition is not used or returned by this function.
    """
    required_columns = ['text_message', 'label']

    # Fail early with a message that names the missing column(s).
    missing_columns = [column for column in required_columns if column not in sms_text_df.columns]
    if missing_columns:
        raise KeyError(f"sms_text_df is missing required column(s): {missing_columns}")

    # Drop incomplete rows on a copy; the caller's DataFrame is left untouched.
    labelled_df = sms_text_df.dropna(subset=required_columns)

    # Set the features variable to the text message column
    X = labelled_df['text_message']

    # Set the target variable to the "label" column
    y = labelled_df['label']

    # Split data into training and testing and set the test_size to 33%.
    # Only the training partition is needed here, so the test partition is discarded.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)

    # Build a pipeline that converts raw text to TF-IDF features and feeds them to the classifier
    text_clf = Pipeline([
        ('tfidf', TfidfVectorizer(stop_words='english')),
        ('clf', LinearSVC()),
    ])

    # Fit the vectorizer and classifier on the training data
    text_clf.fit(X_train, y_train)

    return text_clf

In [83]:
# Summarize the DataFrame's index, column dtypes and non-null counts.
# NOTE(review): this cell was executed as In [83] but is placed above the cell that
# assigns sms_text_df; as far as this notebook shows, it would raise NameError on a
# fresh kernel. Move it below the load and header-removal cells.
sms_text_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 1 to 5572
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   label         5572 non-null   object
 1   text_message  5572 non-null   object
dtypes: object(2)
memory usage: 87.2+ KB


In [68]:
# Show how many messages carry each label.
# NOTE(review): executed as In [68], before the load cell (In [70]), so the output below
# appears to come from a DataFrame left over from an earlier run. Move this cell below
# the load and header-removal cells so Restart & Run All works.
print(sms_text_df['label'].value_counts())

label
ham     4825
spam     747
Name: count, dtype: int64


In [69]:
# Preview the first rows of the DataFrame.
# NOTE(review): executed as In [69], before the load cell (In [70]); it relies on
# sms_text_df already existing in the kernel. Move it below the load cell or remove it
# (the same preview is printed again after the header row is dropped).
print(sms_text_df.head())

  label  \
1   ham   
2   ham   
3  spam   
4   ham   
5   ham   

                                                                                                                                                  text_message  
1                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
2                                                                                                                                Ok lar... Joking wif u oni...  
3  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
4                                                                                                            U dun say so early hor... U c already then say...  
5                                                                                                Nah I don't think he goes to us

In [5]:
#from google.colab import drive
#drive.mount('/content/drive')

Mounted at /content/drive


In [70]:
# Read the SMS spam collection CSV into a two-column DataFrame.
# Column names are supplied explicitly, so the file's own header line is read in as
# data row 0; a later cell removes that row.
sms_text_df = pd.read_csv(
    '/Resources/SMSSpamCollection.csv',
    sep=',',
    names=['label', 'text_message'],
    quotechar='"',
)

In [42]:
# Drop rows with missing values in the 'text_message' column
#sms_text_df.dropna(subset=['text_message'], inplace=True)

In [71]:
# Preview the freshly loaded data. Row 0 holds the CSV header text
# ('label' / 'text_message') rather than a real message.
print(sms_text_df.head())

   label  \
0  label   
1    ham   
2    ham   
3   spam   
4    ham   

                                                                                                                                                  text_message  
0                                                                                                                                                 text_message  
1                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
2                                                                                                                                Ok lar... Joking wif u oni...  
3  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
4                                                                                                            U dun say so 

In [72]:
# Remove the CSV header line that was read in as a data row (its label is the literal
# string 'label'). Filtering by value rather than by position keeps this cell idempotent:
# re-running it never drops a real message, whereas slicing off the first row would
# discard one more row on every run.
sms_text_df = sms_text_df.loc[sms_text_df['label'] != 'label']

In [73]:
# Confirm the header row is gone: the preview should start with a real message, and the
# label counts should contain only 'ham' and 'spam'.
print(sms_text_df.head())
print(sms_text_df['label'].value_counts())

  label  \
1   ham   
2   ham   
3  spam   
4   ham   
5   ham   

                                                                                                                                                  text_message  
1                                              Go until jurong point, crazy.. Available only in bugis n great world la e buffet... Cine there got amore wat...  
2                                                                                                                                Ok lar... Joking wif u oni...  
3  Free entry in 2 a wkly comp to win FA Cup final tkts 21st May 2005. Text FA to 87121 to receive entry question(std txt rate)T&C's apply 08452810075over18's  
4                                                                                                            U dun say so early hor... U c already then say...  
5                                                                                                Nah I don't think he goes to us

In [74]:
# Call the sms_classification function with the DataFrame and set the result to the "text_clf" variable.
# text_clf is a module-level name: sms_prediction reads it directly, so this cell must
# run before the prediction function is called.
text_clf = sms_classification(sms_text_df)

In [81]:
# Create a function called `sms_prediction` that takes in the SMS text and predicts whether the text is "not spam" or "spam".
# The function returns a sentence that quotes the SMS message and states whether it is "not spam" or "spam".
def sms_prediction(text):
    """
    Classify a single text message as spam or not spam.

    Parameters:
    - text (str): The text message to be classified.

    Returns:
    - str: A sentence quoting the message and stating whether it is spam or not.

    The prediction is made with the module-level `text_clf` pipeline, which must already
    be fitted when this function is called. Any predicted label other than 'ham' is
    reported as spam.
    """
    # The pipeline expects a collection of documents, so the single message is wrapped
    # in a list and the single resulting label is taken back out.
    predicted_labels = text_clf.predict([text])
    is_ham = predicted_labels[0] == 'ham'

    # Build the user-facing sentence for the predicted class.
    return (
        f'The text message: "{text}", is not spam.'
        if is_ham
        else f'The text message: "{text}", is spam.'
    )

# Build the Gradio UI: one textbox for the incoming SMS and one for the verdict.
message_input = gr.Textbox(label="Enter SMS Text")
verdict_output = gr.Textbox(label="Our App has determined")

# Wire the textboxes to sms_prediction so each submission is classified.
sms_app = gr.Interface(
    fn=sms_prediction,
    inputs=message_input,
    outputs=verdict_output,
    title="SMS Spam Classification",
    description="Please enter a text message to determine if it is spam or not.",
)

# Start the web app so messages can be tested interactively.
sms_app.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d628592fe075def4fd.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Test the following text messages.

---

1. You are a lucky winner of $5000!
2. You won 2 free tickets to the Super Bowl.
3. You won 2 free tickets to the Super Bowl text us to claim your prize.
4. Thanks for registering. Text 4343 to receive free updates on medicare.