In [1]:
#  INSTALL DEPENDENCY AND LIBRARIES
# multiple output model
import os  # the os library helps us work with file paths (os.path.join is used below to locate the dataset)
import pandas as pd 
import tensorflow as tf
import numpy as np
# tensorflow / tensorflow-gpu - the deep learning framework; its Keras API
#   will be used to create the Sequential model
# pandas - helps with reading the tabular data
# matplotlib - helps with plotting
# sklearn
# numpy - used for np.expand_dims, which wraps a value inside another array
#   dimension. This is needed when we have only one sample but want to pass
#   it through the deep learning model: the model expects a batch of multiple
#   examples, so we wrap the single sample in an extra (batch) dimension.


In [2]:
# Load the training data into a DataFrame with pd.read_csv.
# os.path.join assembles the relative path to the CSV from its components.
# NOTE(review): 'train.csv' appears twice — presumably the first one is a
# folder that contains the actual file; verify against the data layout on disk.
df = pd.read_csv(
    os.path.join(
        'jigsaw-toxic-comment-classification-challenge',
        'train.csv',
        'train.csv',
    )
)

In [3]:
from tensorflow.keras.layers import TextVectorization
# textvectorization is used for tokenization

In [4]:
# Model input: the 'comment_text' column, kept as a pandas Series.
X = df['comment_text']
# Targets: every column from the third one onward; .values converts the
# selected frame into a NumPy array.
y = df.loc[:, df.columns[2:]].values


In [5]:
# Number of words kept in the vocabulary (passed to TextVectorization as max_tokens).
MAX_FEATURES = 200_000

In [6]:
# Layer that turns raw comment strings into sequences of integer token ids.
vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,       # vocabulary size limit
    output_mode='int',             # emit integer token ids
    output_sequence_length=1800,   # length (in tokens) of each output sequence
)

In [7]:
# adapt() lets the vectorizer learn the vocabulary from the comment text.
vectorizer.adapt(X.values)
# X.values is used because we pass a NumPy array rather than the pandas Series.
# vectorizer.get_vocabulary() can be called afterwards to inspect the learned vocabulary.

In [8]:
# Pass all comments through the adapted vectorizer to get their integer-token form.
# NOTE(review): vectorized_text is not referenced again in the visible cells —
# confirm it is still needed, since it vectorizes every comment.
vectorized_text = vectorizer(X.values)

In [9]:
import tensorflow as tf
import gradio as gr

In [10]:
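# Left disabled on purpose: `model` is not defined in this notebook until the
# next cell loads it from 'toxicity.h5'.
# model.save('toxicity.h5')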

In [11]:
# Load the saved Keras model from 'toxicity.h5' (path relative to the working directory).
model = tf.keras.models.load_model('toxicity.h5')

In [12]:
def score_comment(comment):
    """Build a per-label True/False report for a single comment.

    The comment is wrapped in a one-element list, run through the global
    ``vectorizer``, and passed to ``model.predict``. For each column of ``df``
    from index 2 onward, the score at the same position in the first
    prediction row is compared against 0.5.

    Args:
        comment: The text to score.

    Returns:
        str: One "<column>: <True|False>" line per label column, each
        terminated by a newline.
    """
    tokens = vectorizer([comment])
    # Only one comment was submitted, so only the first prediction row matters.
    scores = model.predict(tokens)[0]
    label_names = df.columns[2:]
    return ''.join(
        '{}: {}\n'.format(label, scores[position] > 0.5)
        for position, label in enumerate(label_names)
    )

In [13]:
# Wrap score_comment in a text-in / text-out Gradio interface.
# NOTE(review): gr.inputs.Textbox and capture_session look like legacy Gradio
# APIs — confirm they are still accepted by the installed Gradio version.
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.inputs.Textbox(lines=2, placeholder='Comment to score'),
    outputs='text',
    capture_session=True,
)



In [14]:
# Start the Gradio app; share=True also requests a public share URL (see the
# "Running on public URL" line in the output).
# NOTE(review): a public link is reachable by anyone who has it — confirm that is intended.
interface.launch(share=True)

Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://24457.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces: https://huggingface.co/spaces


(<gradio.routes.App at 0x17d97abeb80>,
 'http://127.0.0.1:7860/',
 'https://24457.gradio.app')

