In [1]:
import tensorflow as tf 
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np


In [3]:
df = pd.read_csv('dataset/train.csv')

In [4]:
df.head()


Unnamed: 0,id,comment_text,toxic,severe_toxic,obscene,threat,insult,identity_hate
0,0000997932d777bf,Explanation\nWhy the edits made under my usern...,0,0,0,0,0,0
1,000103f0d9cfb60f,D'aww! He matches this background colour I'm s...,0,0,0,0,0,0
2,000113f07ec002fd,"Hey man, I'm really not trying to edit war. It...",0,0,0,0,0,0
3,0001b41b1c6bb37e,"""\nMore\nI can't make any real suggestions on ...",0,0,0,0,0,0
4,0001d958c54c6e35,"You, sir, are my hero. Any chance you remember...",0,0,0,0,0,0


In [5]:
df.iloc[2]


id                                                000113f07ec002fd
comment_text     Hey man, I'm really not trying to edit war. It...
toxic                                                            0
severe_toxic                                                     0
obscene                                                          0
threat                                                           0
insult                                                           0
identity_hate                                                    0
Name: 2, dtype: object

In [6]:
from tensorflow.keras.layers import TextVectorization

In [7]:
X = df.iloc[:,1]
y = df.iloc[:,2:].values

In [8]:
y

array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       ...,
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]], dtype=int64)

In [9]:
MAX_FEATURES = 200000

In [10]:
text_vectorizer = TextVectorization(
    max_tokens=MAX_FEATURES,
    output_mode='int',
    output_sequence_length=1000,
)

In [11]:
text_vectorizer.adapt(X.values)

In [12]:
text_vectorizer.get_vocabulary()

['',
 '[UNK]',
 'the',
 'to',
 'of',
 'and',
 'a',
 'you',
 'i',
 'is',
 'that',
 'in',
 'it',
 'for',
 'this',
 'not',
 'on',
 'be',
 'as',
 'have',
 'are',
 'your',
 'with',
 'if',
 'article',
 'was',
 'or',
 'but',
 'page',
 'my',
 'an',
 'from',
 'by',
 'do',
 'at',
 'about',
 'me',
 'so',
 'wikipedia',
 'can',
 'what',
 'there',
 'all',
 'has',
 'will',
 'talk',
 'please',
 'would',
 'its',
 'no',
 'one',
 'just',
 'like',
 'they',
 'he',
 'dont',
 'which',
 'any',
 'been',
 'should',
 'more',
 'we',
 'some',
 'other',
 'who',
 'see',
 'here',
 'also',
 'his',
 'think',
 'im',
 'because',
 'know',
 'how',
 'am',
 'people',
 'why',
 'edit',
 'articles',
 'only',
 'out',
 'up',
 'when',
 'were',
 'use',
 'then',
 'may',
 'time',
 'did',
 'them',
 'now',
 'being',
 'their',
 'than',
 'thanks',
 'even',
 'get',
 'make',
 'good',
 'had',
 'very',
 'information',
 'does',
 'could',
 'well',
 'want',
 'such',
 'sources',
 'way',
 'name',
 'these',
 'deletion',
 'pages',
 'first',
 'help'

In [13]:
text_vectorizer('I am good')[:3]

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([ 8, 74, 98], dtype=int64)>

In [14]:
vectorized_text = text_vectorizer(X.values)

In [15]:
dataset = tf.data.Dataset.from_tensor_slices((vectorized_text, y))
dataset = dataset.cache()
dataset = dataset.shuffle(160000)
dataset = dataset.batch(16)
dataset = dataset.prefetch(8)


In [16]:
dataset_len = len(dataset)

In [17]:
train_size = int(dataset_len * 0.8)
val_size = int(dataset_len * 0.1)
test_size = dataset_len - train_size - val_size

train_data = dataset.take(train_size)
val_data = dataset.skip(train_size).take(val_size)
test_data = dataset.skip(train_size + val_size)

In [18]:
len(train_data)

7979

In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Bidirectional, Dense, Embedding

In [20]:
SEQUENCE_LENGTH = 1800

In [21]:
model = Sequential()
model.add(Embedding(input_dim=MAX_FEATURES + 1, output_dim=32))
model.add(Bidirectional(LSTM(32, activation='tanh')))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(128, activation='relu'))
model.add(Dense(6, activation='sigmoid'))

In [22]:
model.compile(loss='BinaryCrossentropy', optimizer='Adam')

In [23]:
model.summary()

In [24]:
history = model.fit(train_data, epochs=1, validation_data=val_data)

[1m7979/7979[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28727s[0m 4s/step - loss: 0.0825 - val_loss: 0.0454


In [25]:
input_text = text_vectorizer('I love you')

In [26]:
model.predict(np.expand_dims(input_text,0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step


array([[0.04870512, 0.00016695, 0.01392549, 0.00098231, 0.01099103,
        0.00477233]], dtype=float32)

In [27]:
for data in test_data:
    print()









































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [28]:
predictions=[]
for inputs, _ in test_data:  # Iterate through the dataset and get the inputs
    preds = model.predict(inputs)
    print(preds)
    break

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 250ms/step
[[6.95247874e-02 8.12954531e-05 1.09299216e-02 5.98041166e-04
  1.06963916e-02 3.22680781e-03]
 [2.30810978e-03 2.03016384e-07 5.75609040e-04 3.30788907e-06
  2.71287019e-04 8.22174261e-05]
 [4.88503501e-02 3.50928931e-05 7.77241960e-03 3.01182939e-04
  7.10985623e-03 2.01996346e-03]
 [3.87943885e-03 2.06236649e-07 8.99897714e-04 4.26110682e-06
  4.54371388e-04 1.05926156e-04]
 [2.24936800e-03 6.24959853e-08 5.44148614e-04 1.53128258e-06
  2.42782684e-04 5.23672497e-05]
 [3.75729124e-03 4.60212277e-07 8.43184418e-04 6.90312982e-06
  4.44188481e-04 1.34984657e-04]
 [2.52290051e-02 9.84619510e-06 4.63559758e-03 1.05669802e-04
  3.62695125e-03 9.88416956e-04]
 [4.72461618e-03 3.56595876e-07 1.13970926e-03 6.64371782e-06
  5.96939004e-04 1.45092548e-04]
 [3.54464701e-03 1.75437918e-07 8.49219621e-04 3.67165285e-06
  4.19597898e-04 9.66226871e-05]
 [4.75816305e-05 7.40387057e-11 1.51412014e-05 3.58272700e-09
  3.131981

In [29]:
batch_X, batch_y = test_data.as_numpy_iterator().next()
(model.predict(batch_X) > 0.5).astype(int)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step


array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])

In [30]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy

In [31]:
pre = Precision()
re = Recall()
acc = CategoricalAccuracy()

In [32]:
for batch in test_data.as_numpy_iterator():

    X_true,y_true = batch
    y_hat = model.predict(X_true)
    y_true = y_true.flatten()
    y_hat = y_hat.flatten()
    pre.update_state(y_true, y_hat)
    re.update_state(y_true, y_hat)
    acc.update_state(y_true, y_hat)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 46ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47

In [14]:
!pip install gradio jinja2

Collecting gradio
  Downloading gradio-4.37.1-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting gradio-client==1.0.2 (from gradio)
  Downloading gradio_client-1.0.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.19.3 (from gradio)
  Downloading huggingface_hub-0.23.4-py3-none-any.whl.metadata (12 kB)
Collecting importlib-resources<7.0,>=1.3 (from gradio)
  Downloading importlib_resources-6.4.0-py3-none-any.whl.metadata (3.9 kB)
Collecting pydantic>=2.0 (from gradio)
  Using cached pydantic-2.7.4-py3-none-any.whl.metadata (109 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.4.10-py3-n

In [1]:
import tensorflow as tf
import gradio as gr

In [2]:
model.save('toxicity.keras')

NameError: name 'model' is not defined

In [3]:

model = tf.keras.models.load_model('toxicity.keras')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

  saveable.load_own_variables(weights_store.get(inner_path))


In [4]:
input_str = text_vectorizer('hey i freaken hate you!')

NameError: name 'text_vectorizer' is not defined

In [42]:
res = model.predict(np.expand_dims(input_str,0))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 482ms/step


In [43]:
res

array([[0.89695907, 0.03325239, 0.5688753 , 0.0190154 , 0.4113771 ,
        0.06846711]], dtype=float32)

In [6]:
def score_comment(comment):
    vectorized_comment = text_vectorizer([comment])
    results = model.predict(vectorized_comment)
    text = ''
    for idx, col in enumerate(df.columns[2:]):
        text += '{}: {}\n'.format(col, results[0][idx]>0.5)
    return text

In [53]:
ans = score_comment(" You suck")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step


True

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 72ms/step


In [7]:
interface = gr.Interface(
    fn=score_comment,
    inputs=gr.Textbox(lines=2, placeholder='Comment please'),
    outputs='text'
)

interface.launch()

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "C:\Users\rushi\anaconda3\Lib\site-packages\gradio\queueing.py", line 541, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\rushi\anaconda3\Lib\site-packages\gradio\route_utils.py", line 276, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\rushi\anaconda3\Lib\site-packages\gradio\blocks.py", line 1928, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\rushi\anaconda3\Lib\site-packages\gradio\blocks.py", line 1514, in call_function
    prediction = await anyio.to_thread.run_sync(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\rushi\anaconda3\Lib\site-packages\anyio\to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
           ^^^^^^^^^^^^^^^^^^^^^^

In [None]:
def calculator()

In [58]:
import gradio as gr

def calculator(num1, num2, operation):
    if operation == 'Addition':
        result = num1 + num2
    elif operation == 'Subtraction':
        result = num1 - num2
    elif operation == 'Multiplication':
        result = num1 * num2
    elif operation == 'Division':
        if num2 != 0:
            result = num1 / num2
        else:
            result = "Error: Division by zero is not allowed."
    else:
        result = "Invalid operation."
    return result

# Define the Gradio interface using the updated API
interface = gr.Interface(
    fn=calculator,
    inputs=[
        gr.Number(label="First Number"),
        gr.Number(label="Second Number"),
        gr.Radio(['Addition', 'Subtraction', 'Multiplication', 'Division'], label="Operation")
    ],
    outputs=gr.Textbox(),
    title="Simple Calculator",
    description="Perform basic arithmetic operations."
)

# Launch the Gradio app
interface.launch()


Running on local URL:  http://127.0.0.1:7863

To create a public link, set `share=True` in `launch()`.


