In [None]:
# Text classification using Bag of Words and TF-IDF features (scikit-learn) with a TensorFlow/Keras classifier.

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

# Toy corpus: five short sentences, each paired with a sentiment label.
data = {
    'text': [
        'I love programming',
        'Python is great',
        'I enjoy machine learning',
        'TensorFlow is a powerful tool',
        'AI is the future',
    ],
    'label': ['positive', 'positive', 'positive', 'positive', 'neutral'],
}

df = pd.DataFrame(data)

# Replace the string labels with integer class ids, in the same column.
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
df['label'] = label_encoder.transform(df['label'])

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)

# Option 1: Bag of Words (BoW)
# The vocabulary is learned from the training texts only, then applied to
# both splits; the sparse results are densified for Keras.
vectorizer_bow = CountVectorizer()
vectorizer_bow.fit(X_train)
X_train_bow = vectorizer_bow.transform(X_train).toarray()
X_test_bow = vectorizer_bow.transform(X_test).toarray()

# Option 2: TF-IDF
# Same procedure as above, with TF-IDF weights instead of raw counts.
vectorizer_tfidf = TfidfVectorizer()
vectorizer_tfidf.fit(X_train)
X_train_tfidf = vectorizer_tfidf.transform(X_train).toarray()
X_test_tfidf = vectorizer_tfidf.transform(X_test).toarray()

# Build a simple neural network with TensorFlow
def build_model(input_dim):
    """Build and compile a small feed-forward binary classifier.

    Args:
        input_dim: Number of features in each input vector (the number of
            columns of the vectorized text matrix).

    Returns:
        A compiled Keras ``Sequential`` model: two ReLU hidden layers
        (16 and 8 units) followed by a single sigmoid output unit, using the
        Adam optimizer, binary cross-entropy loss and an accuracy metric.
    """
    model = Sequential()
    # Declare the input shape with an explicit Input object rather than
    # passing `input_dim` to the first Dense layer; Keras 3 emits a
    # UserWarning for the latter and recommends this form for Sequential.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))  # Binary classification (positive or neutral)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model


# --- Bag of Words: train on the count features, then score the held-out split.
print("Training with Bag of Words (BoW)...")
model_bow = build_model(input_dim=X_train_bow.shape[1])
model_bow.fit(
    x=X_train_bow,
    y=y_train,
    epochs=10,
    batch_size=2,
    verbose=1,
)

# evaluate() returns the loss followed by the compiled metrics (accuracy here).
loss, accuracy = model_bow.evaluate(x=X_test_bow, y=y_test)
print(f'BoW Model Accuracy: {accuracy:.2f}')

# --- TF-IDF: a fresh model with the same architecture, trained on TF-IDF features.
print("Training with TF-IDF...")
model_tfidf = build_model(input_dim=X_train_tfidf.shape[1])
model_tfidf.fit(
    x=X_train_tfidf,
    y=y_train,
    epochs=10,
    batch_size=2,
    verbose=1,
)

loss, accuracy = model_tfidf.evaluate(x=X_test_tfidf, y=y_test)
print(f'TF-IDF Model Accuracy: {accuracy:.2f}')


Training with Bag of Words (BoW)...


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 16ms/step - accuracy: 0.6667 - loss: 0.6975
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.8333 - loss: 0.6899
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8333 - loss: 0.6867 
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6667 - loss: 0.7099 
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6667 - loss: 0.7057 
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6667 - loss: 0.6741 
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6667 - loss: 0.6983 
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6667 - loss: 0.6652 
Epoch 9/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [