In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [17]:
import pandas as pd

# Toy corpus: short tech-themed sentences, each paired with a sentiment tag.
sentences = [
    "I love programming",
    "Python is great",
    "I enjoy machine learning",
    "TensorFlow is a powerful tool",
    "AI is the future",
]
sentiments = ["positive"] * 4 + ["neutral"]
data = {"text": sentences, "label": sentiments}

# Tabular view of the corpus: one row per sentence.
df = pd.DataFrame(data)

# Persist the table without the row index so the file holds only text,label.
df.to_csv("tech_sentiment_dataset.csv", index=False)

# Show the first rows as plain text.
print(df.head())

                            text     label
0             I love programming  positive
1                Python is great  positive
2       I enjoy machine learning  positive
3  TensorFlow is a powerful tool  positive
4               AI is the future   neutral


In [18]:
# Reload the dataset from the CSV file so the following cells work from the
# on-disk copy rather than the in-memory frame.
csv_path = "tech_sentiment_dataset.csv"
df = pd.read_csv(csv_path)
df

Unnamed: 0,text,label
0,I love programming,positive
1,Python is great,positive
2,I enjoy machine learning,positive
3,TensorFlow is a powerful tool,positive
4,AI is the future,neutral


### Encode Labels

In [19]:
# Replace the string sentiment labels with integer class ids; the fitted
# encoder is kept so the ids can be mapped back to their names later.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['label'])
df['label'] = encoded_labels

df.head()

Unnamed: 0,text,label
0,I love programming,1
1,Python is great,1
2,I enjoy machine learning,1
3,TensorFlow is a powerful tool,1
4,AI is the future,0


### Split data into training and testing sets

In [20]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition the same on every run.
features, targets = df['text'], df['label']
splits = train_test_split(features, targets, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = splits
(X_train, X_test)

(4                 AI is the future
 2         I enjoy machine learning
 0               I love programming
 3    TensorFlow is a powerful tool
 Name: text, dtype: object,
 1    Python is great
 Name: text, dtype: object)

### Vectorize text data using Bag of Words and TF-IDF

In [21]:
# Bag of Words: learn the vocabulary from the training sentences only, then
# turn both splits into dense count matrices over that vocabulary.
vectorizer_bow = CountVectorizer()
vectorizer_bow.fit(X_train)
X_train_bow = vectorizer_bow.transform(X_train).toarray()
X_test_bow = vectorizer_bow.transform(X_test).toarray()

In [22]:
# TF-IDF: fit the vocabulary and IDF weights on the training sentences only,
# then turn both splits into dense weighted matrices.
vectorizer_tfidf = TfidfVectorizer()
vectorizer_tfidf.fit(X_train)
X_train_tfidf = vectorizer_tfidf.transform(X_train).toarray()
X_test_tfidf = vectorizer_tfidf.transform(X_test).toarray()

### Build a simple neural network model

In [23]:
def build_model(input_dim):
    """Build and compile a small feed-forward binary classifier.

    Args:
        input_dim: Number of input features per sample (the width of the
            vectorized text matrix).

    Returns:
        A compiled Keras ``Sequential`` model: two ReLU hidden layers
        (16 and 8 units) followed by a single sigmoid unit, trained with
        Adam on binary cross-entropy and reporting accuracy.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input object. Passing
        # `input_dim` to the first Dense layer is deprecated in Keras 3 and
        # emits a UserWarning when the model is built.
        tf.keras.Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(8, activation='relu'),
        Dense(1, activation='sigmoid'),  # Binary classification
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

### Train and evaluate model

In [24]:
# Inspect the Bag-of-Words training matrix together with its dimensions.
X_train_bow, np.shape(X_train_bow)

(array([[1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0],
        [0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0],
        [0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1]], dtype=int64),
 (4, 12))

In [25]:
# Training and evaluating with Bag of Words
# Seed the Python, NumPy and TensorFlow random generators before the model is
# created so that training gives the same result on every Restart & Run All.
tf.keras.utils.set_random_seed(42)
model_bow = build_model(X_train_bow.shape[1])
model_bow.fit(X_train_bow, y_train, epochs=10, batch_size=2, verbose=1)

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5000 - loss: 0.6757 
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.6667 - loss: 0.6343
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3333 - loss: 0.6854    
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.5000 - loss: 0.6425
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.5000 - loss: 0.6363
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.3333 - loss: 0.6702    
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.5000 - loss: 0.6244
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.3333 - loss: 0.6603    
Epoch 9/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

<keras.src.callbacks.history.History at 0x2bbdbc767e0>

In [26]:
# Score the Bag-of-Words model on the held-out split; the result unpacks
# into the loss followed by the accuracy metric.
bow_scores = model_bow.evaluate(X_test_bow, y_test)
loss, accuracy = bow_scores
print(f'BoW Model Accuracy: {accuracy:.2f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 113ms/step - accuracy: 1.0000 - loss: 0.6172
BoW Model Accuracy: 1.00


In [27]:
# Training and evaluating with TF-IDF
# Seed the Python, NumPy and TensorFlow random generators before the model is
# created so that training gives the same result on every Restart & Run All.
tf.keras.utils.set_random_seed(42)
model_tfidf = build_model(X_train_tfidf.shape[1])
model_tfidf.fit(X_train_tfidf, y_train, epochs=10, batch_size=2, verbose=1)

Epoch 1/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.1667 - loss: 0.7889     
Epoch 2/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3333 - loss: 0.7398
Epoch 3/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3333 - loss: 0.7386
Epoch 4/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.1667 - loss: 0.7655    
Epoch 5/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.1667 - loss: 0.7427    
Epoch 6/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.1667 - loss: 0.7452    
Epoch 7/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.3333 - loss: 0.7019
Epoch 8/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step - accuracy: 0.1667 - loss: 0.7163    
Epoch 9/10
[1m2/2[0m [32m━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x2bbdb5c1b80>

In [28]:
# Score the TF-IDF model on the held-out split; the result unpacks into the
# loss followed by the accuracy metric.
tfidf_scores = model_tfidf.evaluate(X_test_tfidf, y_test)
loss, accuracy = tfidf_scores
print(f'TF-IDF Model Accuracy: {accuracy:.2f}')

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 115ms/step - accuracy: 1.0000 - loss: 0.6341
TF-IDF Model Accuracy: 1.00


In [29]:
# Classify a new sentence with the TF-IDF model.
text = "Python is programming"
text_tfidf = vectorizer_tfidf.transform([text]).toarray()
results = model_tfidf.predict(text_tfidf)

# The network ends in a single sigmoid unit, so `results` holds one
# probability per sample. np.argmax over a single value is always 0, which
# made every input come out as "Neutral". Threshold the probability instead:
# the label encoding used for training is neutral -> 0, positive -> 1.
a = int(results[0][0] >= 0.5)
"Positive" if a == 1 else "Neutral"

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


'Neutral'

In [30]:
# Classify a new sentence with the Bag-of-Words model.
text = "Python is programming"
text_bow = vectorizer_bow.transform([text]).toarray()
results = model_bow.predict(text_bow)

# The network ends in a single sigmoid unit, so `results` holds one
# probability per sample. np.argmax over a single value is always 0, which
# made every input come out as "Neutral". Threshold the probability instead:
# the label encoding used for training is neutral -> 0, positive -> 1.
a = int(results[0][0] >= 0.5)
"Positive" if a == 1 else "Neutral"

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 49ms/step


'Neutral'