In [10]:
!pip install sentence-transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [11]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sentence_transformers import SentenceTransformer
from sklearn.linear_model import LogisticRegression  # Changed import
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [5]:
# 1. Prepare the dataset
# Each sentence is kept next to its example label so the two columns cannot
# drift out of alignment when rows are added or removed.
# NOTE(review): label 1 looks like "tech/AI-related" and 0 like "everyday
# topic" -- inferred from the sentences only; confirm the intended meaning.
labelled_sentences = [
    ("I love programming in Python", 1),
    ("The weather today is sunny", 0),
    ("Artificial Intelligence is fascinating", 1),
    ("I enjoy long walks on the beach", 0),
    ("Machine learning can solve complex problems", 1),
    ("It's raining cats and dogs", 0),
    ("Deep learning models require a lot of data", 1),
    ("I went to the cinema yesterday", 0),
    ("Natural language processing is a subset of AI", 1),
    ("She loves painting and sculpting", 0),
]

# Column-oriented dict with the keys 'text' and 'label' (in that order).
data = {
    'text': [sentence for sentence, _ in labelled_sentences],
    'label': [label for _, label in labelled_sentences],
}

In [None]:
# Tabular view of the raw dict: one row per sentence, columns 'text' and 'label'.
df = pd.DataFrame(data)

# Split into train and test.
# test_size=0.2 holds out 20% of the rows (2 of the 10 defined above);
# random_state=42 makes the shuffle repeatable between runs.
texts = df['text']
labels = df['label']
X_train, X_test, y_train, y_test = train_test_split(
    texts,
    labels,
    test_size=0.2,
    random_state=42,
)

In [12]:
# 2. Encode the text data
# Identifier of the pretrained sentence-embedding model handed to
# SentenceTransformer below.
model_name = 'all-MiniLM-L6-v2'

# Instantiate the encoder once; later cells reuse `sentence_model`.
sentence_model = SentenceTransformer(model_name_or_path=model_name)

In [7]:
# Turn each split into embeddings with the same encoder.
# convert_to_tensor=False is passed explicitly so the result is not returned
# as a torch tensor; the embeddings are fed to scikit-learn in a later cell.
print("Encoding training data...")
train_sentences = X_train.tolist()
X_train_embeddings = sentence_model.encode(train_sentences, convert_to_tensor=False)

print("Encoding testing data...")
test_sentences = X_test.tolist()
X_test_embeddings = sentence_model.encode(test_sentences, convert_to_tensor=False)

Encoding training data...
Encoding testing data...


In [8]:
# 3. Initialize and train the classifier
print("Training the classifier...")

# Logistic regression on the sentence embeddings. random_state=42 is passed
# for repeatability; max_iter=1000 caps the solver at 1000 iterations.
classifier = LogisticRegression(max_iter=1000, random_state=42)

# Fit on the training embeddings and their labels (last expression of the
# cell, so the notebook also displays the fitted estimator).
classifier.fit(X_train_embeddings, y_train)

Training the classifier...


In [9]:
# 4. Make predictions
print("Making predictions on the test set...")
y_pred = classifier.predict(X_test_embeddings)

# 5. Evaluate the model
# Every metric below compares the held-out labels (y_test) with y_pred.
# NOTE: the recorded run has only 2 test rows, so these scores are
# illustrative rather than statistically meaningful.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.2f}")

# Per-class precision / recall / F1 and support.
print("\nClassification Report:")
report = classification_report(y_test, y_pred)
print(report)

# Counts arranged with true labels as rows and predicted labels as columns.
print("Confusion Matrix:")
matrix = confusion_matrix(y_test, y_pred)
print(matrix)

Making predictions on the test set...

Accuracy: 1.00

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Confusion Matrix:
[[1 0]
 [0 1]]
