In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Sample data
data = {
    'text': [
        "Hey handsome, how's your day going?",
        "Don't forget to submit the report by tonight.",
        "You're looking amazing today!",
        "Can we schedule a meeting for tomorrow?",
        "I love the way you think.",
        "Please review the attached document.",
        "Your smile is contagious.",
        "Let's catch up over coffee sometime.",
        "The server is down again, please check.",
        "I can't stop thinking about you."
    ],
    'label': [
        'flirt',
        'not_flirt',
        'flirt',
        'not_flirt',
        'flirt',
        'not_flirt',
        'flirt',
        'flirt',
        'not_flirt',
        'flirt'
    ]
}

df = pd.DataFrame(data)

df = pd.read_csv("flirting_rated.csv")
# Extra cleaning
df = df.dropna()

# Features and labels
X = df['final_messages']
y = df['polarity']

# Split into training and testing (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize TF-IDF Vectorizer
vectorizer = TfidfVectorizer(stop_words='english')

# Fit and transform the training data
X_train_tfidf = vectorizer.fit_transform(X_train)

# Transform the testing data
X_test_tfidf = vectorizer.transform(X_test)

# Initialize the classifier
classifier = LogisticRegression()

# Train the classifier
classifier.fit(X_train_tfidf, y_train)

# Predict on the test set
y_pred = classifier.predict(X_test_tfidf)

# Evaluation metrics
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# New sentences to predict
new_sentences = [
    "Are you free this weekend for a date?",
    "Please send me the latest sales figures.",
    "You have the most beautiful eyes.",
    "The meeting has been rescheduled to 3 PM.",
    "You're looking amazing today!",
      "I want you for fucking"
]

# Transform the new sentences
new_tfidf = vectorizer.transform(new_sentences)

# Predict
predictions = classifier.predict(new_tfidf)

# Display predictions
for sentence, label in zip(new_sentences, predictions):
    print(f"Sentence: \"{sentence}\" --> Prediction: {label}")


Accuracy: 0.8695652173913043

Classification Report:
              precision    recall  f1-score   support

         0.0       0.87      1.00      0.93       479
         1.0       0.96      0.23      0.37        96

    accuracy                           0.87       575
   macro avg       0.91      0.61      0.65       575
weighted avg       0.88      0.87      0.83       575

Sentence: "Are you free this weekend for a date?" --> Prediction: 0.0
Sentence: "Please send me the latest sales figures." --> Prediction: 0.0
Sentence: "You have the most beautiful eyes." --> Prediction: 0.0
Sentence: "The meeting has been rescheduled to 3 PM." --> Prediction: 0.0
Sentence: "You're looking amazing today!" --> Prediction: 0.0
Sentence: "I want you for fucking" --> Prediction: 0.0
