In [None]:
import pandas as pd
from textblob import TextBlob
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [None]:
import nltk
# Download the NLTK 'wordnet' resource.
# NOTE(review): nothing visible in this notebook calls WordNet directly —
# presumably fetched as a TextBlob/NLTK prerequisite; confirm it is needed.
nltk.download('wordnet')

In [None]:
# Read the raw tweet dataset from a CSV file in the working directory
DATA_PATH = "Mental-Health-Twitter.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Keep only the post_text column.
# - dropna: rows with missing text would stay NaN through the .str steps below
#   and then fail in TextBlob(x), which requires a string.
# - copy: the later cells assign columns on `df`; an explicit copy guarantees
#   those assignments operate on an independent frame, not a slice.
df = df[['post_text']].dropna(subset=['post_text']).copy()

In [None]:
# Lower-case every post and write the result back to the same column
lowercased_posts = df['post_text'].str.lower()
df['post_text'] = lowercased_posts

In [None]:
# Basic text cleaning in two regex passes:
#   1. strip every run of digits
#   2. strip every character that is neither a word character nor whitespace
#      (note: \w includes "_", so underscores are kept)
df['post_text'] = (
    df['post_text']
    .str.replace(r'\d+', '', regex=True)
    .str.replace(r'[^\w\s]', '', regex=True)
)

In [None]:
# Score each cleaned post with TextBlob and store the result in `polarity`
def _polarity(text):
    """Return the TextBlob sentiment polarity of a single post."""
    return TextBlob(text).sentiment.polarity


df['polarity'] = df['post_text'].apply(_polarity)

In [None]:
# Turn the numeric polarity into a two-class label.
# A polarity of exactly 0 falls on the 'Positive' side of the threshold.
def _label(score):
    """Map a polarity score to 'Positive' (score >= 0) or 'Negative'."""
    if score >= 0:
        return 'Positive'
    return 'Negative'


df['Sentiment'] = df['polarity'].apply(_label)

In [None]:
# Features are the cleaned posts, targets are the derived sentiment labels.
# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle repeatable between runs.
X, y = df['post_text'], df['Sentiment']
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Vectorize text
vectorizer = CountVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

In [None]:
# Fit a k-nearest-neighbours classifier (library default hyper-parameters)
# on the vectorized training posts and their sentiment labels
knn = KNeighborsClassifier()
knn.fit(X=X_train_vec, y=y_train)

In [None]:
# Classify the held-out posts and report the fraction predicted correctly
y_pred = knn.predict(X_test_vec)
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", test_accuracy)

In [None]:
# Optional: bar chart of how many posts received each sentiment label
sentiment_counts = df['Sentiment'].value_counts()
sentiment_counts.plot.bar(title='Sentiment Count')
plt.show()