<a href="https://colab.research.google.com/github/sanjayande/studymate/blob/main/AIOVERTHINGDETECTOR_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

# Hand-labelled toy corpus: each record pairs a sentence with its
# overthinking level ("Low", "Medium" or "High").
samples = [
    ("I keep thinking about my future again and again", "High"),
    ("I cannot stop worrying about what people think of me", "High"),
    ("I feel anxious and replay conversations in my head", "High"),
    ("I am a bit nervous about tomorrow", "Medium"),
    ("I feel calm and relaxed today", "Low"),
    ("Everything is going fine", "Low"),
    ("I overanalyze every small mistake", "High"),
    ("I am stressed and thinking too much", "Medium"),
    ("Today is a normal day", "Low"),
    ("I feel peaceful", "Low"),
]

# Column-oriented view of the same records, kept under the name `data`.
data = {
    "text": [sentence for sentence, _ in samples],
    "label": [level for _, level in samples],
}

df = pd.DataFrame(data)
df


Unnamed: 0,text,label
0,I keep thinking about my future again and again,High
1,I cannot stop worrying about what people think...,High
2,I feel anxious and replay conversations in my ...,High
3,I am a bit nervous about tomorrow,Medium
4,I feel calm and relaxed today,Low
5,Everything is going fine,Low
6,I overanalyze every small mistake,High
7,I am stressed and thinking too much,Medium
8,Today is a normal day,Low
9,I feel peaceful,Low


In [2]:
# Save the dataset to CSV; index=False leaves the row index out of the file.
df.to_csv("overthinking_data.csv", index=False)


In [3]:
# List the working directory to check that the CSV was created.
!ls


overthinking_data.csv  sample_data


In [4]:
import re
import nltk
# Fetch NLTK's stop-word corpus so stopwords.words(...) can be used later.
nltk.download('stopwords')

from nltk.corpus import stopwords


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [5]:
def clean_text(text, stop_words=None):
    """Normalise a sentence for vectorisation.

    Lowercases ``text``, deletes every character that is not ``a-z`` or
    whitespace, splits on whitespace, drops stop words and re-joins the
    remaining words with single spaces.

    Args:
        text: The raw sentence. Non-string values (for example the NaN that
            ``pd.read_csv`` yields for an empty cell) are treated as empty.
        stop_words: Optional iterable of lowercase words to remove. When
            omitted, NLTK's English stop-word list is used.

    Returns:
        The cleaned sentence; an empty string when nothing survives.
    """
    if not isinstance(text, str):
        return ''

    if stop_words is None:
        # Build the list once per call rather than once per word: the
        # original called stopwords.words('english') for every token.
        stop_words = stopwords.words('english')
    # A set gives constant-time membership tests.
    stop_words = set(stop_words)

    text = text.lower()                      # lowercase
    text = re.sub(r'[^a-z\s]', '', text)     # remove numbers & symbols
    return ' '.join(w for w in text.split() if w not in stop_words)


In [6]:
import pandas as pd

# Reload the dataset from the CSV written earlier and preview the first rows.
df = pd.read_csv("overthinking_data.csv")
df.head()


Unnamed: 0,text,label
0,I keep thinking about my future again and again,High
1,I cannot stop worrying about what people think...,High
2,I feel anxious and replay conversations in my ...,High
3,I am a bit nervous about tomorrow,Medium
4,I feel calm and relaxed today,Low


In [7]:
# Add a cleaned copy of each sentence as a new column and preview the result.
df['clean_text'] = df['text'].apply(clean_text)
df.head()


Unnamed: 0,text,label,clean_text
0,I keep thinking about my future again and again,High,keep thinking future
1,I cannot stop worrying about what people think...,High,cannot stop worrying people think
2,I feel anxious and replay conversations in my ...,High,feel anxious replay conversations head
3,I am a bit nervous about tomorrow,Medium,bit nervous tomorrow
4,I feel calm and relaxed today,Low,feel calm relaxed today


In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [9]:
X = df['clean_text']   # model input: the cleaned sentences
y = df['label']        # target: overthinking level (Low, Medium, High)


In [10]:
# TF-IDF encoder; max_features caps the vocabulary at the 3000 most frequent terms.
vectorizer = TfidfVectorizer(max_features=3000)


In [11]:
# Learn the vocabulary and IDF weights from the cleaned text and encode every
# row as a TF-IDF vector.
# NOTE(review): the vectorizer is fitted on all rows before the train/test
# split below, so the held-out rows influence the features; fit on the
# training rows only for an unbiased evaluation.
X_tfidf = vectorizer.fit_transform(X)


In [12]:
# (rows, features) of the TF-IDF matrix.
X_tfidf.shape


(10, 31)

In [13]:
from sklearn.model_selection import train_test_split


In [14]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible.
# NOTE(review): with only 10 rows this leaves 2 test rows, and the split is
# not stratified, so a class can be absent from the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_tfidf,    # input features
    y,          # labels
    test_size=0.2,
    random_state=42
)


In [15]:
# Report the (rows, columns) shape of each split.
train_shape, test_shape = X_train.shape, X_test.shape
print("Training data size:", train_shape)
print("Testing data size:", test_shape)


Training data size: (8, 31)
Testing data size: (2, 31)


In [17]:
from sklearn.linear_model import LogisticRegression


In [18]:
# Logistic-regression classifier; max_iter sets the solver's iteration cap to 1000.
model = LogisticRegression(max_iter=1000)


In [19]:
# Train the classifier on the training split.
model.fit(X_train, y_train)


In [20]:
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


In [21]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)


In [22]:
# Per-class precision, recall and F1 on the held-out rows.
# NOTE(review): the test split has only 2 rows and no "Medium" example, so
# these scores say little about real performance.
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

        High       1.00      1.00      1.00         1
         Low       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2



In [23]:
# Fraction of held-out rows whose predicted label matches the true label.
# NOTE(review): computed on only 2 rows, so treat the value as a smoke test.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 1.0


In [24]:
# Confusion matrix: rows are true labels, columns are predicted labels.
# Only labels that occur in y_test or y_pred get a row/column.
print(confusion_matrix(y_test, y_pred))


[[1 0]
 [0 1]]


In [25]:
def predict_overthinking(text):
    """Classify one piece of text with the fitted model.

    The text is passed through ``clean_text``, vectorised with the
    notebook-level ``vectorizer`` and scored by the notebook-level ``model``.

    Args:
        text: Raw user sentence.

    Returns:
        The first (only) element of ``model.predict`` for this text.
    """
    # transform() takes a collection of documents, so wrap the single string.
    features = vectorizer.transform([clean_text(text)])
    predicted_labels = model.predict(features)
    return predicted_labels[0]


In [26]:
# Smoke-test the predictor on a sentence that is not in the training data.
test_text = "I keep thinking about my mistakes again and again"
result = predict_overthinking(test_text)
print("Overthinking Level:", result)


Overthinking Level: High


In [27]:
def get_suggestion(level):
    """Return the advice text that matches an overthinking level.

    Args:
        level: Predicted level; "High" and "Medium" have dedicated messages.

    Returns:
        The advice string. Any other value gets the "calm" message.
    """
    # Checked in order; the first level equal to `level` wins.
    advice_by_level = (
        (
            "High",
            "You may be overthinking a lot. "
            "Try grounding exercises, deep breathing, or taking a short break.",
        ),
        (
            "Medium",
            "You seem slightly overthinking. "
            "Focus on one task at a time and avoid repetitive thoughts.",
        ),
    )
    for known_level, message in advice_by_level:
        if level == known_level:
            return message

    # Fallback for "Low" and for anything unrecognised.
    return (
        "You seem calm. "
        "Maintain this balance and continue positive habits."
    )


In [28]:
def analyze_text(text):
    """Print the predicted overthinking level and advice for ``text``.

    Args:
        text: Raw user sentence to analyse.

    Returns:
        None; the three-line report is written to standard output.
    """
    level = predict_overthinking(text)
    report = (
        ("User Text:", text),
        ("Detected Overthinking Level:", level),
        ("Suggestion:", get_suggestion(level)),
    )
    for heading, value in report:
        print(heading, value)


In [30]:
# End-to-end demo: prints the text, the predicted level and the advice.
analyze_text("I keep thinking about the future again and again")


User Text: I keep thinking about the future again and again
Detected Overthinking Level: High
Suggestion: You may be overthinking a lot. Try grounding exercises, deep breathing, or taking a short break.
