In [25]:
# Import required libraries
import pandas as pd
import numpy as np

In [26]:
# Load the dataset
df = pd.read_csv('/content/training.csv')
df.sample(5)

Unnamed: 0,text,label
10022,i have to squint with a magnifying glass to re...,1
4076,i definitely have a ton to learn still and i f...,1
7188,i start to feel happy about where i am an unex...,1
306,im feeling really quite angry,3
11524,im feeling really bitchy so just stop reading ...,3


In [27]:
# Data cleaning

# Convert to lower case
df['text']=df['text'].str.lower()
df.sample()

Unnamed: 0,text,label
9869,i get the happy i can die now feeling and i ho...,1


In [28]:
# Remove HTML Tags and URL's using regular expression
import re

# function to remove html tags
def remove_html_tags(text):
    pattern = re.compile('<.*?>')
    return pattern.sub(r'', text)

# Function to remove url's
def remove_url(text):
    pattern = re.compile(r'https?://\S+|www\.\S+')
    return pattern.sub(r'', text)

for text in df['text']:
  text = remove_html_tags(text)

df['text'] = df['text'].apply(remove_url)

df.sample()

Unnamed: 0,text,label
9006,i feel like all this allergen free cooking is ...,1


In [29]:
# Remove punctuations
import string,time
exclude = string.punctuation

# Function to remove punctuations
def remove_punctuation(text):
  for char in exclude:
    text = text.replace(char,'')
  return text

df['text'] = df['text'].apply(remove_punctuation)
df.sample()

Unnamed: 0,text,label
6009,i began to feel a bit regretful,0


In [30]:
# Convert the text to vectors using vectorizer and train the model
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

X = df['text']
y = df['label'].map({
    0: "sadness",
    1: "joy",
    2: "love",
    3: "anger",
    4: "fear",
    5: "surprise"
})

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = LogisticRegression()
model.fit(X_train_vec,y_train)
y_pred = model.predict(X_test_vec)


from sklearn.metrics import accuracy_score
print("Accuracy:", accuracy_score(y_test, y_pred))

Accuracy: 0.84125


In [31]:
import pandas as pd
from datetime import datetime
from textblob import TextBlob
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib

In [32]:
# Taking input from user and analysis the mental health
LOG_FILE = "user_mood_log.csv"
NEGATIVE_EMOTIONS = ["sadness", "anger", "fear"]

EMOTION_TO_CONDITION = {
    "sadness": "Possible sign of depression or low emotional well-being.",
    "anger": "May indicate stress, frustration, or irritability.",
    "fear": "Could be linked to anxiety or worry.",
    "joy": "Positive mood, suggests emotional well-being.",
    "love": "Healthy emotional attachment or connection.",
    "surprise": "Neutral emotion; context-dependent."
}

# Predict emotion using trained model
def predict_emotion(text):
    # Transform the input text using vectorizer
    text_vec = vectorizer.transform([text])
    return model.predict(text_vec)[0]

def get_sentiment(text):
    return TextBlob(text).sentiment.polarity

# Save entry to CSV
def log_entry(text, emotion, sentiment):
    entry = {
        "date": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "text": text,
        "predicted_emotion": emotion,
        "sentiment_score": sentiment,
        "health_insight": EMOTION_TO_CONDITION.get(emotion, "Unknown")
    }

    if os.path.exists(LOG_FILE):
        df = pd.read_csv(LOG_FILE)
        df = pd.concat([df, pd.DataFrame([entry])], ignore_index=True)
    else:
        df = pd.DataFrame([entry])

    df.to_csv(LOG_FILE, index=False)

# Analyze trends of user's previous data
def analyze_trends():
    if not os.path.exists(LOG_FILE):
        print("No data available yet.")
        return

    df = pd.read_csv(LOG_FILE)
    total = len(df)
    negative_count = df["predicted_emotion"].isin(NEGATIVE_EMOTIONS).sum()
    negative_pct = round((negative_count / total) * 100, 2)

    # Consecutive sadness streak
    sad_streak = 0
    max_sad_streak = 0
    for emotion in df["predicted_emotion"]:
        if emotion == "sadness":
            sad_streak += 1
            max_sad_streak = max(max_sad_streak, sad_streak)
        else:
            sad_streak = 0

    # Negative emotion streak
    df["is_negative"] = df["predicted_emotion"].isin(NEGATIVE_EMOTIONS)
    neg_streak = 0
    max_neg_streak = 0
    for val in df["is_negative"]:
        if val:
            neg_streak += 1
            max_neg_streak = max(max_neg_streak, neg_streak)
        else:
            neg_streak = 0

    # Mood change frequency
    df["prev_emotion"] = df["predicted_emotion"].shift(1)
    df["mood_changed"] = df["predicted_emotion"] != df["prev_emotion"]
    mood_swings = df["mood_changed"].sum()



    print("\n--- Mood Summary ---")
    print(f"Total entries: {total}")
    print(f"Negative mood entries: {negative_count} ({negative_pct}%)")
    print(f"Longest sadness streak: {max_sad_streak}")
    print(f"Longest negative mood streak: {max_neg_streak}")
    print(f"Number of mood changes: {mood_swings}")
    print("------------------------")

# Input loop to take user input
print("Heyy! how are you feeling. Type 'exit' to quit.\n")

while True:
    user_input = input("How are you feeling today? > ").strip()
    if user_input.lower() == "exit":
        print("Goodbye.")
        break

    emotion = predict_emotion(user_input)
    insight = EMOTION_TO_CONDITION.get(emotion, "Unknown")

    print(f"Predicted Emotion: {emotion}")
    print(f"Health Insight: {insight}")

    log_entry(user_input, emotion,get_sentiment(user_input))
    analyze_trends()
    print("\n")

Heyy! how are you feeling. Type 'exit' to quit.

How are you feeling today? > hopeless
Predicted Emotion: sadness
Health Insight: Possible sign of depression or low emotional well-being.

--- Mood Summary ---
Total entries: 7
Negative mood entries: 5 (71.43%)
Longest sadness streak: 2
Longest negative mood streak: 2
Number of mood changes: 5
------------------------


How are you feeling today? > hopeless
Predicted Emotion: sadness
Health Insight: Possible sign of depression or low emotional well-being.

--- Mood Summary ---
Total entries: 1
Negative mood entries: 1 (100.0%)
Longest sadness streak: 1
Longest negative mood streak: 1
Number of mood changes: 1
------------------------


How are you feeling today? > exit
Goodbye.


In [33]:
log = pd.read_csv('/content/user_mood_log.csv')
log.head()

Unnamed: 0,date,text,predicted_emotion,sentiment_score,health_insight
0,2025-07-08 07:58:38,hopeless,sadness,0.0,Possible sign of depression or low emotional w...
