# Import libraries

# Step 1: Load the Dataset

# Step 3: Data Preprocessing

# Step 4: Generate Embeddings

# Step 5: Indexing with FAISS

# Step 6: Query Input


# Step 7: Sentiment Analysis


# Example of taking user input and responding


In [None]:
import pandas as pd
import re
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load Journal Data
# The CSV is read from a hard-coded absolute Windows path, so this cell only runs on a
# machine where that exact file exists.
df = pd.read_csv(r'C:\Users\DELL\OneDrive - UPES\Desktop\IIT Kanpur\data\data.csv')
print(f"‚úÖ Loaded {len(df)} journal entries.")

# Stop early when the loaded frame is empty; everything below needs at least one entry.
if df.empty:
    print("‚ö† DataFrame is empty. Exiting program.")
    exit()

# Attach one synthetic calendar day per row, counting up from 2021-02-12 in row order.
# These dates are generated here; they are not read from the CSV.
df['date'] = pd.date_range(start='2021-02-12', periods=len(df), freq='D')

# Load Models
# Emotion classifier applied to each journal entry by analyze_sentiment.
sentiment_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")
print("‚úÖ Sentiment Analysis model loaded successfully!")

# Sentence encoder whose vectors are stored in the FAISS index.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
print("‚úÖ Sentence Transformer model loaded successfully!")

# Text2text generation model. The message names the model that is actually loaded
# (FLAN-T5); the earlier text claimed "BART", which this cell never loads.
# NOTE(review): no code visible in this cell calls `llm` -- confirm it is still needed here.
llm = pipeline("text2text-generation", model="google/flan-t5-base")
print("‚úÖ FLAN-T5 model loaded successfully!")

print("‚úÖ LLM loaded successfully for text generation!")

# Clean Text Function
def clean_text(text):
    """Normalize a journal entry before embedding and classification.

    The value is coerced to ``str``, every character that is neither a word
    character (letter, digit, underscore) nor whitespace is removed, and the
    result is lowercased.
    """
    as_string = str(text)
    without_punctuation = re.sub(r'[^\w\s]', '', as_string)
    return without_punctuation.lower()

# Normalize every answer; missing answers become empty strings first so that
# clean_text never receives NaN.
df['cleaned_text'] = df['Answer'].fillna("").apply(clean_text)

# Create Embeddings and Vector Store
embeddings = embedding_model.encode(df['cleaned_text'].tolist(), convert_to_numpy=True)

# Check for an empty result BEFORE reading embeddings.shape[1]. Previously the shape was
# read first, so an empty (possibly non-2-D) result would fail on that lookup and the
# warning below could never be shown.
if len(embeddings) > 0:
    dimension = embeddings.shape[1]
    # Flat (exhaustive) L2 index; vectors are added in df row order, so FAISS labels
    # can be used as positional row numbers of df.
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(np.array(embeddings, dtype=np.float32))
    print("‚úÖ FAISS index populated successfully!")
else:
    print("‚ö† No embeddings generated. Check input data.")
    exit()

# Retrieve Similar Entries
def retrieve_similar_entries(query, top_k=3):
    """Return the journal rows whose embeddings are closest to *query*.

    Args:
        query: Text to embed and look up in the FAISS index.
        top_k: Maximum number of rows to return.

    Returns:
        A slice of ``df`` with at most ``top_k`` rows, in the order FAISS
        returns them. An empty slice with the same columns is returned when
        the index holds no vectors.
    """
    if faiss_index.ntotal == 0:
        print("‚ö† FAISS index is empty! No retrieval possible.")
        return df.head(0)

    query_embedding = embedding_model.encode([query], convert_to_numpy=True)
    query_embedding = np.array(query_embedding, dtype=np.float32)

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with label -1, and df.iloc[-1] would silently return the LAST row.
    neighbour_count = min(top_k, faiss_index.ntotal)
    _distances, indices = faiss_index.search(query_embedding, neighbour_count)

    # Defensive: discard any -1 padding that is still present.
    valid_positions = [int(position) for position in indices[0] if position >= 0]
    return df.iloc[valid_positions]

# Analyze Sentiment
def analyze_sentiment(entry):
    """Classify *entry* with the emotion pipeline and return its top prediction.

    Returns:
        The first element of the pipeline output; callers in this cell read
        its ``'label'`` key.
    """
    # truncation=True clips entries longer than the model's maximum input length
    # instead of letting the pipeline raise on them.
    return sentiment_pipeline(entry, truncation=True)[0]

# Generate AI Reflection

def generate_reflection(current_entry, retrieved_entries):
    """Build a templated reflection from the emotions of similar entries.

    Args:
        current_entry: Text of the entry being reported on. Kept for interface
            compatibility; the template is driven by the retrieved entries only.
        retrieved_entries: Iterable of entry texts judged similar to the
            current one.

    Returns:
        A reflection string describing the emotional trend and an insight.
    """
    def _label_of(result):
        # analyze_sentiment may return the raw pipeline dict or a bare label
        # string; compare on the label in both cases. Comparing the dict itself
        # to "joy" is never true, which made every result "varied emotions".
        return result['label'] if isinstance(result, dict) else result

    past_emotions = [_label_of(analyze_sentiment(entry)) for entry in retrieved_entries]

    # Identify common emotional patterns.
    # The explicit non-empty check matters: all() over an empty list is True and
    # would report "consistent happiness" with no evidence at all.
    if past_emotions and all(emotion == "joy" for emotion in past_emotions):
        trend = "consistent happiness and accomplishment"
        insight = "You thrive on achieving goals, and these moments fuel your confidence."
    # NOTE(review): confirm the classifier can emit a "stress" label; if it
    # cannot, this branch never fires.
    elif "stress" in past_emotions and "joy" in past_emotions:
        trend = "a shift from stress to relief"
        insight = "You tend to feel stressed before major tasks but ultimately gain confidence from completing them."
    else:
        trend = "varied emotions"
        insight = "Your emotions fluctuate based on workload, but accomplishment consistently brings you joy."

    # Final reflection assembled from the chosen trend and insight.
    reflection = (
        f"Lately, you've experienced {trend}. "
        f"{insight} Keep recognizing your achievements, as they reinforce your sense of progress and capability."
    )

    return reflection


# Perform Sentiment Analysis with Enhanced RAG
# The report starts with a title line and a 50-character rule; one section per entry follows.
report = "\U0001F4DC *Journal Sentiment Analysis Report* \U0001F4DC\n" + "=" * 50 + "\n"

for index, row in df.iterrows():
    try:
        entry_text = row['cleaned_text']
        print("\n---------------------------------------\n")
        print(f"Entry: {entry_text}")

        # Retrieval returns DataFrame rows; only their cleaned text is used below.
        similar_entries_df = retrieve_similar_entries(entry_text)
        similar_entries_texts = similar_entries_df['cleaned_text'].tolist()
        retrieved_count = len(similar_entries_texts)
        print(f"‚úÖ Retrieved {retrieved_count} similar entries")

        current_sentiment = analyze_sentiment(entry_text)
        current_emotion = current_sentiment['label']
        print(f"üìä Emotion (Current Entry): {current_emotion}")

        ai_reflection = generate_reflection(entry_text, similar_entries_texts)
        print(f"üß† AI Reflection: {ai_reflection}")

        # Append the whole section at once, after every step for this entry succeeded.
        report += (
            f"\U0001F4C5 *Date:* {row['date']}\n"
            f"‚úç *Entry:* {entry_text}\n"
            f"\U0001F4CA *Emotion (Current Entry):* {current_emotion}\n"
            f"\U0001F50E *Similar Entries Retrieved:* {retrieved_count}\n"
            f"üß† *AI Reflection:* {ai_reflection}\n"
            + "-" * 50 + "\n"
        )
    except Exception as e:
        # Best effort: a failing entry is reported on stdout and left out of the report.
        print(f"‚ùå Error processing entry {index}: {e}")

# Save Report to File
with open("sentiment_analysis_report.txt", "w", encoding="utf-8") as report_file:
    report_file.write(report)

print(report)
print("‚úÖ Sentiment analysis report with RAG and AI-generated reflections successfully generated!")

In [None]:
import pandas as pd
import re
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

# Load Journal Data
# The CSV is read from a hard-coded absolute Windows path, so this cell only runs on a
# machine where that exact file exists.
df = pd.read_csv(r'C:\Users\DELL\OneDrive - UPES\Desktop\IIT Kanpur\data\data.csv')
print(f"‚úÖ Loaded {len(df)} journal entries.")

# Stop early when the loaded frame is empty; everything below needs at least one entry.
if df.empty:
    print("‚ö† DataFrame is empty. Exiting program.")
    exit()

# Attach one synthetic calendar day per row, counting up from 2021-02-12 in row order.
# These dates are generated here; they are not read from the CSV.
df['date'] = pd.date_range(start='2021-02-12', periods=len(df), freq='D')

# Load Models
# Emotion classifier applied to each journal entry by analyze_sentiment.
sentiment_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")
print("‚úÖ Sentiment Analysis model loaded successfully!")

# Sentence encoder whose vectors are stored in the FAISS index.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
print("‚úÖ Sentence Transformer model loaded successfully!")

# Text2text generation model that generate_reflection prompts for the reflection text.
llm = pipeline("text2text-generation", model="google/flan-t5-base")
print("‚úÖ FLAN-T5 model loaded successfully!")

# Clean Text Function
def clean_text(text):
    """Return *text* as a lowercase string with punctuation removed.

    Anything that is not a word character (letter, digit, underscore) or
    whitespace is dropped; the input is converted with ``str`` first.
    """
    punctuation = re.compile(r'[^\w\s]')
    return punctuation.sub('', str(text)).lower()

# Normalize every answer; missing answers become empty strings first so that
# clean_text never receives NaN.
df['cleaned_text'] = df['Answer'].fillna("").apply(clean_text)

# Create Embeddings and Vector Store
embeddings = embedding_model.encode(df['cleaned_text'].tolist(), convert_to_numpy=True)

# Check for an empty result BEFORE reading embeddings.shape[1]. Previously the shape was
# read first, so an empty (possibly non-2-D) result would fail on that lookup and the
# warning below could never be shown.
if len(embeddings) > 0:
    dimension = embeddings.shape[1]
    # Flat (exhaustive) L2 index; vectors are added in df row order, so FAISS labels
    # can be used as positional row numbers of df.
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(np.array(embeddings, dtype=np.float32))
    print("‚úÖ FAISS index populated successfully!")
else:
    print("‚ö† No embeddings generated. Check input data.")
    exit()

# Retrieve Similar Entries
def retrieve_similar_entries(query, top_k=3):
    """Return the cleaned texts of the journal entries closest to *query*.

    Args:
        query: Text to embed and look up in the FAISS index.
        top_k: Maximum number of entries to return.

    Returns:
        A list of at most ``top_k`` cleaned entry texts, in the order FAISS
        returns them; an empty list when the index holds no vectors.
    """
    if faiss_index.ntotal == 0:
        print("‚ö† FAISS index is empty! No retrieval possible.")
        return []

    query_embedding = embedding_model.encode([query], convert_to_numpy=True)

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with label -1, and df.iloc[-1] would silently return the LAST row.
    neighbour_count = min(top_k, faiss_index.ntotal)
    _distances, indices = faiss_index.search(query_embedding.astype(np.float32), neighbour_count)

    # Defensive: discard any -1 padding that is still present.
    valid_positions = [int(position) for position in indices[0] if position >= 0]
    return df.iloc[valid_positions]['cleaned_text'].tolist()

# Analyze Sentiment
def analyze_sentiment(entry):
    """Return the label of the emotion pipeline's top prediction for *entry*."""
    # truncation=True clips entries longer than the model's maximum input length
    # instead of letting the pipeline raise on them.
    return sentiment_pipeline(entry, truncation=True)[0]['label']

# Generate AI Reflection
def generate_reflection(current_entry, retrieved_entries):
    """Ask the text2text model for a reflection on an entry and its neighbours.

    Args:
        current_entry: Text of the journal entry being reported on.
        retrieved_entries: Texts of similar entries. Their emotions are
            counted, and both the texts and the counts go into the prompt.

    Returns:
        The ``generated_text`` of the model's first output.
    """
    from collections import Counter

    past_emotions = [analyze_sentiment(entry) for entry in retrieved_entries]

    # Count in encounter order so the dict's key order -- and therefore the
    # prompt text -- is the same on every run; iterating a set of strings is not.
    # The unused classification of current_entry and the unused dominant-emotion
    # lookup were dropped: neither fed into the prompt.
    emotion_counts = dict(Counter(past_emotions))

    summary_prompt = (
        f"User's journal entry: {current_entry}\n"
        f"Past similar entries and emotions: {retrieved_entries} ({emotion_counts})\n"
        f"Generate a meaningful reflection on the user's emotions and trends."
    )

    reflection = llm(summary_prompt, max_length=100, truncation=True)[0]['generated_text']
    return reflection

# Perform Sentiment Analysis with Enhanced RAG
# The report starts with a title line and a 50-character rule; one section per entry follows.
report = "\U0001F4DC *Journal Sentiment Analysis Report* \U0001F4DC\n" + "=" * 50 + "\n"

for index, row in df.iterrows():
    try:
        entry_text = row['cleaned_text']
        print("\n---------------------------------------\n")
        print(f"Entry: {entry_text}")

        similar_entries_texts = retrieve_similar_entries(entry_text)
        retrieved_count = len(similar_entries_texts)
        print(f"‚úÖ Retrieved {retrieved_count} similar entries")

        current_emotion = analyze_sentiment(entry_text)
        print(f"üìä Emotion (Current Entry): {current_emotion}")

        ai_reflection = generate_reflection(entry_text, similar_entries_texts)
        print(f"üß† AI Reflection: {ai_reflection}")

        # Append the whole section at once, after every step for this entry succeeded.
        report += (
            f"\U0001F4C5 *Date:* {row['date']}\n"
            f"‚úç *Entry:* {entry_text}\n"
            f"\U0001F4CA *Emotion (Current Entry):* {current_emotion}\n"
            f"\U0001F50E *Similar Entries Retrieved:* {retrieved_count}\n"
            f"üß† *AI Reflection:* {ai_reflection}\n"
            + "-" * 50 + "\n"
        )
    except Exception as e:
        # Best effort: a failing entry is reported on stdout and left out of the report.
        print(f"‚ùå Error processing entry {index}: {e}")

# Save Report to File
with open("sentiment_analysis_report.txt", "w", encoding="utf-8") as report_file:
    report_file.write(report)

print(report)
print("‚úÖ Sentiment analysis report with RAG and AI-generated reflections successfully generated!")


✅ Loaded 1473 journal entries.


Device set to use cpu


✅ Sentiment Analysis model loaded successfully!
✅ Sentence Transformer model loaded successfully!


Device set to use cpu


✅ FLAN-T5 model loaded successfully!
✅ FAISS index populated successfully!

---------------------------------------

Entry: my family was the most salient part of my day since most days the care of my 2 children occupies the majority of my time they are 2 years old and 7 months and i love them but they also require so much attention that my anxiety is higher than ever i am often overwhelmed by the care the require but at the same i am so excited to see them hit developmental and social milestones
✅ Retrieved 3 similar entries
📊 Emotion (Current Entry): fear
🧠 AI Reflection: The user's journal entry: my family was the most salient part of my day since most days the care of my 2 children occupies the majority of my time they are 2 years old and 7 months and i love them but they also require so much attention that my anxiety is higher than ever i am often overwhelmed by the care the require but at the same i am so excited to see them hit developmental and social milestones

--

In [None]:
import pandas as pd
import re
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import pipeline


# Load Journal Data
# The CSV is read from a hard-coded absolute Windows path, so this cell only runs on a
# machine where that exact file exists.
df = pd.read_csv(r'C:\Users\DELL\OneDrive - UPES\Desktop\IIT Kanpur\data\data.csv')
print(f"‚úÖ Loaded {len(df)} journal entries.")

# Stop early when the loaded frame is empty; everything below needs at least one entry.
if df.empty:
    print("‚ö† DataFrame is empty. Exiting program.")
    exit()

# Attach one synthetic calendar day per row, counting up from 2021-02-12 in row order.
# These dates are generated here; they are not read from the CSV.
df['date'] = pd.date_range(start='2021-02-12', periods=len(df), freq='D')

# Load Models
# Emotion classifier applied to each journal entry by analyze_sentiment.
sentiment_pipeline = pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base")
print("‚úÖ Sentiment Analysis model loaded successfully!")

# Sentence encoder whose vectors are stored in the FAISS index.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
print("‚úÖ Sentence Transformer model loaded successfully!")

# Text2text generation model that generate_reflection prompts for the reflection text.
llm = pipeline("text2text-generation", model="google/flan-t5-large")
print("‚úÖ Text Generation model (FLAN-T5) loaded successfully!")


# Clean Text Function
def clean_text(text):
    """Normalize a journal entry: drop punctuation, lowercase, trim the ends.

    The input is converted with ``str``; characters that are neither word
    characters (letter, digit, underscore) nor whitespace are removed, then
    the result is lowercased and leading/trailing whitespace is stripped.
    """
    without_punctuation = re.sub(r'[^\w\s]', '', str(text))
    lowered = without_punctuation.lower()
    return lowered.strip()

# Normalize every answer; missing answers become empty strings first so that
# clean_text never receives NaN.
df['cleaned_text'] = df['Answer'].fillna("").apply(clean_text)

# Create Embeddings and Vector Store
embeddings = embedding_model.encode(df['cleaned_text'].tolist(), convert_to_numpy=True)

# Check for an empty result BEFORE reading embeddings.shape[1]. Previously the shape was
# read first, so an empty (possibly non-2-D) result would fail on that lookup and the
# warning below could never be shown.
if len(embeddings) > 0:
    dimension = embeddings.shape[1]
    # Flat (exhaustive) L2 index; vectors are added in df row order, so FAISS labels
    # can be used as positional row numbers of df.
    faiss_index = faiss.IndexFlatL2(dimension)
    faiss_index.add(np.array(embeddings, dtype=np.float32))
    print("‚úÖ FAISS index populated successfully!")
else:
    print("‚ö† No embeddings generated. Check input data.")
    exit()

# Retrieve Similar Entries
def retrieve_similar_entries(query, top_k=3):
    """Return the cleaned texts of the journal entries closest to *query*.

    Args:
        query: Text to embed and look up in the FAISS index.
        top_k: Maximum number of entries to return.

    Returns:
        A list of at most ``top_k`` cleaned entry texts, in the order FAISS
        returns them; an empty list when the index holds no vectors.
    """
    if faiss_index.ntotal == 0:
        print("‚ö† FAISS index is empty! No retrieval possible.")
        return []

    query_embedding = embedding_model.encode([query], convert_to_numpy=True)

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with label -1, and df.iloc[-1] would silently return the LAST row.
    neighbour_count = min(top_k, faiss_index.ntotal)
    _distances, indices = faiss_index.search(query_embedding.astype(np.float32), neighbour_count)

    # Defensive: discard any -1 padding that is still present.
    valid_positions = [int(position) for position in indices[0] if position >= 0]
    return df.iloc[valid_positions]['cleaned_text'].tolist()

# Analyze Sentiment
def analyze_sentiment(entry):
    """Return the label of the emotion pipeline's top prediction for *entry*."""
    # truncation=True clips entries longer than the model's maximum input length
    # instead of letting the pipeline raise on them.
    return sentiment_pipeline(entry, truncation=True)[0]['label']

# Generate AI Reflection
def generate_reflection(current_entry, retrieved_entries):
    """Ask the text2text model for a reflection on an entry's emotional pattern.

    Args:
        current_entry: Text of the journal entry being reported on.
        retrieved_entries: Texts of similar entries. Only the counts of their
            emotions go into the prompt, not the texts themselves.

    Returns:
        The ``generated_text`` of the model's first output. Generation samples
        (``do_sample=True``), so the text can differ between calls.
    """
    from collections import Counter

    past_emotions = [analyze_sentiment(entry) for entry in retrieved_entries]

    # Count in encounter order so the dict's key order -- and therefore the
    # prompt text -- is the same on every run; iterating a set of strings is not.
    # The unused classification of current_entry and the unused dominant-emotion
    # lookup were dropped: neither fed into the prompt.
    emotion_counts = dict(Counter(past_emotions))

    # Construct prompt for text generation
    prompt = (
        f"User's journal entry: {current_entry}\n"
        f"Past similar entries show emotions: {emotion_counts}\n"
        f"Based on these, generate a deep and insightful reflection about the user's emotional patterns and potential future trends."
    )

    # The leftover debug print was removed: the caller already prints the
    # reflection, so every reflection appeared twice on stdout.
    reflection = llm(prompt, max_new_tokens=150, do_sample=True, temperature=0.5)[0]['generated_text']
    return reflection


# Perform Sentiment Analysis with Enhanced RAG
# The report starts with a title line and a 50-character rule; one section per entry follows.
report = "\U0001F4DC *Journal Sentiment Analysis Report* \U0001F4DC\n" + "=" * 50 + "\n"

for index, row in df.iterrows():
    try:
        entry_text = row['cleaned_text']
        print("\n---------------------------------------\n")
        print(f"Entry: {entry_text}")

        similar_entries_texts = retrieve_similar_entries(entry_text)
        retrieved_count = len(similar_entries_texts)
        print(f"‚úÖ Retrieved {retrieved_count} similar entries")

        current_emotion = analyze_sentiment(entry_text)
        print(f"üìä Emotion (Current Entry): {current_emotion}")

        ai_reflection = generate_reflection(entry_text, similar_entries_texts)
        print(f"üß† AI Reflection: {ai_reflection}")

        # Append the whole section at once, after every step for this entry succeeded.
        report += (
            f"\U0001F4C5 *Date:* {row['date']}\n"
            f"‚úç *Entry:* {entry_text}\n"
            f"\U0001F4CA *Emotion (Current Entry):* {current_emotion}\n"
            f"\U0001F50E *Similar Entries Retrieved:* {retrieved_count}\n"
            f"üß† *AI Reflection:* {ai_reflection}\n"
            + "-" * 50 + "\n"
        )
    except Exception as e:
        # Best effort: a failing entry is reported on stdout and left out of the report.
        print(f"‚ùå Error processing entry {index}: {e}")

# Save Report to File
with open("sentiment_analysis_report.txt", "w", encoding="utf-8") as report_file:
    report_file.write(report)

print(report)
print("‚úÖ Sentiment analysis report with RAG and AI-generated reflections successfully generated!")
