In [1]:
!pip install transformers
!pip install sentence_transformers
!pip install langdetect
!pip install gradio

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence_transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.11.0->sentence_transformers)
 

In [None]:
!pip install langdetect
!pip install gradio

Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: langdetect
  Building wheel for langdetect (setup.py) ... [?25l[?25hdone
  Created wheel for langdetect: filename=langdetect-1.0.9-py3-none-any.whl size=993222 sha256=804ac52b8d9434ec34c14d438a772569676e31177429bce2eb6fdbdc65f429ff
  Stored in directory: /root/.cache/pip/wheels/0a/f2/b2/e5ca405801e05eb7c8ed5b3b4bcf1fcabcd6272c167640072e
Successfully built langdetect
Installing collected packages: langdetect
Successfully installed langdetect-1.0.9
Collecting gradio
  Downloading gradio-5.16.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fast

In [None]:
#Final model
import torch
import pandas as pd
import gradio as gr
import nltk
from nltk.corpus import stopwords
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
import re

# Fetch the NLTK stop-word corpus and keep the English list as a set; preprocess_text
# uses it for membership tests when filtering question tokens.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Question/answer dataset. The loop further down reads its "Question" and "Answer" columns.
# NOTE(review): absolute /content path — presumably the file is uploaded to the Colab
# session before this cell runs; confirm or make the location configurable.
csv_path = "/content/Woman_Healthcare_new_multi - training_data.csv"
df_healthcare = pd.read_csv(csv_path)

def contains_hindi(text):
    """Tell whether ``text`` holds at least one Devanagari character.

    The input is stringified first, so non-string values are accepted. The
    check covers the Unicode block U+0900 through U+097F.
    """
    devanagari_hit = re.search(r'[\u0900-\u097F]', str(text))
    return devanagari_hit is not None

def detect_language(text):
    """Classify ``text`` as Hindi (``'hi'``) or English (``'en'``).

    Any Devanagari character makes the text Hindi outright. Otherwise
    ``detect`` (from langdetect) is consulted and every non-Hindi result is
    folded into ``'en'``. If detection raises, the text counts as English when
    it has at least one ASCII letter and as Hindi otherwise.

    Args:
        text: Value to classify; it is stringified before inspection.

    Returns:
        ``'hi'`` or ``'en'``.
    """
    text = str(text)
    if contains_hindi(text):
        return 'hi'
    try:
        detected = detect(text)
    except Exception:
        # Catch ordinary errors only: a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        return 'en' if re.search(r'[a-zA-Z]', text) else 'hi'
    return 'hi' if detected == 'hi' else 'en'

def preprocess_text(text):
    """Normalise a question before it is embedded.

    Text containing Devanagari is returned untouched. Anything else is
    lower-cased, split on whitespace, stripped of English stop words and
    re-joined with single spaces.
    """
    if contains_hindi(text):
        return text
    kept_tokens = (token for token in text.lower().split() if token not in stop_words)
    return " ".join(kept_tokens)

# Split the dataset into per-language QA lists, keyed on the language of the answer text.
english_qa, hindi_qa = [], []

for _, row in df_healthcare.iterrows():
    qa_pair = {"question": row["Question"], "context": row["Answer"]}
    answer_lang = detect_language(row["Answer"])
    if answer_lang != 'hi':
        english_qa.append(qa_pair)
    elif contains_hindi(row["Answer"]):
        # A 'hi' verdict is only kept when the answer really contains Devanagari.
        hindi_qa.append(qa_pair)

# One multilingual sentence encoder serves both languages.
embedding_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# The answers (not the questions) are what user queries get matched against.
hindi_corpus = [pair["context"] for pair in hindi_qa]
english_corpus = [pair["context"] for pair in english_qa]

hindi_embeddings = embedding_model.encode(hindi_corpus, convert_to_tensor=True)
english_embeddings = embedding_model.encode(english_corpus, convert_to_tensor=True)

def find_best_context(user_question):
    """Pick the stored answer that best matches ``user_question``.

    The question's language selects the Hindi or English corpus. The three
    highest cosine-similarity answers are then examined in order; the first
    one scoring at least 0.2 whose detected language equals the question's
    language is returned.

    Returns:
        The matching answer text, or a language-appropriate apology string
        when the corpus is empty or no candidate qualifies.
    """
    user_language = detect_language(user_question)
    question_embedding = embedding_model.encode(preprocess_text(user_question), convert_to_tensor=True)

    asking_in_hindi = user_language == 'hi'
    if asking_in_hindi:
        corpus_embeddings, qa_dataset = hindi_embeddings, hindi_qa
        empty_corpus_message = "क्षमा करें, हिंदी में कोई उत्तर उपलब्ध नहीं है।"
    else:
        corpus_embeddings, qa_dataset = english_embeddings, english_qa
        empty_corpus_message = "Sorry, no answers available in English."
    if len(qa_dataset) == 0:
        return empty_corpus_message

    similarity_scores = util.pytorch_cos_sim(question_embedding, corpus_embeddings)[0]
    top_k = min(3, len(similarity_scores))
    best_matches = torch.topk(similarity_scores, k=top_k)

    for score_tensor, index_tensor in zip(best_matches.values, best_matches.indices):
        if score_tensor.item() < 0.2:
            continue
        candidate = qa_dataset[index_tensor.item()]["context"]
        if detect_language(candidate) == user_language:
            return candidate

    if asking_in_hindi:
        return "क्षमा करें, मैं आपके प्रश्न का कोई अच्छा उत्तर नहीं ढूंढ पाया। कृपया अपना प्रश्न दोबारा पूछें।"
    return "I'm sorry, I couldn't find a good match for your question. Please try rephrasing."

def get_answer(user_question):
    """Produce the chatbot reply for a single question.

    An apology string from ``find_best_context`` (recognised by containing
    "sorry" or "क्षमा") is passed through unchanged. Any other result is
    prefixed with "उत्तर: " for Hindi questions and "Answer: " otherwise.
    """
    best_context = find_best_context(user_question)
    if isinstance(best_context, str):
        looks_like_apology = "sorry" in best_context.lower() or "क्षमा" in best_context
        if looks_like_apology:
            return best_context
    if detect_language(user_question) == 'hi':
        return f"उत्तर: {best_context}"
    return f"Answer: {best_context}"

# Minimal one-textbox-in / one-textbox-out UI: every submission is routed through get_answer.
iface = gr.Interface(
    fn=get_answer,
    inputs=gr.Textbox(label="Ask a question (पूछें) / Ask in English"),
    outputs=gr.Textbox(label="Answer (उत्तर) / Response"),
    title="Multilingual Women's Healthcare Chatbot",
    description="Ask any question related to women's healthcare in English or Hindi. हिंदी या अंग्रेजी में महिलाओं के स्वास्थ्य से संबंधित कोई भी प्रश्न पूछें।"
)

# Start the web server. Per the captured output below, Colab turns on a public share link automatically.
iface.launch()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.12k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c6d396e3917e220025.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [3]:
!pip install gtts

Collecting gtts
  Downloading gTTS-2.5.4-py3-none-any.whl.metadata (4.1 kB)
Downloading gTTS-2.5.4-py3-none-any.whl (29 kB)
Installing collected packages: gtts
Successfully installed gtts-2.5.4


In [4]:
!pip install speechrecognition

Collecting speechrecognition
  Downloading SpeechRecognition-3.14.1-py3-none-any.whl.metadata (31 kB)
Downloading SpeechRecognition-3.14.1-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m38.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: speechrecognition
Successfully installed speechrecognition-3.14.1


In [5]:
#FINAL WITH PERIOD TRACKER
import torch
import pandas as pd
import numpy as np
import gradio as gr
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from textblob import TextBlob
from nltk.corpus import stopwords
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
import re
import os
from gtts import gTTS
import speech_recognition as sr
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import pickle
from sklearn.preprocessing import MinMaxScaler
import statsmodels.api as sm
from statsmodels.tsa.statespace.sarimax import SARIMAX
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
import json
import calendar

# Download the NLTK resources used below: the stop-word corpus (read into stop_words)
# and the VADER lexicon (presumably what SentimentIntensityAnalyzer loads — confirm).
nltk.download('stopwords')
nltk.download('vader_lexicon')

# English stop words as a set for fast membership tests in preprocess_text;
# sia is the shared sentiment scorer used by analyze_sentiment.
stop_words = set(stopwords.words('english'))
sia = SentimentIntensityAnalyzer()

# Load the healthcare question/answer dataset; later code reads its "Question" and "Answer" columns.
# NOTE(review): absolute /content path — presumably uploaded to the Colab session first; confirm.
csv_path = "/content/Woman_Healthcare_new_multi - training_data.csv"
df_healthcare = pd.read_csv(csv_path)

# Function to detect Hindi
def contains_hindi(text):
    """Report whether ``text`` includes any character from U+0900–U+097F (Devanagari).

    Non-string values are converted with ``str`` before the search.
    """
    as_text = str(text)
    return re.search(r'[\u0900-\u097F]', as_text) is not None

# Detect language
def detect_language(text):
    """Classify ``text`` as Hindi (``'hi'``) or English (``'en'``).

    Devanagari characters decide for Hindi immediately. Otherwise ``detect``
    (from langdetect) is asked, and anything it does not label ``'hi'`` is
    treated as English. When detection raises, the fallback is English if the
    text has at least one ASCII letter and Hindi otherwise.

    Args:
        text: Value to classify; it is stringified before inspection.

    Returns:
        ``'hi'`` or ``'en'``.
    """
    text = str(text)
    if contains_hindi(text):
        return 'hi'
    try:
        detected = detect(text)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # still propagate.
        return 'en' if re.search(r'[a-zA-Z]', text) else 'hi'
    return 'hi' if detected == 'hi' else 'en'

# Sentiment Analysis
def analyze_sentiment(text, language):
    """Label the sentiment of ``text`` as positive, negative or neutral.

    English text is scored directly with the shared VADER analyzer ``sia``.
    Any other language is first translated to English through TextBlob and
    the translation is scored instead.

    Args:
        text: The user's message.
        language: ``'en'`` to score as-is; any other value triggers translation.

    Returns:
        "Positive 😊" when the compound score is above 0.2, "Negative 😟" when
        it is below -0.2, otherwise "Neutral 🙂". If translation fails the
        message is treated as neutral instead of raising.
    """
    if language == 'en':
        score = sia.polarity_scores(text)['compound']
    else:
        try:
            translated = TextBlob(text).translate(to="en")
            score = sia.polarity_scores(str(translated))['compound']
        except Exception as e:
            # Translation is a best-effort step (it depends on an online
            # service and the method is absent from newer TextBlob releases).
            # Sentiment is decoration on the answer, so degrade to neutral
            # rather than failing the whole request.
            print(f"Sentiment translation failed: {e}")
            score = 0.0

    if score > 0.2:
        return "Positive 😊"
    elif score < -0.2:
        return "Negative 😟"
    else:
        return "Neutral 🙂"

# Text Preprocessing
def preprocess_text(text):
    """Prepare a question for embedding.

    Devanagari text is passed through unchanged. Other text is lower-cased,
    tokenised on whitespace, filtered against ``stop_words`` and joined back
    with single spaces.
    """
    if contains_hindi(text):
        return text
    tokens = text.lower().split()
    return " ".join(filter(lambda token: token not in stop_words, tokens))

# Create QA pairs
# Bucket every dataset row by the detected language of its answer.
english_qa, hindi_qa = [], []

for _, row in df_healthcare.iterrows():
    qa_pair = {"question": row["Question"], "context": row["Answer"]}
    answer_lang = detect_language(row["Answer"])
    (hindi_qa if answer_lang == 'hi' else english_qa).append(qa_pair)

# Multilingual sentence encoder shared by both corpora and by incoming questions.
embedding_model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2')

# Answers are the retrieval targets; embed each language's answers once up front.
hindi_corpus = [pair["context"] for pair in hindi_qa]
english_corpus = [pair["context"] for pair in english_qa]

hindi_embeddings = embedding_model.encode(hindi_corpus, convert_to_tensor=True)
english_embeddings = embedding_model.encode(english_corpus, convert_to_tensor=True)

# Find best context
def find_best_context(user_question):
    """Return the stored answer most similar to ``user_question``.

    The question's detected language selects the Hindi or English corpus, and
    the single highest cosine-similarity answer is returned when its score is
    at least 0.2.

    Returns:
        The best-matching answer text, or a language-appropriate apology when
        the corpus is empty or the best score is below 0.2.
    """
    user_language = detect_language(user_question)
    question_embedding = embedding_model.encode(preprocess_text(user_question), convert_to_tensor=True)

    asking_in_hindi = user_language == 'hi'
    if asking_in_hindi:
        corpus_embeddings, qa_dataset = hindi_embeddings, hindi_qa
    else:
        corpus_embeddings, qa_dataset = english_embeddings, english_qa

    if not qa_dataset:
        if asking_in_hindi:
            return "क्षमा करें, हिंदी में कोई उत्तर उपलब्ध नहीं है।"
        return "Sorry, no answers available in English."

    similarity_scores = util.pytorch_cos_sim(question_embedding, corpus_embeddings)[0]
    best_match_idx = torch.argmax(similarity_scores).item()

    if similarity_scores[best_match_idx].item() < 0.2:
        if asking_in_hindi:
            return "क्षमा करें, मुझे आपके प्रश्न के लिए कोई अच्छा उत्तर नहीं मिला।"
        return "I'm sorry, I couldn't find a good answer."

    return qa_dataset[best_match_idx]["context"]

# Get chatbot answer with sentiment
def get_answer(user_question):
    """Answer one chat message, routing period-tracker requests separately.

    The message's language and sentiment are determined first. If the message
    mentions a period-tracking keyword it is handled by the tracker:
    prediction words trigger ``get_period_prediction``, a ``D/M[/YYYY]`` date
    is stored through ``add_period_data``, and anything else gets usage help.
    All other messages are answered from the healthcare corpus, with a
    supportive note appended when the sentiment is negative.

    Args:
        user_question: The text typed or transcribed from the user.

    Returns:
        A 3-tuple ``(user_question, response_text, sentiment_label)``.
    """
    # Empty input has nothing to detect, translate or match; answer directly
    # instead of pushing it through language detection and sentiment scoring.
    if not user_question or not user_question.strip():
        return user_question or "", "Please enter a question.", "Neutral 🙂"

    user_language = detect_language(user_question)
    sentiment = analyze_sentiment(user_question, user_language)
    question_lower = user_question.lower()

    # Check if this is a period tracking question
    period_tracking_keywords = [
        "period", "cycle", "menstruation", "menstrual", "predict", "track",
        "मासिक धर्म", "पीरियड", "माहवारी", "चक्र"
    ]

    is_period_question = any(keyword in question_lower for keyword in period_tracking_keywords)

    if is_period_question:
        try:
            # Extract user ID if present ("user id: abc", "user_id abc", ...)
            user_id_match = re.search(r'user[_\s]?id[:\s]+(\w+)', question_lower)
            user_id = user_id_match.group(1) if user_id_match else "default_user"

            # Try to detect if user is asking for predictions
            if any(word in question_lower for word in ["predict", "when", "next", "forecast", "कब", "अगला", "भविष्यवाणी"]):
                prediction = get_period_prediction(user_id)
                return user_question, prediction, sentiment

            # Check if adding new period data
            date_pattern = r'(\d{1,2})[/\-\.](\d{1,2})[/\-\.]?(\d{4})?'
            date_match = re.search(date_pattern, user_question)

            if date_match:
                day, month, year = date_match.groups()
                # A missing year means "this year".
                year = year or datetime.now().year
                date_str = f"{int(day)}/{int(month)}/{int(year)}"
                # add_period_data signals failure through its return value
                # (it catches its own exceptions), so success is only reported
                # when the date was actually accepted. A rejected date falls
                # through to the usage help below.
                if add_period_data(user_id, date_str):
                    return user_question, f"Period data added for {date_str}. Your cycles are being analyzed.", sentiment

            # Default period tracking response
            return user_question, "I can help track your periods. You can:\n1. Add a period date: 'Add period on DD/MM/YYYY'\n2. Get prediction: 'Predict my next period'", sentiment

        except Exception as e:
            print(f"Error in period handling: {e}")
            return user_question, "I had trouble processing your period tracking request. Please try again.", sentiment

    # Regular healthcare question handling
    best_context = find_best_context(user_question)

    # Apology strings from find_best_context are returned without embellishment.
    if "sorry" in best_context.lower() or "क्षमा" in best_context:
        return user_question, best_context, sentiment

    # Add supportive messages for negative sentiment
    if "Negative" in sentiment:
        best_context += " 💙 It seems like you're feeling low. Remember, you're not alone. Stay strong and reach out for support if needed. 💙"

    return user_question, best_context, sentiment

# Process voice input
def process_voice_input(audio_file):
    """Transcribe a recorded question, answer it and synthesise the reply.

    Args:
        audio_file: Path of the recorded audio, as supplied by the Gradio
            audio component.

    Returns:
        A 4-tuple ``(recognized_question, response_text, sentiment, audio_path)``.
        ``audio_path`` is ``"response.mp3"`` on success and ``None`` when
        speech synthesis fails. If recognition or answering fails, fixed
        error strings are returned with ``None`` for the audio.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_file) as source:
            audio_data = recognizer.record(source)
            user_question = recognizer.recognize_google(audio_data)
        print(f"Recognized Text: {user_question}")

        user_question, response_text, sentiment = get_answer(user_question)
    except Exception as e:
        print("Error:", e)
        return "Could not recognize speech.", "Error processing your request.", "Unknown", None

    # Speech output is optional: a TTS failure must not throw away an answer
    # that was already computed, so it is handled separately from recognition.
    try:
        language = "hi" if contains_hindi(response_text) else "en"
        tts = gTTS(text=response_text, lang=language)
        audio_filename = "response.mp3"
        tts.save(audio_filename)
    except Exception as e:
        print("TTS error:", e)
        audio_filename = None

    return user_question, response_text, sentiment, audio_filename

# ====== PERIOD TRACKING FUNCTIONALITY ======

# Initialize or load period data storage
def get_user_data_path(user_id):
    """Return the JSON file path holding ``user_id``'s period dates.

    The ``period_data`` directory is created if it does not exist yet.

    Args:
        user_id: Identifier typed by the user (UI text box or chat message).

    Returns:
        ``period_data/<user_id>.json``, with any path separators or NUL
        characters in the ID replaced by ``_``.
    """
    user_data_dir = os.path.join("period_data")
    os.makedirs(user_data_dir, exist_ok=True)
    # The ID is untrusted input: neutralise separators so a value such as
    # "../../x" cannot address a file outside period_data. IDs without
    # separators map to exactly the same file as before.
    safe_id = re.sub(r'[\\/\x00]', '_', str(user_id))
    return os.path.join(user_data_dir, f"{safe_id}.json")

def load_user_data(user_id):
    """Read a user's stored tracker record.

    Returns the parsed JSON document from the user's data file, or a fresh
    record ``{"user_id": user_id, "period_dates": []}`` when no file exists.
    """
    data_path = get_user_data_path(user_id)
    if not os.path.exists(data_path):
        return {"user_id": user_id, "period_dates": []}
    with open(data_path, 'r') as handle:
        stored_record = json.load(handle)
    return stored_record

def save_user_data(user_id, data):
    """Write ``data`` as JSON to the user's data file, replacing any previous content."""
    with open(get_user_data_path(user_id), 'w') as handle:
        json.dump(data, handle)

def add_period_data(user_id, date_string):
    """Record a period start date for a user and refresh their models.

    Args:
        user_id: Identifier of the user whose record is updated.
        date_string: Date in ``D/M/YYYY`` form (slash-separated integers).

    Returns:
        True when the date is valid and is stored (or was already stored);
        False when the date cannot be parsed or the record cannot be saved.
        A failure while retraining the models does not change the result,
        because the date itself has been saved by then.
    """
    try:
        user_data = load_user_data(user_id)

        # Parse date and standardize format
        day, month, year = map(int, date_string.split('/'))
        date_obj = datetime(year, month, day)
        # isoformat() always yields a zero-padded YYYY-MM-DD string, which the
        # "%Y-%m-%d" strptime calls elsewhere can read back; strftime("%Y")
        # padding for years below 1000 is platform-dependent.
        standard_date = date_obj.date().isoformat()

        # Don't add duplicates
        if standard_date in user_data["period_dates"]:
            return True

        user_data["period_dates"].append(standard_date)
        user_data["period_dates"].sort()
        save_user_data(user_id, user_data)
    except Exception as e:
        print(f"Error adding period data: {e}")
        return False

    # Retrain models if we have enough data. Training is best-effort: the date
    # is already persisted, so a training error is logged, not reported as a
    # failed add.
    if len(user_data["period_dates"]) >= 3:
        try:
            train_period_models(user_id)
        except Exception as e:
            print(f"Error training period models: {e}")

    return True

# Calculate cycle lengths
def calculate_cycle_lengths(dates):
    """Turn period start dates into the day gaps between consecutive periods.

    Args:
        dates: ``YYYY-MM-DD`` strings, in any order.

    Returns:
        Gaps in days between chronologically adjacent dates, keeping only
        those from 10 to 45 days inclusive. Empty when fewer than two dates
        are given.
    """
    if len(dates) < 2:
        return []

    ordered = sorted(datetime.strptime(stamp, "%Y-%m-%d") for stamp in dates)
    gaps = ((later - earlier).days for earlier, later in zip(ordered, ordered[1:]))
    # Gaps outside 10..45 days are treated as unlikely cycles and dropped.
    return [gap for gap in gaps if 10 <= gap <= 45]

# LSTM Model for period prediction
def create_lstm_model(input_shape):
    """Build and compile the cycle-length regressor.

    The network stacks LSTM(64, returning sequences) → Dropout(0.2) →
    LSTM(32) → Dropout(0.2) → Dense(1) and is compiled with the Adam
    optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, passed to the first LSTM layer.
    """
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(32),
        Dropout(0.2),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

def prepare_lstm_data(cycle_lengths, time_steps=3):
    """Build sliding-window training data from a cycle-length history.

    Args:
        cycle_lengths: Sequence of cycle lengths in days.
        time_steps: Number of consecutive cycles used to predict the next one.

    Returns:
        ``(X, y, scaler)`` where each ``X[i]`` is a window of ``time_steps``
        scaled values, ``y[i]`` is the scaled value that follows it, and
        ``scaler`` is the fitted ``MinMaxScaler``. ``(None, None, None)`` when
        there are not more than ``time_steps`` values.
    """
    if len(cycle_lengths) <= time_steps:
        return None, None, None

    # Scale to [0, 1] as a single feature column.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(np.array(cycle_lengths).reshape(-1, 1))

    window_starts = range(len(scaled_data) - time_steps)
    windows = [scaled_data[start:start + time_steps] for start in window_starts]
    targets = [scaled_data[start + time_steps] for start in window_starts]

    return np.array(windows), np.array(targets), scaler

def train_lstm_model(user_id, cycle_lengths):
    """Fit the LSTM on a user's cycle history and persist it.

    Requires at least five cycle lengths. The trained model is written to
    ``period_models/<user_id>/lstm_model.h5`` and the fitted scaler is pickled
    next to it as ``lstm_scaler.pkl``.

    Returns:
        ``(model, scaler)`` after training, or ``None`` when there is too
        little data.
    """
    has_enough_history = len(cycle_lengths) >= 5
    if not has_enough_history:
        return None

    sequences, targets, scaler = prepare_lstm_data(cycle_lengths)
    if sequences is None:
        return None

    # Each sample is a window of scalar cycle lengths: (window_length, 1).
    window_length = sequences.shape[1]
    model = create_lstm_model((window_length, 1))
    model.fit(sequences, targets, epochs=100, batch_size=1, verbose=0)

    models_dir = os.path.join("period_models", user_id)
    os.makedirs(models_dir, exist_ok=True)
    model_path = os.path.join(models_dir, "lstm_model.h5")
    scaler_path = os.path.join(models_dir, "lstm_scaler.pkl")

    model.save(model_path)
    with open(scaler_path, 'wb') as handle:
        pickle.dump(scaler, handle)

    return model, scaler

# SARIMA model for period prediction
def train_sarima_model(user_id, cycle_lengths):
    """Fit a SARIMAX(1, 0, 1) model on a user's cycle history and persist it.

    Requires at least five cycle lengths. The fitted results object is pickled
    to ``period_models/<user_id>/sarima_model.pkl``.

    Args:
        user_id: Identifier used for the model directory name.
        cycle_lengths: Sequence of cycle lengths in days.

    Returns:
        The fitted results object, or ``None`` when there is too little data
        or fitting/saving fails.
    """
    if len(cycle_lengths) < 5:  # Need minimum data
        return None

    try:
        # Fixed, non-seasonal order (no parameter search).
        model = SARIMAX(cycle_lengths, order=(1, 0, 1),
                       seasonal_order=(0, 0, 0, 0))
        results = model.fit(disp=False)

        # Save model
        models_dir = os.path.join("period_models", user_id)
        os.makedirs(models_dir, exist_ok=True)

        with open(os.path.join(models_dir, "sarima_model.pkl"), 'wb') as f:
            pickle.dump(results, f)

        return results
    except Exception as e:
        # Still best-effort, but log the cause like the prediction helpers do
        # instead of silently swallowing everything (including Ctrl-C).
        print(f"SARIMA training error: {e}")
        return None

def train_period_models(user_id):
    """Retrain the LSTM and SARIMA predictors from a user's stored dates.

    Training is attempted only when the user has at least three dates that
    yield at least three usable cycle lengths.

    Returns:
        True if at least one of the two trainers returned a model, else False.
    """
    dates = load_user_data(user_id)["period_dates"]
    if len(dates) < 3:
        return False

    cycle_lengths = calculate_cycle_lengths(dates)
    if len(cycle_lengths) < 3:
        return False

    # Both trainers always run; either one succeeding counts as success.
    trained = (
        train_lstm_model(user_id, cycle_lengths),
        train_sarima_model(user_id, cycle_lengths),
    )
    return any(result is not None for result in trained)

def predict_with_lstm(user_id, recent_cycles):
    """Predict the next cycle length with the user's saved LSTM model.

    Args:
        user_id: Identifier whose ``period_models/<user_id>`` directory holds
            ``lstm_model.h5`` and ``lstm_scaler.pkl``.
        recent_cycles: Cycle lengths in days; the last three are the input.

    Returns:
        The predicted length rounded and clamped to 21..45 days, or ``None``
        when no saved model exists, fewer than three cycles are given, or
        loading/prediction fails.
    """
    models_dir = os.path.join("period_models", user_id)
    lstm_model_path = os.path.join(models_dir, "lstm_model.h5")
    scaler_path = os.path.join(models_dir, "lstm_scaler.pkl")

    if not (os.path.exists(lstm_model_path) and os.path.exists(scaler_path)):
        return None

    # Need at least 3 recent cycles (checked before paying for a model load).
    if len(recent_cycles) < 3:
        return None

    try:
        # compile=False: only inference is needed, and skipping compilation
        # avoids failures re-creating the saved 'mse' loss when a legacy .h5
        # file is loaded by newer Keras versions.
        model = load_model(lstm_model_path, compile=False)
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # acceptable only while these files are written solely by this app.
        with open(scaler_path, 'rb') as f:
            scaler = pickle.load(f)

        # Scale input data and shape it as (batch=1, time_steps=3, features=1)
        input_data = scaler.transform(np.array(recent_cycles[-3:]).reshape(-1, 1))
        input_data = np.reshape(input_data, (1, 3, 1))

        # Make prediction (verbose=0 keeps the progress bar out of the logs)
        prediction = model.predict(input_data, verbose=0)
        prediction = float(scaler.inverse_transform(prediction)[0, 0])

        return max(min(round(prediction), 45), 21)  # Constrain to reasonable range
    except Exception as e:
        print(f"LSTM prediction error: {e}")
        return None

def predict_with_sarima(user_id, recent_cycles):
    """Forecast the next cycle length from the user's saved SARIMA results.

    The pickled results object at ``period_models/<user_id>/sarima_model.pkl``
    produces a one-step forecast; ``recent_cycles`` is accepted but not used.

    Returns:
        The forecast rounded and clamped to 21..45 days, or ``None`` when no
        saved model exists or loading/forecasting fails.
    """
    sarima_model_path = os.path.join("period_models", user_id, "sarima_model.pkl")
    if not os.path.exists(sarima_model_path):
        return None

    try:
        with open(sarima_model_path, 'rb') as handle:
            fitted = pickle.load(handle)

        next_length = fitted.forecast(steps=1)[0]

        # Keep the result inside the 21..45 day range.
        capped = min(round(next_length), 45)
        return max(capped, 21)
    except Exception as e:
        print(f"SARIMA prediction error: {e}")
        return None

def calculate_average_cycle(cycle_lengths):
    """Average the plausible cycle lengths, rounded to whole days.

    Only lengths from 21 to 45 days inclusive are averaged. When the input is
    empty or none of its values fall in that range, the default of 28 days is
    returned.
    """
    plausible = [length for length in (cycle_lengths or []) if 21 <= length <= 45]
    if not plausible:
        return 28  # Default average
    return round(sum(plausible) / len(plausible))

def get_period_prediction(user_id):
    """Describe when the user's next period is expected.

    With at least three usable cycles, the saved LSTM and SARIMA models are
    consulted; their predictions (when available) are averaged with equal
    weight together with the user's average cycle length. The resulting
    length is added to the most recent recorded date.

    Returns:
        A human-readable message: a prompt to add data when fewer than two
        dates exist, otherwise the predicted date (plus a ±2 day window and
        summary when at least three cycles are available).
    """
    dates = load_user_data(user_id)["period_dates"]

    if not dates:
        return "No period data found. Please add your period dates first."
    if len(dates) == 1:
        return f"I see you've recorded one period on {dates[0]}. Please add more period dates for predictions."

    dates = sorted(dates)
    last_period = datetime.strptime(dates[-1], "%Y-%m-%d")

    cycle_lengths = calculate_cycle_lengths(dates)
    avg_cycle = calculate_average_cycle(cycle_lengths)
    has_enough_cycles = len(cycle_lengths) >= 3

    # Collect (method name, predicted length) pairs; models first, average last.
    estimates = []
    if has_enough_cycles:
        lstm_prediction = predict_with_lstm(user_id, cycle_lengths)
        if lstm_prediction:
            estimates.append(("LSTM", lstm_prediction))

        sarima_prediction = predict_with_sarima(user_id, cycle_lengths)
        if sarima_prediction:
            estimates.append(("SARIMA", sarima_prediction))
    estimates.append(("average", avg_cycle))

    prediction_methods = [method for method, _ in estimates]
    predicted_days = [days for _, days in estimates]

    # Plain mean over every available estimate.
    if len(predicted_days) > 1:
        predicted_cycle_length = round(sum(predicted_days) / len(predicted_days))
    else:
        predicted_cycle_length = predicted_days[0]

    predicted_date = last_period + timedelta(days=predicted_cycle_length)
    formatted_date = predicted_date.strftime("%B %d, %Y")

    # Two days either side of the predicted date.
    two_days = timedelta(days=2)
    early_date = (predicted_date - two_days).strftime("%B %d")
    late_date = (predicted_date + two_days).strftime("%B %d")

    parts = [
        f"Based on your {len(cycle_lengths)} recorded cycles ",
        f"using {', '.join(prediction_methods)} prediction, ",
    ]
    if has_enough_cycles:
        parts += [
            f"your next period is likely to start on {formatted_date}.\n\n",
            f"Prediction window: {early_date} to {late_date}\n",
            f"Average cycle length: {avg_cycle} days\n",
            f"Last period started: {last_period.strftime('%B %d, %Y')}",
        ]
    else:
        parts += [
            f"your next period is estimated around {formatted_date}.\n\n",
            "Please add more period dates for more accurate predictions.",
        ]

    return "".join(parts)

def generate_cycle_visualization(user_id):
    """Plot the user's cycle-length history and save it as a PNG.

    The chart shows each cycle length in order with a dashed line at the
    average, and is written to ``period_data/<user_id>_visualization.png``.

    Returns:
        The image path, or ``None`` when the user has fewer than two dates.
    """
    dates = load_user_data(user_id)["period_dates"]
    if len(dates) < 2:
        return None

    cycle_lengths = calculate_cycle_lengths(dates)
    cycle_numbers = range(1, len(cycle_lengths) + 1)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(cycle_numbers, cycle_lengths, 'o-', color='purple')
    ax.axhline(y=calculate_average_cycle(cycle_lengths), color='r', linestyle='--', label='Average')
    ax.set_title('Your Menstrual Cycle History')
    ax.set_xlabel('Cycle Number')
    ax.set_ylabel('Days')
    ax.grid(True, alpha=0.3)
    ax.legend()

    vis_path = os.path.join("period_data", f"{user_id}_visualization.png")
    fig.savefig(vis_path)
    plt.close(fig)

    return vis_path

# Calendar visualization tab
def create_period_calendar(user_id):
    """Render the current month as an HTML calendar with period highlights.

    Recorded period start dates that fall in the current month are shown in
    crimson. The next period is estimated as the latest recorded date plus the
    average cycle length and is assumed to last five days; each of those five
    days that falls in the current month is shown in light pink.

    Args:
        user_id: Identifier of the user whose dates are displayed.

    Returns:
        An HTML string (heading, month table, legend), or the text
        "No period data found." when the user has no recorded dates.
    """
    user_data = load_user_data(user_id)
    dates = user_data["period_dates"]

    if not dates:
        return "No period data found."

    # Calculate next predicted period. max() picks the latest date even if the
    # stored list is not sorted (ISO date strings order chronologically).
    cycle_lengths = calculate_cycle_lengths(dates)
    avg_cycle = calculate_average_cycle(cycle_lengths)
    last_period = datetime.strptime(max(dates), "%Y-%m-%d")
    next_period = last_period + timedelta(days=avg_cycle)

    # Create calendar for current month (one clock read so month/year agree)
    today = datetime.now()
    current_month, current_year = today.month, today.year

    cal = calendar.monthcalendar(current_year, current_month)
    month_name = calendar.month_name[current_month]

    def in_displayed_month(moment):
        """True when ``moment`` falls in the month being rendered."""
        return moment.month == current_month and moment.year == current_year

    # Day numbers of recorded period starts within the displayed month
    period_days = set()
    for date_str in dates:
        date_obj = datetime.strptime(date_str, "%Y-%m-%d")
        if in_displayed_month(date_obj):
            period_days.add(date_obj.day)

    # Predicted period days (assumed 5-day period). Each day is checked on its
    # own so a window crossing a month boundary only marks the days that
    # really belong to the displayed month.
    predicted_days = set()
    for offset in range(5):
        predicted_day = next_period + timedelta(days=offset)
        if in_displayed_month(predicted_day):
            predicted_days.add(predicted_day.day)

    # Build calendar HTML with explicit styling
    cell_style = "padding:8px; border:1px solid #ddd;"
    header_cells = "".join(
        f"<th style='{cell_style}'>{weekday}</th>"
        for weekday in ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    )

    html = f"<h2>{month_name} {current_year}</h2>"
    html += "<table style='width:100%; border-collapse:collapse; border:1px solid #ddd;'>"
    html += f"<tr style='background-color:#f5f5f5;'>{header_cells}</tr>"

    for week in cal:
        html += "<tr>"
        for day in week:
            if day == 0:
                # monthcalendar pads weeks with 0 for days outside the month
                html += f"<td style='{cell_style}'></td>"
            elif day in period_days:
                html += f"<td style='{cell_style} background-color:#dc143c; color:white; text-align:center; font-weight:bold;'>{day}</td>"
            elif day in predicted_days:
                html += f"<td style='{cell_style} background-color:#ffb6c1; text-align:center;'>{day}</td>"
            else:
                html += f"<td style='{cell_style} text-align:center;'>{day}</td>"
        html += "</tr>"

    html += "</table>"
    html += "<div style='margin-top:15px;'>"
    html += "<span style='display:inline-block; background-color:#dc143c; color:white; padding:4px 8px; margin-right:10px; font-weight:bold;'>Recorded period</span> "
    html += "<span style='display:inline-block; background-color:#ffb6c1; padding:4px 8px;'>Predicted period</span>"
    html += "</div>"

    return html

# ====== GRADIO UI ======

# Two-tab Gradio app: a chatbot tab (text or voice in, text and audio out) and a
# period-tracker tab (add dates, get a prediction, see a month calendar).
with gr.Blocks() as iface:
    gr.Markdown("# 🎙️ AI Women's Healthcare + Period Tracker")
    gr.Markdown("Ask about **women's healthcare** in **English or Hindi** or use the period tracker. The chatbot will analyze your **sentiment** and provide a response.")

    with gr.Tabs():
        with gr.Tab("ChatBot"):
            with gr.Row():
                # Left column: the user's inputs plus the detected sentiment.
                with gr.Column():
                    mic_input = gr.Audio(type="filepath", label="🎤 Speak Your Question")
                    text_input = gr.Textbox(label="Ask a question (पूछें)")
                    sentiment_output = gr.Textbox(label="Your Sentiment 😊 / 😟 / 🙂")

                # Right column: the answer as text and as synthesised speech.
                with gr.Column():
                    response_output = gr.Textbox(label="Chatbot Answer", lines=10)
                    speak_button = gr.Audio(interactive=False, label="🔊 Speak Answer", type="filepath")

            with gr.Row():
                mic_button = gr.Button("🎙️ Speak & Get Answer")
                text_button = gr.Button("💬 Ask via Text")

            gr.Markdown("""
            ### Period Tracking Commands
            - Add period: "Add period on DD/MM/YYYY"
            - Predict next period: "Predict my next period"
            - View cycle history: Use the Period Tracker tab
            """)

        with gr.Tab("Period Tracker"):
            with gr.Row():
                with gr.Column():
                    user_id_input = gr.Textbox(label="Your User ID", placeholder="Enter your user ID")
                    date_input = gr.Textbox(label="Add Period Date (DD/MM/YYYY)", placeholder="E.g., 15/02/2024")
                    add_period_button = gr.Button("Add Period Date")
                    predict_button = gr.Button("Predict Next Period")

                with gr.Column():
                    tracker_output = gr.Textbox(label="Tracker Results", lines=10)
                    calendar_html = gr.HTML(label="Period Calendar")

            # Connect period tracker buttons
            def add_period_date(user_id, date_string):
                """Store a date for ``user_id`` and return the status text shown in the tracker box."""
                if not user_id or not date_string:
                    return "Please enter both user ID and date"

                # add_period_data returns True/False rather than raising.
                success = add_period_data(user_id, date_string)
                if success:
                    return f"Period date added for {date_string}. Your cycles are being analyzed."
                else:
                    return "Error adding period date. Please use format DD/MM/YYYY"

            add_period_button.click(
                add_period_date,
                inputs=[user_id_input, date_input],
                outputs=[tracker_output]
            )

            # Prediction needs a user ID; otherwise prompt for one.
            predict_button.click(
                lambda user_id: get_period_prediction(user_id) if user_id else "Please enter your user ID",
                inputs=[user_id_input],
                outputs=[tracker_output]
            )

            # Update calendar when user ID is entered
            # NOTE(review): only the user-ID box triggers a redraw here; adding a date
            # does not refresh the calendar until the ID changes — confirm that is intended.
            user_id_input.change(
                lambda user_id: create_period_calendar(user_id) if user_id else "",
                inputs=[user_id_input],
                outputs=[calendar_html]
            )

    # Connect main chatbot buttons
    # Both handlers write the (possibly transcribed) question back into text_input;
    # the voice handler additionally fills the audio player.
    mic_button.click(process_voice_input, inputs=[mic_input], outputs=[text_input, response_output, sentiment_output, speak_button])
    text_button.click(get_answer, inputs=[text_input], outputs=[text_input, response_output, sentiment_output])

# Start the web server. Per the captured output below, Colab turns on a public share link automatically.
iface.launch()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/4.12k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/645 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/471M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/480 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://b306e305103caa3cfa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


