In [32]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# SmartHabit — Personalized Workout & Habit Coach (Capstone Final)
**Track:** Freestyle  
**What this notebook contains:**  
- Multi-tool agent: HabitTracker, WorkoutGenerator, SleepLogger, NutritionTracker  
- Generative AI integration (optional, Gemini) for personalized coaching & summaries  
- Embeddings-based similarity search (optional with API; mocked fallback included)  
- Progress analytics & weekly reports (plots)  
- Reproducibility & logging for MLOps-style traceability

**How to run:**  
1. (Optional) Add your Google API key as a Kaggle Secret named `GOOGLE_API_KEY` to enable Gemini + embeddings.  
2. Turn **Internet ON** in notebook settings.  
3. Run cells top-to-bottom.  
4. Interact using `agent.chat("your message")` or use the helper cells at the bottom.


In [33]:
!pip install google-generativeai




In [34]:
import os, json, datetime, random, uuid
from collections import defaultdict
from pathlib import Path

# plotting
import matplotlib.pyplot as plt
import numpy as np

# Optional: Gemini / GenAI client (will only work if API key provided)
try:
    from google import genai
    GENAI_AVAILABLE = True
except Exception as e:
    GENAI_AVAILABLE = False

# Path for persistent memory in Kaggle
MEMORY_FILE = "/kaggle/working/smarthabit_memory.json"

In [35]:
def load_memory():
    if os.path.exists(MEMORY_FILE):
        with open(MEMORY_FILE, "r") as f:
            return json.load(f)
    # default structure
    return {
        "user": {"name": None, "age": None, "level": "beginner"},
        "habits": {},         # habit -> {streak, last_done}
        "workouts": [],       # list of {date, plan}
        "sleep": [],          # list of {date, hours}
        "nutrition": [],      # list of {date, meal, calories}
        "daily_notes": [],    # list of {date, note, vector}
        "logs": []            # event logs for reproducibility
    }

def save_memory(memory):
    with open(MEMORY_FILE, "w") as f:
        json.dump(memory, f, indent=2)

def log_event(memory, event, data=None):
    memory["logs"].append({
        "id": str(uuid.uuid4()),
        "event": event,
        "data": data,
        "timestamp": str(datetime.datetime.now())
    })
    save_memory(memory)


In [36]:
memory = load_memory()

# Ensure keys exist
for k in ["user","habits","workouts","sleep","nutrition","daily_notes","logs"]:
    memory.setdefault(k, {} if k=="user" else [] if k in ["workouts","sleep","nutrition","daily_notes","logs"] else {})

# If a Kaggle Secret named GOOGLE_API_KEY exists, use it
API_KEY = None
if "GOOGLE_API_KEY" in os.environ:
    API_KEY = os.environ["GOOGLE_API_KEY"]
elif Path("/kaggle/working/GOOGLE_API_KEY.txt").exists():
    # fallback: load from file if user put it in working dir (not recommended for submission)
    API_KEY = Path("/kaggle/working/GOOGLE_API_KEY.txt").read_text().strip()

if API_KEY and GENAI_AVAILABLE:
    client = genai.Client(api_key=API_KEY)
    print("GenAI client available.")
else:
    client = None
    print("GenAI not available/disabled. Notebook will use local fallbacks.")

save_memory(memory)


GenAI not available/disabled. Notebook will use local fallbacks.


In [37]:
# We'll attempt to use genai embeddings if client exists.
# Otherwise we create a mock deterministic vector using hashing (sufficient for demo).

def embed_text_api(text):
    # API-based embeddings (if available)
    if client:
        out = client.embeddings.create(model="text-embedding-3-large", content=text)
        return out["embedding"]
    raise RuntimeError("GenAI client not available")

def embed_text_fallback(text, dim=128):
    # Simple deterministic pseudo-embedding fallback
    h = abs(hash(text))
    rng = np.random.RandomState(h % (2**32))
    vec = rng.rand(dim).tolist()
    return vec

def embed_text(text):
    try:
        return embed_text_api(text)
    except Exception:
        return embed_text_fallback(text)


In [38]:
class ToolBase:
    def __init__(self, memory):
        self.memory = memory

class HabitTool(ToolBase):
    def add(self, habit):
        if habit in self.memory["habits"]:
            return f"Habit '{habit}' already exists."
        self.memory["habits"][habit] = {"streak":0, "last_done": None}
        log_event(self.memory, "habit.add", {"habit": habit})
        save_memory(self.memory)
        return f"Added habit '{habit}'."

    def log(self, habit):
        if habit not in self.memory["habits"]:
            return f"Habit '{habit}' not found."
        today = str(datetime.date.today())
        last = self.memory["habits"][habit]["last_done"]
        if last == today:
            return "Already logged today."
        # update streak
        yesterday = str(datetime.date.today() - datetime.timedelta(days=1))
        if last == yesterday:
            self.memory["habits"][habit]["streak"] += 1
        else:
            self.memory["habits"][habit]["streak"] = 1
        self.memory["habits"][habit]["last_done"] = today
        log_event(self.memory, "habit.log", {"habit": habit})
        save_memory(self.memory)
        return f"Logged habit '{habit}'. Streak: {self.memory['habits'][habit]['streak']}"

    def status(self):
        return self.memory["habits"]

class WorkoutTool(ToolBase):
    def __init__(self, memory):
        super().__init__(memory)
        # basic DB of exercises by level
        self.db = {
            "beginner": ["10 pushups","15 squats","20 jumping jacks","10-min walk","30s plank"],
            "intermediate": ["20 pushups","30 squats","40 jumping jacks","2-km jog","60s plank"],
            "advanced": ["40 pushups","50 squats","60 jumping jacks","5-km run","90s plank"]
        }

    def generate(self, level="beginner", duration_minutes=30, focus=None):
        # choose exercises to fit approximate duration — naive heuristic
        pool = list(self.db.get(level, self.db["beginner"]))
        plan = random.sample(pool, min(3, len(pool)))
        entry = {"date": str(datetime.date.today()), "plan": plan, "level": level, "focus": focus, "duration_min": duration_minutes}
        self.memory["workouts"].append(entry)
        log_event(self.memory, "workout.generate", entry)
        save_memory(self.memory)
        return entry

    def recent(self, n=5):
        return self.memory["workouts"][-n:]

class SleepTool(ToolBase):
    def log_sleep(self, hours):
        entry = {"date": str(datetime.date.today()), "hours": float(hours)}
        self.memory["sleep"].append(entry)
        log_event(self.memory, "sleep.log", entry)
        save_memory(self.memory)
        return "Sleep logged."

    def avg_last_n_days(self, n=7):
        items = self.memory["sleep"][-n:]
        if not items:
            return 0.0
        return float(np.mean([x["hours"] for x in items]))

class NutritionTool(ToolBase):
    def add_meal(self, meal, calories):
        entry = {"date": str(datetime.date.today()), "meal": meal, "calories": float(calories)}
        self.memory["nutrition"].append(entry)
        log_event(self.memory, "nutrition.add", entry)
        save_memory(self.memory)
        return "Meal logged."

    def calories_today(self):
        today = str(datetime.date.today())
        return sum([e["calories"] for e in self.memory["nutrition"] if e["date"]==today])


In [39]:
class SmartHabitAgent:
    def __init__(self, memory, client=None):
        self.memory = memory
        self.client = client
        self.habit = HabitTool(memory)
        self.workout = WorkoutTool(memory)
        self.sleep = SleepTool(memory)
        self.nutrition = NutritionTool(memory)

    # --- Core orchestrator: map user intent to tools ---
    def dispatch(self, command:str):
        text = command.lower().strip()

        # Habit commands
        if text.startswith("add habit "):
            return self.habit.add(command[len("add habit "):].strip())
        if text.startswith("log habit "):
            return self.habit.log(command[len("log habit "):].strip())
        if text.startswith("show habits") or text == "habits":
            return self.habit.status()

        # Workout
        if "generate workout" in text or "workout" in text:
            # parse level if included
            if "intermediate" in text: level="intermediate"
            elif "advanced" in text: level="advanced"
            else: level=self.memory.get("user",{}).get("level","beginner")
            return self.workout.generate(level=level)

        # Sleep
        if text.startswith("i slept ") or text.startswith("log sleep "):
            # extract number
            nums = [float(s) for s in text.split() if s.replace('.','',1).isdigit()]
            if nums:
                return self.sleep.log_sleep(nums[0])
            return "Please include hours, e.g., 'I slept 7.5 hours'."

        # Nutrition
        if text.startswith("add meal "):
            # format: add meal <name> ; <calories>
            rest = command[len("add meal "):]
            if ";" in rest:
                meal, cal = map(str.strip, rest.split(";",1))
                try:
                    cal = float(cal)
                    return self.nutrition.add_meal(meal, cal)
                except:
                    return "Calories must be a number. Example: add meal oatmeal; 350"
            return "Use format: add meal <name> ; <calories>"

        # Reports / analytics
        if "weekly report" in text or "summary" in text or "how am i doing" in text:
            return self.weekly_report()

        # AI-enhanced coaching (if user explicitly asks and client exists)
        if ("advice" in text or "coach" in text or "feedback" in text) and self.client:
            return self.ai_feedback(command)

        # fallback generator (if client available)
        if self.client:
            return self.ai_fallback_text(command)

        # fallback non-AI helper
        return "I didn't understand. Try: 'add habit X', 'log habit X', 'generate workout', 'weekly report', or add 'advice' to request AI feedback."

    # --- non-AI weekly report summarizing metrics ---
    def weekly_report(self):
        today = datetime.date.today()
        week_ago = today - datetime.timedelta(days=7)
        workouts_count = len([w for w in self.memory["workouts"] if datetime.date.fromisoformat(w["date"]) >= week_ago])
        avg_sleep = self.sleep.avg_last_n_days(7)
        calories = self.nutrition.calories_today()
        habits = {h:self.memory["habits"][h] for h in self.memory["habits"]}
        return {
            "workouts_last_7_days": workouts_count,
            "avg_sleep_last_7_days": avg_sleep,
            "calories_today": calories,
            "habits": habits
        }

    # --- AI helpers (prompt engineering) ---
    def ai_fallback_text(self, prompt_text):
        # Use LLM to respond (simple prompt) if client available
        try:
            prompt = f"""You are a friendly personal habit & workout coach. User message: {prompt_text}
            Provide a concise, actionable response (2-6 bullet points), and one motivational line."""
            res = self.client.responses.create(
                model="gpt-4o-mini", # or gemini-2.0-flash / choose what's available in your account
                input=prompt
            )
            # Best-effort parse
            return res.output_text if hasattr(res, "output_text") else str(res)
        except Exception as e:
            return f"(AI unavailable) {str(e)}"

    def ai_feedback(self, prompt_text):
        # A structured coaching feedback prompt
        try:
            prompt = f"""You are a expert personal coach. Analyze the user's recent memory and give:
1) 3 quick improvements
2) 2 motivational suggestions
3) one actionable habit to add next week

Memory snapshot: {self.memory_snapshot_short()}
User message: {prompt_text}
"""
            res = self.client.responses.create(
                model="gpt-4o-mini",
                input=prompt
            )
            return res.output_text if hasattr(res, "output_text") else str(res)
        except Exception as e:
            return "(AI feedback unavailable) " + str(e)

    def memory_snapshot_short(self):
        # short summary for prompts
        return {
            "habits": self.memory.get("habits", {}),
            "recent_workout": self.memory.get("workouts", [])[-3:],
            "sleep_last_7": self.memory.get("sleep", [])[-7:],
            "calories_today": self.nutrition.calories_today()
        }


In [40]:
agent = SmartHabitAgent(memory, client=client)
print("SmartHabit Agent is ready. Try `agent.dispatch('generate workout')` or `agent.dispatch('add habit drink water')`.")


SmartHabit Agent is ready. Try `agent.dispatch('generate workout')` or `agent.dispatch('add habit drink water')`.


In [41]:
def plot_habit_streaks(mem):
    habits = list(mem["habits"].keys())
    streaks = [mem["habits"][h]["streak"] for h in habits]
    if not habits:
        print("No habits yet.")
        return
    plt.figure(figsize=(8,4))
    plt.bar(habits, streaks)
    plt.title("Habit streaks")
    plt.xticks(rotation=45)
    plt.show()

def plot_sleep_trend(mem):
    dates = [d["date"] for d in mem["sleep"]]
    hours = [d["hours"] for d in mem["sleep"]]
    if not dates:
        print("No sleep logs yet.")
        return
    plt.figure(figsize=(8,4))
    plt.plot(dates, hours, marker="o")
    plt.title("Sleep hours")
    plt.xticks(rotation=45)
    plt.show()


In [42]:
# Add some sample usage lines you can run / copy
print(agent.dispatch("add habit Drink 2L water"))
print(agent.dispatch("add habit Morning walk"))
print(agent.dispatch("log habit Drink 2L water"))
print(agent.dispatch("generate workout"))

# Log sleep and nutrition
print(agent.dispatch("I slept 7.5 hours"))   # uses parsing in dispatch
print(agent.dispatch("add meal oats; 350"))  # for nutrition tool

# Weekly report
print("Weekly report:", agent.dispatch("weekly report"))


Habit 'Drink 2L water' already exists.
Habit 'Morning walk' already exists.
Already logged today.
{'date': '2025-11-21', 'plan': ['15 squats', '10 pushups', '10-min walk'], 'level': 'beginner', 'focus': None, 'duration_min': 30}
Sleep logged.
Meal logged.
Weekly report: {'workouts_last_7_days': 3, 'avg_sleep_last_7_days': 7.5, 'calories_today': 1050.0, 'habits': {'Drink 2L water': {'streak': 1, 'last_done': '2025-11-21'}, 'Morning walk': {'streak': 0, 'last_done': None}}}


In [43]:
def add_daily_note(note_text):
    vec = embed_text(note_text)
    entry = {"date": str(datetime.date.today()), "note": note_text, "vector": vec}
    memory["daily_notes"].append(entry)
    log_event(memory, "daily_note.add", entry)
    save_memory(memory)
    return "Note saved."

def find_similar_notes(query, top_k=3):
    qv = np.array(embed_text(query))
    sims = []
    for n in memory["daily_notes"]:
        vec = np.array(n["vector"])
        # cosine similarity
        sim = float(np.dot(qv, vec) / (np.linalg.norm(qv)*np.linalg.norm(vec) + 1e-9))
        sims.append((sim, n))
    sims.sort(key=lambda x: x[0], reverse=True)
    return sims[:top_k]

# Demo
add_daily_note("Felt tired today. Did short walk but couldn't go running.")
add_daily_note("Great day. Completed all study tasks and workout.")
print(find_similar_notes("I was tired and couldn't finish my run"))


[(0.8036483318356382, {'date': '2025-11-21', 'note': "Felt tired today. Did short walk but couldn't go running.", 'vector': [0.3231154115923257, 0.8375022999001838, 0.2369362189685038, 0.8618221918445859, 0.7017039813992086, 0.6356065890578247, 0.757306782873821, 0.5071159667244205, 0.10611873508035985, 0.7002284587586114, 0.7775762677911988, 0.07211017150777022, 0.6764988221602614, 0.33305015534419746, 0.2801048313386516, 0.1353031929702908, 0.5692109596162466, 0.16774576892011728, 0.26811984493421115, 0.3117596300779627, 0.736552049920278, 0.5396164143573423, 0.564635559425009, 0.33992476484927503, 0.992240947406679, 0.5135835413797833, 0.9747942196603587, 0.2051388232440169, 0.6536359984826757, 0.08791134600570027, 0.8916941623560581, 0.36214538242861727, 0.5727839500525058, 0.6063927557521326, 0.5764094241607335, 0.9893320229603699, 0.5735019564108499, 0.006098834797201413, 0.4594664359312258, 0.5568736453123388, 0.529429158603256, 0.6708148836976386, 0.9581480714994323, 0.98916983

In [44]:
# Save a versioned snapshot for reproducibility
def export_snapshot(tag="capstone_submission_v1"):
    snapshot = {
        "tag": tag,
        "timestamp": str(datetime.datetime.now()),
        "memory": memory,
        "notebook": "SmartHabit_Capstone"
    }
    path = f"/kaggle/working/{tag}_snapshot.json"
    with open(path,"w") as f:
        json.dump(snapshot, f, indent=2)
    print("Snapshot saved to", path)
    return path

export_snapshot()


Snapshot saved to /kaggle/working/capstone_submission_v1_snapshot.json


'/kaggle/working/capstone_submission_v1_snapshot.json'