In [634]:
import pandas as pd
import json
import re
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from dateutil import parser

In [635]:
# Load the CSV data
csv_file = '1c6d5.csv'  # Update this with your file path
df = pd.read_csv(csv_file, encoding='latin1')


In [636]:
df = df.dropna(subset=["startDateFull", "endDateFull"])

In [637]:
# 🔹 Step 1: Ensure all date values are strings & strip whitespace
df["startDateFull"] = df["startDateFull"].astype(str).str.strip()
df["endDateFull"] = df["endDateFull"].astype(str).str.strip()

In [638]:
# 🔹 Step 1: Remove Non-Standard Characters (e.g., "펝" or any Unicode noise)
df["startDateFull"] = df["startDateFull"].apply(lambda x: re.sub(r"[^\x00-\x7F]+", "", str(x)))
df["endDateFull"] = df["endDateFull"].apply(lambda x: re.sub(r"[^\x00-\x7F]+", "", str(x)))

In [639]:
print(df[["startDateFull", "endDateFull"]])

             startDateFull            endDateFull
0    01/30/2025, 07:15:00   01/30/2025, 08:15:00 
1    02/13/2025, 12:45:00   02/13/2025, 13:15:00 
2    02/07/2025, 21:30:00   02/07/2025, 22:30:00 
3    02/04/2025, 17:30:00   02/04/2025, 18:30:00 
4    02/11/2025, 21:30:00   02/11/2025, 22:30:00 
..                     ...                    ...
452  01/19/2025, 11:00:00   01/19/2025, 12:00:00 
453  02/04/2025, 12:45:00   02/04/2025, 13:15:00 
454  02/01/2025, 12:45:00   02/01/2025, 13:15:00 
455  01/31/2025, 20:00:00   01/31/2025, 21:30:00 
456  02/10/2025, 13:15:00   02/10/2025, 17:30:00 

[457 rows x 2 columns]


In [640]:
# 🔹 Step 3: Convert to Time Objects (Extracting Only Time)
def safe_parse_time(date_str):
    try:
        return parser.parse(date_str).time()  # Extract only time
    except Exception:
        return None  # Return None if parsing fails

df["startTime"] = df["startDateFull"].apply(safe_parse_time)
df["endTime"] = df["endDateFull"].apply(safe_parse_time)

In [641]:
print(df[["startTime", "endTime"]])

    startTime   endTime
0    07:15:00  08:15:00
1    12:45:00  13:15:00
2    21:30:00  22:30:00
3    17:30:00  18:30:00
4    21:30:00  22:30:00
..        ...       ...
452  11:00:00  12:00:00
453  12:45:00  13:15:00
454  12:45:00  13:15:00
455  20:00:00  21:30:00
456  13:15:00  17:30:00

[457 rows x 2 columns]


In [642]:
# 🔹 Step 4: Drop rows where time conversion failed
df = df.dropna(subset=["startTime", "endTime"])

In [643]:
# Keep only relevant columns
df_cleaned = df[["title", "startTime", "endTime"]]

In [644]:
print(df_cleaned)

                 title startTime   endTime
0           Meditation  07:15:00  08:15:00
1                Lunch  12:45:00  13:15:00
2                Books  21:30:00  22:30:00
3           Meditation  17:30:00  18:30:00
4                Books  21:30:00  22:30:00
..                 ...       ...       ...
452         Eat Greens  11:00:00  12:00:00
453              Lunch  12:45:00  13:15:00
454              Lunch  12:45:00  13:15:00
455  Work and Meetings  20:00:00  21:30:00
456        Patent Work  13:15:00  17:30:00

[457 rows x 3 columns]


In [645]:
df_cleaned["startTime"] = df_cleaned["startTime"].astype(str)
df_cleaned["endTime"] = df_cleaned["endTime"].astype(str)
# Convert cleaned data to JSON format for AI processing
csv_data_json = df_cleaned.to_dict(orient="records")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["startTime"] = df_cleaned["startTime"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_cleaned["endTime"] = df_cleaned["endTime"].astype(str)


In [646]:
print(csv_data_json)

[{'title': 'Meditation', 'startTime': '07:15:00', 'endTime': '08:15:00'}, {'title': 'Lunch', 'startTime': '12:45:00', 'endTime': '13:15:00'}, {'title': 'Books', 'startTime': '21:30:00', 'endTime': '22:30:00'}, {'title': 'Meditation', 'startTime': '17:30:00', 'endTime': '18:30:00'}, {'title': 'Books', 'startTime': '21:30:00', 'endTime': '22:30:00'}, {'title': 'Newspaper', 'startTime': '10:15:00', 'endTime': '10:30:00'}, {'title': 'Books', 'startTime': '21:30:00', 'endTime': '22:30:00'}, {'title': 'Blinkist', 'startTime': '12:30:00', 'endTime': '12:45:00'}, {'title': 'Wake Up', 'startTime': '07:00:00', 'endTime': '07:15:00'}, {'title': 'Patent Work', 'startTime': '13:15:00', 'endTime': '17:30:00'}, {'title': 'Sleep', 'startTime': '00:00:00', 'endTime': '07:00:00'}, {'title': 'Worship and Prayer', 'startTime': '08:45:00', 'endTime': '09:45:00'}, {'title': 'Meditation', 'startTime': '07:15:00', 'endTime': '08:15:00'}, {'title': 'Worship and Prayer', 'startTime': '08:45:00', 'endTime': '09:

In [647]:
from collections import defaultdict
# Group by activity title and calculate the most common start and end times
activity_summary = defaultdict(list)

for title, group in df.groupby('title'):
    start_times = group['startTime'].astype(str).mode().values[0]
    end_times = group['endTime'].astype(str).mode().values[0]
    activity_summary[title] = f"{start_times} - {end_times}"

# Convert to JSON format
activity_summary_json = json.dumps(activity_summary, indent=4)
activity_summary_json

'{\n    "Blinkist": "12:30:00 - 12:45:00",\n    "Books": "21:30:00 - 22:30:00",\n    "Breakfast": "09:45:00 - 10:15:00",\n    "Dinner": "19:30:00 - 20:00:00",\n    "Dreams": "21:00:00 - 00:00:00",\n    "Eat": "07:00:00 - 09:30:00",\n    "Eat Greens": "11:00:00 - 12:00:00",\n    "Flow": "17:00:00 - 18:00:00",\n    "Get outside": "06:45:00 - 07:00:00",\n    "Gratitude": "19:00:00 - 21:00:00",\n    "Healthy Eats": "18:00:00 - 19:00:00",\n    "Jammin\'": "12:00:00 - 14:00:00",\n    "Lunch": "12:45:00 - 13:15:00",\n    "Meditation": "07:15:00 - 08:15:00",\n    "Meeting": "15:30:00 - 17:00:00",\n    "Megha": "22:30:00 - 00:00:00",\n    "Move": "14:00:00 - 15:00:00",\n    "Newspaper": "10:15:00 - 10:30:00",\n    "Patent Work": "13:15:00 - 17:30:00",\n    "Plan my day": "06:30:00 - 06:45:00",\n    "Play": "18:30:00 - 19:30:00",\n    "Sleep": "00:00:00 - 07:00:00",\n    "Startup Work": "10:30:00 - 12:30:00",\n    "Study": "09:30:00 - 11:00:00",\n    "Twinkle star": "00:00:00 - 06:00:00",\n    "

In [648]:
from datetime import datetime
# Convert activity times into a structured list
day_plan = []
for activity, time_range in activity_summary.items():
    start_time, end_time = time_range.split(" - ")
    start_time = datetime.strptime(start_time, "%H:%M:%S").time()
    end_time = datetime.strptime(end_time, "%H:%M:%S").time()
    day_plan.append((start_time, end_time, activity))

# Sort by start time
day_plan.sort()

# Create a structured daily schedule
formatted_schedule = "\n".join(
    [f"{start.strftime('%I:%M %p')} - {end.strftime('%I:%M %p')}: {activity}" for start, end, activity in day_plan]
)

formatted_schedule

"12:00 AM - 06:00 AM: Twinkle star\n12:00 AM - 07:00 AM: Sleep\n06:30 AM - 06:45 AM: Plan my day\n06:45 AM - 07:00 AM: Get outside\n07:00 AM - 07:15 AM: Wake Up\n07:00 AM - 09:30 AM: Eat\n07:15 AM - 08:15 AM: Meditation\n08:15 AM - 08:45 AM: YSS Books\n08:45 AM - 09:45 AM: Worship and Prayer\n09:30 AM - 11:00 AM: Study\n09:45 AM - 10:15 AM: Breakfast\n10:15 AM - 10:30 AM: Newspaper\n10:30 AM - 12:30 PM: Startup Work\n11:00 AM - 12:00 PM: Eat Greens\n12:00 PM - 02:00 PM: Jammin'\n12:30 PM - 12:45 PM: Blinkist\n12:45 PM - 01:15 PM: Lunch\n01:15 PM - 05:30 PM: Patent Work\n02:00 PM - 03:00 PM: Move\n03:30 PM - 05:00 PM: Meeting\n05:00 PM - 06:00 PM: Flow\n06:00 PM - 07:00 PM: Healthy Eats\n06:30 PM - 07:30 PM: Play\n07:00 PM - 09:00 PM: Gratitude\n07:30 PM - 08:00 PM: Dinner\n08:00 PM - 09:30 PM: Work and Meetings\n09:00 PM - 12:00 AM: Dreams\n09:30 PM - 10:30 PM: Books\n10:30 PM - 12:00 AM: Megha"

In [649]:
llm = Ollama(model="llama3.2:latest")

In [650]:
### **Step 1: Extract User’s Past Activity Patterns**
extract_prompt_template = f"""
You are an AI that helps users optimize their daily schedule based on feedback. You must return the **updated schedule in valid JSON format only**, with no additional explanation.

### **User's Schedule**
```json
{formatted_schedule}

User's Feedback

"{user_feedback}"
Update Instructions

    If the user says "I didn't get enough sleep", adjust bedtime earlier.
    If the user says "I want more time for exercise", extend Play or Move time.
    If the user says "I need more time for work", optimize meetings or Startup Work.
    Ensure there is a balanced schedule.

Expected JSON Output Format

You must strictly return in the following structure with the times:

"{formatted_schedule}"
Return only valid this. Do not include any explanations, introductions, or additional text.

"""

In [651]:
user_feedback = "I want more time to work."

In [652]:
# Format the prompt by injecting variables
formatted_prompt = extract_prompt_template.format(
    formatted_schedule=formatted_schedule,  # Your schedule data
    user_feedback=user_feedback  # The user's feedback
    
)

In [653]:
# Send the properly formatted prompt to Ollama
response_llama_1 = llm.invoke(formatted_prompt)

In [654]:
try: 
    updated_schedule = json.loads(response) 
    print("Updated Schedule:", updated_schedule) 
except json.JSONDecodeError: 
    print(response_llama_1)

```json
12:00 AM - 06:00 AM: Twinkle star
12:00 AM - 07:00 AM: Sleep
06:30 AM - 06:45 AM: Plan my day
06:45 AM - 07:00 AM: Get outside
07:00 AM - 07:15 AM: Wake Up
07:00 AM - 09:30 AM: Eat
07:15 AM - 08:15 AM: Meditation
08:15 AM - 10:45 AM: Worship and Prayer
08:45 AM - 09:45 AM: YSS Books
09:30 AM - 11:00 AM: Study
09:45 AM - 10:15 AM: Breakfast
10:15 AM - 10:30 AM: Newspaper
10:30 AM - 12:30 PM: Startup Work
11:00 AM - 12:00 PM: Eat Greens
12:00 PM - 02:00 PM: Jammin'
12:30 PM - 12:45 PM: Blinkist
12:45 PM - 01:15 PM: Lunch
01:15 PM - 07:30 PM: Patent Work
02:00 PM - 03:00 PM: Move
03:30 PM - 06:00 PM: Meeting
05:00 PM - 06:00 PM: Flow
06:00 PM - 07:00 PM: Healthy Eats
06:30 PM - 08:30 PM: Play
07:00 PM - 09:00 PM: Gratitude
07:30 PM - 08:00 PM: Dinner
08:00 PM - 12:00 AM: Work and Meetings
```


In [655]:
response_llama_2 = f"""
You are an AI that optimizes a daily schedule by ensuring **no overlapping activities** and maintaining a **well-structured, balanced routine**.

### **User's Current Schedule (Needs Optimization)**
```json
{response_llama_1}

"{user_feedback}"
Update Instructions

    If the user says "I didn't get enough sleep", adjust bedtime earlier.
    If the user says "I want more time for exercise", extend Play or Move time.
    If the user says "I need more time for work", optimize meetings or Startup Work.
    Ensure there is a balanced schedule.

Optimization Instructions

    Make sure there are at least 6 hours for sleep every day. 
    Ensure no overlapping activities. Each activity must have a distinct time slot.
    Maintain a 24-hour schedule. The schedule must not exceed 24 hours.
    Preserve all activities, adjusting their order and duration as needed.
    Keep essential activities intact (Sleep, meals, work) and ensure their placement makes sense.
    Optimize gaps. Fill any long periods of inactivity with relevant activities.
    Sort events chronologically from 12:00 AM to 11:59 PM.
    Do not modify activity names, only adjust their time slots.
    Do not include duplicates—each activity should only appear once per day.
    Ensure the total schedule time sums up correctly (no missing time slots or overlaps).

Do not include explanations, introductions, or additional text.



"""

In [656]:
# Format the prompt by injecting variables
formatted_prompt = extract_prompt_template.format(
    response_llama_1=response_llama_1,  # Your schedule data
    user_feedback=user_feedback  # The user's feedback
    
)

optimized_response = llm.invoke(response_llama_2)

In [657]:
try: 
    optimized_schedule = json.loads(optimized_response) 
    print("Optimized Schedule:", optimized_schedule) 
except json.JSONDecodeError: 
    print(optimized_response)

```json
12:00 AM - 02:30 AM: Sleep
02:30 AM - 06:45 AM: Twinkle star
06:45 AM - 07:00 AM: Get outside
07:00 AM - 09:15 AM: Meditation
07:15 AM - 10:15 AM: Startup Work
09:15 AM - 12:15 PM: Worship and Prayer
09:15 AM - 11:15 AM: Eat
09:45 AM - 11:15 AM: Breakfast
10:30 AM - 12:30 PM: Study
11:00 AM - 01:00 PM: Eat Greens
12:00 PM - 02:00 PM: Jammin'
12:30 PM - 01:30 PM: Blinkist
01:30 PM - 04:15 PM: Patent Work
03:00 PM - 05:00 PM: Move
05:00 PM - 06:00 PM: Flow
06:00 PM - 08:00 PM: Healthy Eats
07:00 PM - 09:00 PM: Gratitude
07:30 PM - 09:00 PM: Dinner
08:15 PM - 12:15 AM: Work and Meetings
```


In [678]:
from datetime import datetime, timedelta


In [680]:

def parse_time(time_str):
    """Converts 'hh:mm AM/PM' time to a datetime object for sorting and comparison."""
    return datetime.strptime(time_str, "%I:%M %p")

def format_time(time_obj):
    """Converts datetime object back to 'hh:mm AM/PM' string format."""
    return time_obj.strftime("%I:%M %p")

In [684]:
schedule_sorted = sorted(
    [(parse_time(start), parse_time(end), activity) for start, end, activity in optimized_response],
    key=lambda x: x[0]
)


ValueError: not enough values to unpack (expected 3, got 1)

In [675]:
response_llama_3 = f"""
You are an AI that optimizes a daily schedule by ensuring **no overlapping activities** and maintaining a well-structured, balanced routine.

 User's Current Schedule (Needs Optimization)
{response_llama_2}

Optimization Instructions

    Make sure there are at least 6 hours for sleep every day. 
    Ensure **no overlapping activities**. Each activity must have a distinct time slot.
    Maintain a 24-hour schedule. The schedule must not exceed **24 hours**.
    Preserve all activities, adjusting their order and duration as needed.
    Keep essential activities intact (Sleep, meals, work) and ensure their placement makes sense.
    Optimize gaps. Fill any long periods of inactivity with relevant activities.
    Sort events chronologically from 12:00 AM to 11:59 PM.
    Do not modify activity names, only adjust their time slots.
    Do not include duplicates—each activity should only appear once per day.
    Ensure the total schedule time sums up correctly (no missing time slots or overlaps).

Do not include explanations, introductions, or additional text.



"""

In [676]:
# Format the prompt by injecting variables
formatted_prompt = extract_prompt_template.format(
    response_llama_2=response_llama_2
    
)

response_llama_3 = llm.invoke(response_llama_3)

In [677]:
try: 
    response_llama_3 = json.loads(response_llama_3) 
    print("Optimized Schedule:", response_llama_3) 
except json.JSONDecodeError: 
    print(response_llama_3)

```json
12:00 AM - 06:45 AM: Twinkle star
06:45 AM - 07:15 AM: Sleep
07:15 AM - 08:00 AM: Wake Up
08:00 AM - 09:30 AM: Eat
08:00 AM - 10:00 AM: Meditation
08:15 AM - 11:15 AM: Worship and Prayer
08:45 AM - 10:15 AM: YSS Books
09:30 AM - 12:30 PM: Startup Work
10:00 AM - 11:00 AM: Flow
11:00 AM - 12:00 PM: Eat Greens
12:00 PM - 01:00 PM: Jammin'
12:45 PM - 02:15 PM: Blinkist
01:15 PM - 03:30 PM: Patent Work
03:30 PM - 05:00 PM: Meeting
05:00 PM - 06:30 PM: Healthy Eats
06:00 PM - 08:00 PM: Play
07:00 PM - 09:00 PM: Gratitude
07:30 PM - 08:45 PM: Dinner
08:45 PM - 12:15 AM: Work and Meetings
```


In [None]:
#figure out cost analysis for llama/chatgpt/otherAI
#