In [70]:
import pandas as pd
import json
from datetime import timedelta

# Load the CSV file
file_path = '/assets/logs_rows.csv'
logs_df = pd.read_csv(file_path)
logs_df = logs_df.drop(columns=['id', 'user_id', 'created_at'])

In [None]:
# Function to create rows for each symptom
def expand_symptoms(row):
    try:
        symptoms = json.loads(row['symptoms'])
        expanded_rows = []
        for symptom, details in symptoms.items():
            new_row = row.copy()
            new_row['symptom'] = symptom
            if isinstance(details, dict):
                for detail_key, detail_value in details.items():
                    if detail_key == 'Notes':
                        new_row['Symptom Notes'] = detail_value
                    else:
                        new_row[detail_key] = detail_value
            else:
                new_row['details'] = details
            expanded_rows.append(new_row)
        return expanded_rows
    except Exception as e:
        print(f"Error processing row: {e}")
        print(f"Row data: {row}")
        return [row]

In [None]:
# Expand the symptoms into multiple rows
expanded_rows = logs_df.apply(lambda row: expand_symptoms(row), axis=1).explode().reset_index(drop=True)
expanded_df = pd.DataFrame(expanded_rows.tolist())

# Ensure all expected columns exist
expected_columns = ['title', 'date', 'time', 'symptom', 'Triggers', 'Intensity', 'Frequency', 'Time of Day', 'Symptom Notes', 'medications', 'notes']
for col in expected_columns:
    if col not in expanded_df.columns:
        expanded_df[col] = None

# Rearrange columns
expanded_df = expanded_df[expected_columns]

# Join the values in the 'Triggers' column with ', '
expanded_df['Triggers'] = expanded_df['Triggers'].apply(lambda x: ', '.join(x) if isinstance(x, list) else x)

In [None]:
# Join the medication names into a single string
def join_medications(medications):
    try:
        meds = json.loads(medications)
        return ', '.join(med['name'] for med in meds)
    except (json.JSONDecodeError, TypeError):
        return medications
    
expanded_df['medications'] = expanded_df['medications'].apply(join_medications)

In [None]:
# Append '/10' to the Intensity column if it exists
expanded_df['Intensity'] = expanded_df['Intensity'].apply(lambda x: f"{x}/10" if pd.notna(x) else x)

# Convert the 'time' column to datetime and format it to 'h:mm a'
expanded_df['time'] = pd.to_datetime(expanded_df['time'], format='%H:%M:%S').dt.strftime('%I:%M %p')

In [None]:
# Rename the columns
expanded_df.rename(columns={'title': 'Log Title'}, inplace=True)
expanded_df.rename(columns={'date': 'Date'}, inplace=True)
expanded_df.rename(columns={'time': 'Time'}, inplace=True)
expanded_df.rename(columns={'symptom': 'Symptom Logged'}, inplace=True)
expanded_df.rename(columns={'medications': 'Medications/Treatments'}, inplace=True)
expanded_df.rename(columns={'notes': 'Log Notes'}, inplace=True)

In [74]:
# Save the formatted dataframe to a new CSV file
formatted_file_path = '/assets/data/formatted_logs.csv'
pd.set_option('display.max_colwidth', 100)
expanded_df.to_csv(formatted_file_path, index=False)
expanded_df.head()

# print(expanded_df.head())
# column width? 
# and maybe other statistics... like most common symptom
# and then pdf 

Unnamed: 0,Log Title,Date,Time,Symptom Logged,Triggers,Intensity,Frequency,Time of Day,Symptom Notes,Medications/Treatments,Log Notes
0,Change newest?,2024-07-06,10:22 AM,Visual static,,,,,,,
0,Change newest?,2024-07-06,10:22 AM,Afterimages (Palinopsia),,,,,,,
1,Symptom Log 07/08/24,2024-07-08,03:40 PM,Visual static,,,,,,,
1,Symptom Log 07/08/24,2024-07-08,03:40 PM,Afterimages (Palinopsia),,,,,,,
2,Symptom Log 07/08/24,2024-07-08,05:30 PM,Visual static,,,,,,,
