**Author:** Luís Sousa 

**Contact:** luis.95.sousa.31@gmail.com

**Update log:**


---

*Notebook created by Luís Sousa. Feel free to reach out for any questions or collaborations.*

---

# Fetch data from email

In [None]:
import imaplib
import email
from email.header import decode_header
import os
import re

# Credentials are read from the environment so real secrets never have to be
# written into the notebook. The placeholder defaults keep the public example
# importable as-is; set MOOD_LOG_EMAIL / MOOD_LOG_PASSWORD /
# MOOD_LOG_IMAP_SERVER (e.g. from a .env file) before running for real.
EMAIL = os.environ.get("MOOD_LOG_EMAIL", "your_email@example.com")
PASSWORD = os.environ.get("MOOD_LOG_PASSWORD", "your_password")
IMAP_SERVER = os.environ.get("MOOD_LOG_IMAP_SERVER", "imap.example.com")

# Search criteria
SEARCH_SUBJECT = "Mental Health Log"
DOWNLOAD_FOLDER = './downloads'  # Folder where attachments will be saved

# Create the download folder up front so saving an attachment never fails on a
# missing directory.
os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

def sanitize_filename(filename):
    """Return a flat, filesystem-safe file name derived from *filename*.

    Directory components are dropped and the characters ``\\ / * ? : " < > |``
    are replaced with underscores.

    Args:
        filename: Attachment name as found in the e-mail (may contain a path).

    Returns:
        The cleaned name. Inputs that reduce to nothing usable (``""`` or only
        dots/spaces such as ``"."`` and ``".."``) yield ``"attachment"`` so
        the result can always be opened for writing inside a folder.
    """
    filename = os.path.basename(filename)
    filename = re.sub(r'[\\/*?:"<>|]', "_", filename)
    # "", "." and ".." would resolve to the download directory (or its parent)
    # instead of a file, so substitute a neutral name.
    if not filename.strip(" ."):
        return "attachment"
    return filename

def connect_to_mail():
    """Open an SSL IMAP connection to ``IMAP_SERVER`` and log in.

    Returns:
        The authenticated ``imaplib.IMAP4_SSL`` connection. The caller is
        responsible for calling ``logout()`` on it.

    Raises:
        Whatever ``imaplib`` raises for a failed connection or login; on a
        login failure the socket is closed before the error propagates.
    """
    mail = imaplib.IMAP4_SSL(IMAP_SERVER)
    try:
        mail.login(EMAIL, PASSWORD)
    except Exception:
        # The connection is already open at this point; close it so a rejected
        # login does not leak the socket.
        mail.shutdown()
        raise
    return mail

def search_emails(mail, subject):
    """Return the ids of inbox messages whose subject contains *subject*.

    Args:
        mail: Authenticated IMAP connection (``select``/``search`` are used).
        subject: Text to look for in the Subject header.

    Returns:
        A list of message ids as returned by the server (bytes), or an empty
        list when the search fails or matches nothing.
    """
    mail.select("inbox")
    # Backslash and double quote are the characters that must be escaped
    # inside an IMAP quoted string; left as-is they end the criterion early
    # and the server rejects the command.
    quoted_subject = subject.replace("\\", "\\\\").replace('"', '\\"')
    result, data = mail.search(None, f'(SUBJECT "{quoted_subject}")')
    # data may be [None] or [b""] when nothing matched.
    if result == "OK" and data and data[0]:
        return data[0].split()
    return []

def _decode_mime_header(raw_value, default=""):
    """Decode an RFC 2047 header value into text, joining all its fragments."""
    if raw_value is None:
        return default
    pieces = []
    for chunk, charset in decode_header(raw_value):
        if isinstance(chunk, bytes):
            try:
                chunk = chunk.decode(charset or "utf-8", errors="replace")
            except LookupError:
                # The sender declared a charset Python does not know.
                chunk = chunk.decode("utf-8", errors="replace")
        pieces.append(chunk)
    return "".join(pieces)


def download_attachments(mail, email_id):
    """Save every named attachment of one message into ``DOWNLOAD_FOLDER``.

    Args:
        mail: Authenticated IMAP connection with a mailbox selected.
        email_id: Message id as returned by the IMAP search (bytes or str).

    Returns:
        None. Progress is reported with ``print``; a failed fetch is reported
        and skipped rather than raised.
    """
    result, msg_data = mail.fetch(email_id, "(RFC822)")
    if result != "OK":
        print(f"Failed to fetch email {email_id.decode() if isinstance(email_id, bytes) else email_id}")
        return

    for response_part in msg_data:
        # Only the tuple entries of the fetch response carry the raw message.
        if not isinstance(response_part, tuple):
            continue

        msg = email.message_from_bytes(response_part[1])
        # A message may have no Subject header at all.
        subject = _decode_mime_header(msg.get("Subject"), default="(no subject)")
        print(f"Processing email: {subject}")

        for part in msg.walk():
            if part.get_content_maintype() == "multipart":
                continue
            if part.get("Content-Disposition") is None:
                continue

            filename = part.get_filename()
            if not filename:
                continue

            payload = part.get_payload(decode=True)
            filename = sanitize_filename(_decode_mime_header(filename))
            if payload is None:
                # get_payload(decode=True) returns None when there is no
                # decodable body; there is nothing to write in that case.
                print(f"Skipped (empty payload): {filename}")
                continue

            filepath = os.path.join(DOWNLOAD_FOLDER, filename)
            with open(filepath, "wb") as f:
                f.write(payload)
            print(f"Downloaded: {filename}")

def main():
    """Download the attachments of every inbox message matching ``SEARCH_SUBJECT``."""
    mail = connect_to_mail()
    try:
        print("Searching for emails with subject containing:", SEARCH_SUBJECT)
        email_ids = search_emails(mail, SEARCH_SUBJECT)
        if not email_ids:
            print("No emails found with the specified subject.")
        else:
            print(f"Found {len(email_ids)} emails. Downloading attachments...")
            for email_id in email_ids:
                download_attachments(mail, email_id)
    finally:
        # End the IMAP session even when a search or download raised.
        mail.logout()

if __name__ == "__main__":
    main()


# Compile data in a file

In [None]:
import os
import pandas as pd
import re
from typing import Optional, List
from pathlib import Path

# Configuration (mocked for public use)
# FOLDER_PATH: folder that is scanned for "*.csv" files to compile.
FOLDER_PATH = "./sample_data"  # Replace with your actual path
# OUTPUT_FILE: Excel workbook the compiled rows are written to.
OUTPUT_FILE = "./compiled/final_data.xlsx"
# DATETIME_COLUMN: name of the timestamp column every CSV must contain.
DATETIME_COLUMN = "DateTime"

def fix_datetime_format(dt: Optional[str]) -> Optional[str]:
    """Normalise a ``d/m/yy H:M[:S]`` string to ``dd/mm/20yy H:M:S``.

    Args:
        dt: Raw timestamp text: a date and a time separated by whitespace.

    Returns:
        The normalised string, or ``None`` when *dt* is not a string, is
        blank, or does not consist of exactly one date token and one time
        token with two or three ``:``-separated fields. Dates that already
        carry a four-digit year are passed through unchanged.
    """
    if not isinstance(dt, str):
        return None

    # split() with no argument tolerates tabs and repeated spaces between the
    # date and the time, and returns [] for blank input.
    tokens = dt.split()
    if len(tokens) != 2:
        return None
    date_part, time_part = tokens

    # Zero-pad day and month and expand a two-digit year to 20yy. The word
    # boundaries keep four-digit years from being touched.
    date_part = re.sub(
        r"\b(\d{1,2})/(\d{1,2})/(\d{2})\b",
        lambda m: f"{m.group(1).zfill(2)}/{m.group(2).zfill(2)}/20{m.group(3)}",
        date_part,
    )

    time_fields = time_part.split(":")
    if len(time_fields) == 2:
        # Seconds are optional in the input; default them to zero.
        time_part = f"{time_fields[0]}:{time_fields[1]}:00"
    elif len(time_fields) != 3:
        return None
    return f"{date_part} {time_part}"

def standardize_datetime_column(df: pd.DataFrame, datetime_column: str) -> pd.DataFrame:
    """Parse *datetime_column* into real timestamps on a copy of *df*.

    Values are stringified, stripped, normalised with ``fix_datetime_format``
    and parsed as ``%d/%m/%Y %H:%M:%S``; anything unparseable becomes ``NaT``
    and the number of such rows is reported with a warning.

    Returns:
        A converted copy of *df*, or *df* itself when the column is absent.
    """
    if datetime_column not in df.columns:
        return df

    converted = df.copy()
    normalised = (
        converted[datetime_column]
        .astype(str)
        .str.strip()
        .apply(fix_datetime_format)
    )
    converted[datetime_column] = pd.to_datetime(
        normalised,
        format="%d/%m/%Y %H:%M:%S",
        errors="coerce"
    )

    failed_rows = converted[datetime_column].isna().sum()
    if failed_rows > 0:
        print(f"⚠️ Warning: {failed_rows} rows had invalid DateTime format")
    return converted

def read_and_compile_csvs(folder_path: str, datetime_column: str, output_file: Optional[str] = None) -> Optional[pd.DataFrame]:
    """Combine every CSV in a folder into one frame sorted by timestamp.

    Files without *datetime_column* are skipped, unreadable files are
    reported and skipped, and rows whose timestamp cannot be parsed are
    dropped.

    Args:
        folder_path: Folder scanned (non-recursively) for ``*.csv`` files.
        datetime_column: Name of the timestamp column.
        output_file: Optional ``.xlsx`` path; when given, the result is also
            written there with timestamps formatted ``dd/mm/YYYY HH:MM:SS``.

    Returns:
        The compiled frame with *datetime_column* as real datetimes (whether
        or not a file was written), or ``None`` when the folder is missing or
        no usable CSV was found.
    """
    folder_path = Path(folder_path)
    if not folder_path.exists():
        print(f"❌ Folder '{folder_path}' does not exist")
        return None
    # glob() order depends on the filesystem; sort for reproducible runs/logs.
    csv_files = sorted(folder_path.glob("*.csv"))
    if not csv_files:
        print("❌ No CSV files found in the folder")
        return None
    dataframes: List[pd.DataFrame] = []
    total_rows = 0
    for file in csv_files:
        try:
            df = pd.read_csv(file, encoding="utf-8")
            if datetime_column in df.columns:
                print(f"📂 Processing {file.name} ({len(df)} rows)")
                df = standardize_datetime_column(df, datetime_column)
                dataframes.append(df)
                total_rows += len(df)
            else:
                print(f"⚠️ Skipping {file.name} - No '{datetime_column}' column found")
        except Exception as e:
            # Deliberately broad: one bad file must not abort the whole batch.
            print(f"❌ Error reading {file.name}: {str(e)}")
    if not dataframes:
        print("❌ No valid CSV files were processed")
        return None
    print(f"🔄 Combining {len(dataframes)} files ({total_rows} total rows)...")
    compiled_df = pd.concat(dataframes, ignore_index=True)
    initial_rows = len(compiled_df)
    compiled_df = compiled_df.dropna(subset=[datetime_column])
    # A stable sort keeps rows with identical timestamps in file order.
    compiled_df = compiled_df.sort_values(by=datetime_column, kind="mergesort")
    rows_dropped = initial_rows - len(compiled_df)
    if rows_dropped > 0:
        print(f"⚠️ Dropped {rows_dropped} rows with invalid DateTime values")
    if output_file:
        output_path = Path(output_file)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        # Format the timestamps on a copy: the returned frame keeps real
        # datetimes so callers can compute min/max or sort chronologically
        # (dd/mm/YYYY strings compare lexicographically, not by date).
        export_df = compiled_df.copy()
        export_df[datetime_column] = export_df[datetime_column].dt.strftime("%d/%m/%Y %H:%M:%S")
        with pd.ExcelWriter(output_path, engine='openpyxl') as writer:
            export_df.to_excel(writer, index=False, sheet_name='Data')
        print(f"✅ Saved {len(compiled_df)} rows to '{output_path}'")
    return compiled_df

if __name__ == "__main__":
    # Entry point: compile the CSV folder, then print a short summary.
    result_df = read_and_compile_csvs(
        folder_path=FOLDER_PATH,
        datetime_column=DATETIME_COLUMN,
        output_file=OUTPUT_FILE,
    )
    if result_df is not None:
        print("📊 Final DataFrame Statistics:")
        print(f"Total rows: {len(result_df):,}")
        timestamps = result_df[DATETIME_COLUMN]
        print(f"Date range: {timestamps.min()} to {timestamps.max()}")


# Dashboard

## General overview

In [None]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the sample data
df = pd.read_excel('./compiled/final_data.xlsx') 

# Convert DateTime column
# The workbook stores timestamps as text, parsed here as dd/mm/YYYY HH:MM:SS.
df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d/%m/%Y %H:%M:%S')

# Define ordered feelings
# Listed from most unpleasant to most pleasant; this order drives the
# categorical dtype below.
feeling_order = [
    'Very Unpleasant', 'Unpleasant', 'Slightly Unpleasant',
    'Neutral', 'Pleasant', 'Very Pleasant'
]

# Ordered categorical: values not listed in feeling_order become NaN.
df['Feeling'] = pd.Categorical(df['Feeling'], categories=feeling_order, ordered=True)

# Emotion palette
# Maps an emotion label to the hex colour used for it in the charts.
emotion_colors = {
    'Joy': '#2196F3',
    'Love': '#FF69B4',
    'Calm': '#90CAF9',
    'Hope': '#AED581',
    'Contentment': '#4CAF50',
    'Surprise': '#FFD54F',
    'Nostalgia': '#CE93D8',
    'Sadness': '#9C27B0',
    'Anger': '#F44336',
    'Fear': '#7B1FA2',
    'Drained': '#B0BEC5',
    'Anxiety': '#FFC107',
    'Frustration': '#FF7043'
}

# Feeling palette: one hex colour per level of feeling_order.
feeling_colors = {
    'Very Unpleasant': '#d32f2f',
    'Unpleasant': '#f44336',
    'Slightly Unpleasant': '#ff7043',
    'Neutral': '#ffca28',
    'Pleasant': '#81c784',
    'Very Pleasant': '#2e7d32'
}

# Emotion Distribution Pie Chart
# Count once and reuse the result for values, names and colours.
emotion_counts = df['Emotion'].value_counts()
fig_emotion = px.pie(
    values=emotion_counts.values,
    names=emotion_counts.index,
    title='Emotion Distribution',
    color=emotion_counts.index,
    color_discrete_map=emotion_colors
)
fig_emotion.show()

# Feeling Distribution Pie Chart
# Slices follow feeling_order (sort=False) rather than their size.
feeling_counts = df['Feeling'].value_counts().reindex(feeling_order)
feeling_slice_colors = [feeling_colors.get(name, "#ccc") for name in feeling_order]
feeling_pie = go.Pie(
    labels=feeling_counts.index,
    values=feeling_counts.values,
    marker_colors=feeling_slice_colors,
    hovertemplate="<b>%{label}</b><br>Count: %{value}<extra></extra>",
    direction='clockwise',
    sort=False
)
fig_feeling = go.Figure(data=[feeling_pie])
fig_feeling.update_layout(title='Feeling Distribution')
fig_feeling.show()

# Mood Timeline
# One marker per entry, coloured by its feeling; unknown/missing feelings
# fall back to grey.
timeline_marker_colors = [feeling_colors.get(f, '#ccc') for f in df['Feeling']]
timeline_trace = go.Scatter(
    x=df['DateTime'],
    y=df['Feeling'],
    name='Feeling',
    mode='lines+markers',
    line=dict(color='#2196F3'),
    marker=dict(color=timeline_marker_colors)
)

fig_timeline = go.Figure()
fig_timeline.add_trace(timeline_trace)
fig_timeline.update_layout(
    title='Mood Timeline',
    xaxis_title='Date',
    yaxis_title='Feeling',
    hovermode='x unified',
    # Keep the y-axis on the unpleasant -> pleasant scale.
    yaxis=dict(categoryorder='array', categoryarray=feeling_order)
)
fig_timeline.show()

# Habits Overview
# Two side-by-side bar charts: how often Exercise and Meditation were
# logged as 'yes' vs 'no'.
fig_habits = make_subplots(rows=1, cols=2, subplot_titles=('Exercise', 'Meditation'))
habit_colors = {'yes': '#f44336', 'no': '#4CAF50'}

exercise_counts = df['Exercise'].value_counts()
meditation_counts = df['Meditation'].value_counts()

# Subplot columns follow the order of subplot_titles above.
for subplot_col, habit_counts in enumerate((exercise_counts, meditation_counts), start=1):
    fig_habits.add_trace(go.Bar(
        x=['Yes', 'No'],
        # .get(..., 0) covers a habit value that never occurs in the data.
        y=[habit_counts.get('yes', 0), habit_counts.get('no', 0)],
        marker_color=[habit_colors['yes'], habit_colors['no']]
    ), row=1, col=subplot_col)

fig_habits.update_layout(title='Habits Overview', showlegend=False)
fig_habits.show()

# Current Mood Summary
# The last row is treated as the most recent entry.
print("\nCurrent Mood:")
if df.empty:
    # iloc[-1] raises IndexError on an empty frame.
    print("No entries available.")
else:
    latest = df.iloc[-1]
    print(f"Feeling: {latest['Feeling']}")
    print(f"Emotion: {latest['Emotion']}")
    print(f"Thoughts: {latest['Thoughts']}")

# Recent Entries
print("\nRecent Entries:")
print(df.tail(5)[['DateTime', 'Feeling', 'Emotion', 'Thoughts']])

In [None]:
# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from datetime import datetime

# Read and prepare data
df = pd.read_excel('./compiled/final_data.xlsx') 
# Timestamps are stored as text in dd/mm/YYYY HH:MM:SS form.
df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d/%m/%Y %H:%M:%S')

# Add derived time features
df['Hour'] = df['DateTime'].dt.hour            # hour of day, 0-23
df['DayOfWeek'] = df['DateTime'].dt.day_name() # weekday name
df['Date'] = df['DateTime'].dt.date            # calendar date without time

# Define feeling order and colors
# Levels listed from most unpleasant to most pleasant.
feeling_order = [
    'Very Unpleasant',
    'Unpleasant',
    'Slightly Unpleasant',
    'Neutral',
    'Pleasant',
    'Very Pleasant'
]

# One hex colour per feeling level.
feeling_colors = {
    'Very Unpleasant': '#d32f2f',
    'Unpleasant': '#f44336',
    'Slightly Unpleasant': '#ff7043',
    'Neutral': '#ffca28',
    'Pleasant': '#81c784',
    'Very Pleasant': '#2e7d32'
}

# 1. Create Time-of-Day Mood Pattern Analysis
def create_hourly_mood_heatmap():
    """Build a heatmap of entry counts per hour of day and feeling.

    Reads the module-level ``df`` (columns ``Hour`` and ``Feeling``) and
    ``feeling_order``.

    Returns:
        go.Figure: Heatmap with feelings on the x-axis (unpleasant to
        pleasant), hours on the y-axis and the number of entries as colour.
    """
    hourly_mood = pd.crosstab(df['Hour'], df['Feeling'])

    # crosstab orders plain string columns alphabetically; put them back on
    # the unpleasant -> pleasant scale and zero-fill absent levels so the
    # axis is the same on every run. Unexpected labels are kept at the end.
    unexpected = [label for label in hourly_mood.columns if label not in feeling_order]
    hourly_mood = hourly_mood.reindex(columns=feeling_order + unexpected, fill_value=0)

    fig = go.Figure(data=go.Heatmap(
        z=hourly_mood.values,
        x=list(hourly_mood.columns),
        y=list(hourly_mood.index),
        colorscale=[
            [0, '#d32f2f'],
            [0.2, '#f44336'],
            [0.4, '#ff7043'],
            [0.6, '#ffca28'],
            [0.8, '#81c784'],
            [1, '#2e7d32']
        ],
        hoverongaps=False,
        hovertemplate='Hour: %{y}<br>Feeling: %{x}<br>Count: %{z}<extra></extra>'
    ))

    fig.update_layout(
        title='Mood Patterns by Hour of Day',
        xaxis_title='Feeling',
        yaxis_title='Hour of Day',
        height=400
    )

    return fig

# 2. Create Emotion-Habit Sankey
def create_emotion_habit_sankey():
    """Build a Sankey diagram linking each emotion to the habit answers.

    Reads the module-level ``df`` (columns ``Emotion``, ``Exercise`` and
    ``Meditation``). Every emotion is a source node; the four target nodes
    are Exercise/Meditation x Yes/No. A habit value of ``'yes'`` feeds the
    "Yes" node, any other value feeds the "No" node.

    Returns:
        go.Figure: The Sankey figure.
    """
    # Rows without an emotion cannot be linked to anything (groupby drops
    # them); excluding them here keeps NaN out of the node labels, where it
    # would break the string handling of labels.
    emotions = df['Emotion'].dropna().unique()
    habits = ['Exercise - Yes', 'Exercise - No', 'Meditation - Yes', 'Meditation - No']

    labels = list(emotions) + habits
    emotion_to_idx = {emotion: idx for idx, emotion in enumerate(emotions)}
    first_habit_node = len(emotions)  # habit nodes follow the emotion nodes

    source = []
    target = []
    value = []

    # (habit column, offset of its "Yes" node, offset of its "No" node)
    for column, yes_offset, no_offset in (('Exercise', 0, 1), ('Meditation', 2, 3)):
        pair_counts = df.groupby(['Emotion', column]).size()
        for (emotion, answer), count in pair_counts.items():
            if count > 0:
                source.append(emotion_to_idx[emotion])
                target.append(first_habit_node + (yes_offset if answer == 'yes' else no_offset))
                value.append(int(count))

    emotion_colors = {
            'Joy': '#2196F3',
            'Love': '#FF69B4',
            'Calm': '#90CAF9',
            'Hope': '#AED581',
            'Contentment': '#4CAF50',
            'Surprise': '#FFD54F',
            'Nostalgia': '#CE93D8',
            'Sadness': '#9C27B0',
            'Anger': '#F44336',
            'Fear': '#7B1FA2',
            'Drained': '#B0BEC5',
            'Anxiety': '#FFC107',
            'Frustration': '#FF7043'
    }

    # Emotion nodes use the palette (orange for unknown emotions); habit
    # nodes are orange for "Yes" and green for "No".
    node_colors = (
        [emotion_colors.get(emotion, '#ff7043') for emotion in emotions]
        + ['#ff7043' if 'Yes' in habit else '#81c784' for habit in habits]
    )

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(
        title='Emotion-Exercise and Meditation Relationships',
        height=600
    )

    return fig

# 3. Create Contributing Factors Analysis
def create_factors_analysis():
    """Build a sunburst chart of how often each contributing factor occurs.

    Reads the module-level ``df``; the comma-separated
    ``Contributing Factors`` column is split into individual factors, which
    are stripped of whitespace and counted.

    Side effect:
        Adds a ``Factors`` column (list of raw factor strings per row) to
        the module-level ``df``.

    Returns:
        go.Figure: Single-level sunburst with one sector per factor.
    """
    df['Factors'] = df['Contributing Factors'].str.split(',')

    # One row per factor; rows without any factors drop out as NaN.
    factor_names = df['Factors'].explode().dropna().str.strip()
    # A trailing or doubled comma yields an empty name, which is not a factor.
    factor_names = factor_names[factor_names != '']
    factor_counts = factor_names.value_counts()

    fig = go.Figure(go.Sunburst(
        labels=factor_counts.index,
        parents=[''] * len(factor_counts),
        values=factor_counts.values,
        branchvalues='total'
    ))

    fig.update_layout(
        title='Contributing Factors Analysis',
        height=500
    )

    return fig

# Create and display all visualizations
print("Generating visualizations...\n")

# Build all three figures first, then render them in the same order.
hourly_heatmap = create_hourly_mood_heatmap()
emotion_habit_sankey = create_emotion_habit_sankey()
factors_analysis = create_factors_analysis()

for _figure in (hourly_heatmap, emotion_habit_sankey, factors_analysis):
    _figure.show()

# Calculate and display insights
print("\nKey Insights:")
print("-" * 50)

# Mood volatility
# Feelings mapped onto a 0 (worst) to 5 (best) scale; volatility is the
# mean of the per-day standard deviations of that score.
mood_scores = {
    'Very Unpleasant': 0,
    'Unpleasant': 1,
    'Slightly Unpleasant': 2,
    'Neutral': 3,
    'Pleasant': 4,
    'Very Pleasant': 5
}
df['MoodScore'] = df['Feeling'].map(mood_scores)
mood_volatility = df.groupby('Date')['MoodScore'].std().mean()
print(f"Average daily mood volatility: {mood_volatility:.2f}")

# Activity correlations
exercise_impact = df.groupby('Exercise')['MoodScore'].mean()
meditation_impact = df.groupby('Meditation')['MoodScore'].mean()


def _format_score(scores, answer):
    """Return the mean score for *answer* with two decimals, or 'N/A' when absent."""
    score = scores.get(answer)
    # A plain 'N/A' default cannot go through the ':.2f' format spec, so the
    # missing case is handled before formatting.
    if score is None or pd.isna(score):
        return 'N/A'
    return f"{score:.2f}"


print("\nAverage mood scores (0-5 scale):")
print(f"When exercising: Yes = {_format_score(exercise_impact, 'yes')}, No = {_format_score(exercise_impact, 'no')}")
print(f"When meditating: Yes = {_format_score(meditation_impact, 'yes')}, No = {_format_score(meditation_impact, 'no')}")

# Most common emotion-feeling pairs
emotion_feeling_pairs = pd.crosstab(df['Emotion'], df['Feeling'])
print("\nEmotion-Feeling Combinations:")
print(emotion_feeling_pairs)

# Time of day patterns
print("\nMood by Time of Day:")
hourly_mood_avg = df.groupby('Hour')['MoodScore'].mean().sort_values(ascending=False)
best_hours = hourly_mood_avg.head(3)
worst_hours = hourly_mood_avg.tail(3)

# int(hour): the Hour column is float when any timestamp is missing, and the
# 'd' format code rejects floats.
print("\nBest hours for mood:")
for hour, score in best_hours.items():
    print(f"{int(hour):02d}:00 - Score: {score:.2f}")

print("\nWorst hours for mood:")
for hour, score in worst_hours.items():
    print(f"{int(hour):02d}:00 - Score: {score:.2f}")

# Emotion frequencies
print("\nEmotion Frequencies:")
print(df['Emotion'].value_counts())

## Day vs Night mood

In [None]:
# Import required libraries
import pandas as pd
import plotly.graph_objects as go

# Read and prepare data
df = pd.read_excel('./compiled/final_data.xlsx')
df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d/%m/%Y %H:%M:%S')
df['Hour'] = df['DateTime'].dt.hour

# Create filter masks for day and night
# Day covers hours 7..18 (07:00 up to, but not including, 19:00); every
# other entry counts as night.
entry_hour = df['Hour']
day_mask = entry_hour.ge(7) & entry_hour.lt(19)
night_mask = ~day_mask

# Function to create Sankey diagram
def create_emotion_activity_sankey(data, title):
    """Build a Sankey diagram linking emotions to activity answers.

    Args:
        data: DataFrame with ``Emotion``, ``Exercise`` and ``Meditation``
            columns.
        title: Figure title.

    Returns:
        go.Figure: Sankey with one source node per emotion in *data* and
        four target nodes (Exercise/Meditation x Yes/No). An activity value
        of ``'yes'`` feeds the "Yes" node, any other value the "No" node.
    """
    # Rows without an emotion produce no links (groupby drops them); leaving
    # them out also keeps NaN out of the node labels, where it would break
    # the string handling of labels.
    emotions = data['Emotion'].dropna().unique()
    activities = ['Exercise - Yes', 'Exercise - No', 'Meditation - Yes', 'Meditation - No']

    labels = list(emotions) + activities
    emotion_to_idx = {emotion: idx for idx, emotion in enumerate(emotions)}
    first_activity_node = len(emotions)  # activity nodes follow the emotions

    source = []
    target = []
    value = []

    # (activity column, offset of its "Yes" node, offset of its "No" node)
    for column, yes_offset, no_offset in (('Exercise', 0, 1), ('Meditation', 2, 3)):
        pair_counts = data.groupby(['Emotion', column]).size()
        for (emotion, answer), count in pair_counts.items():
            if count > 0:
                source.append(emotion_to_idx[emotion])
                target.append(first_activity_node + (yes_offset if answer == 'yes' else no_offset))
                value.append(int(count))

    emotion_colors = {
            'Joy': '#2196F3',
            'Love': '#FF69B4',
            'Calm': '#90CAF9',
            'Hope': '#AED581',
            'Contentment': '#4CAF50',
            'Surprise': '#FFD54F',
            'Nostalgia': '#CE93D8',
            'Sadness': '#9C27B0',
            'Anger': '#F44336',
            'Fear': '#7B1FA2',
            'Drained': '#B0BEC5',
            'Anxiety': '#FFC107',
            'Frustration': '#FF7043'
    }

    # Emotion nodes use the palette (orange for unknown emotions); activity
    # nodes are orange for "Yes" and green for "No".
    node_colors = (
        [emotion_colors.get(emotion, '#ff7043') for emotion in emotions]
        + ['#ff7043' if 'Yes' in activity else '#81c784' for activity in activities]
    )

    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            color=node_colors
        ),
        link=dict(
            source=source,
            target=target,
            value=value
        )
    )])

    fig.update_layout(
        title=title,
        font_size=12,
        height=600
    )

    return fig

# Create daytime Sankey
day_df = df[day_mask]
day_sankey = create_emotion_activity_sankey(
    day_df,
    'Daytime Emotion-Activity Relationships (7 AM - 7 PM)'
)
day_sankey.show()

# Create nighttime Sankey
night_df = df[night_mask]
night_sankey = create_emotion_activity_sankey(
    night_df,
    'Nighttime Emotion-Activity Relationships (7 PM - 7 AM)'
)
night_sankey.show()


# Print summary statistics
def _print_period_summary(heading, entries):
    """Print the entry count and the value counts of the three tracked columns."""
    print(heading)
    print("-" * 50)
    print(f"Total entries: {len(entries)}")
    sections = (
        ("\nEmotion frequencies:", 'Emotion'),
        ("\nExercise frequency:", 'Exercise'),
        ("\nMeditation frequency:", 'Meditation'),
    )
    for caption, column in sections:
        print(caption)
        print(entries[column].value_counts())


_print_period_summary("\nDaytime Summary (7 AM - 7 PM):", day_df)
_print_period_summary("\nNighttime Summary (7 PM - 7 AM):", night_df)

## Exercise patterns

In [None]:
# Import required libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

# Read and prepare data
df = pd.read_excel('./compiled/final_data.xlsx')
# Timestamps are stored as text in dd/mm/YYYY HH:MM:SS form.
df['DateTime'] = pd.to_datetime(df['DateTime'], format='%d/%m/%Y %H:%M:%S')
# Hour of day (0-23) of each entry.
df['Hour'] = df['DateTime'].dt.hour

# 1. Analyze exercise probability during different emotional states
def analyze_emotion_exercise_relationship():
    """Chart the share of entries with Exercise == 'yes' for each emotion.

    Reads the module-level ``df`` (columns ``Emotion`` and ``Exercise``).

    Returns:
        tuple: ``(fig, emotion_exercise)`` where ``emotion_exercise`` is the
        row-normalised Emotion x Exercise crosstab (fractions 0-1) and
        ``fig`` plots its ``'yes'`` column as percentages.
    """
    # Calculate exercise probability for each emotion
    emotion_exercise = pd.crosstab(df['Emotion'], df['Exercise'], normalize='index')
    # Without a single 'yes' entry the crosstab has no such column; treat
    # that as a probability of zero rather than failing with KeyError.
    if 'yes' not in emotion_exercise.columns:
        emotion_exercise['yes'] = 0.0

    yes_share = emotion_exercise['yes']
    average_share = yes_share.mean()  # computed once, not once per bar
    # Orange for emotions above the average exercise share, green otherwise.
    bar_colors = ['#ff7043' if share > average_share else '#81c784'
                  for share in yes_share]

    # Create bar chart
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=emotion_exercise.index,
        y=yes_share * 100,
        name='Exercise Probability',
        marker_color=bar_colors
    ))

    fig.update_layout(
        title='Probability of Exercise by Emotional State',
        xaxis_title='Emotion',
        yaxis_title='Exercise Probability (%)',
        height=400
    )

    return fig, emotion_exercise

# 2. Analyze timing patterns of exercise
def analyze_exercise_timing():
    """Chart the share of entries with Exercise == 'yes' for each hour of day.

    Reads the module-level ``df`` (columns ``Hour`` and ``Exercise``).

    Returns:
        tuple: ``(fig, hourly_exercise)`` where ``hourly_exercise`` is the
        row-normalised Hour x Exercise crosstab (fractions 0-1) and ``fig``
        plots its ``'yes'`` column as percentages.
    """
    # Calculate exercise probability by hour
    hourly_exercise = pd.crosstab(df['Hour'], df['Exercise'], normalize='index')
    # Without a single 'yes' entry the crosstab has no such column; treat
    # that as a probability of zero rather than failing with KeyError.
    if 'yes' not in hourly_exercise.columns:
        hourly_exercise['yes'] = 0.0

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=hourly_exercise.index,
        y=hourly_exercise['yes'] * 100,
        mode='lines+markers',
        name='Exercise Probability',
        line=dict(color='#ff7043')
    ))

    fig.update_layout(
        title='Exercise Probability Throughout the Day',
        xaxis_title='Hour of Day',
        yaxis_title='Exercise Probability (%)',
        height=400
    )

    return fig, hourly_exercise

# 3. Analyze contributing factors relation to exercise
def analyze_contributing_factors():
    """Chart the share of Exercise == 'yes' entries per contributing factor.

    Reads the module-level ``df``; the comma-separated
    ``Contributing Factors`` column is split so that every factor of an
    entry is paired with that entry's ``Exercise`` value.

    Side effect:
        Adds a ``Factors`` column (list of raw factor strings per row) to
        the module-level ``df``.

    Returns:
        tuple: ``(fig, factor_exercise)`` where ``factor_exercise`` is the
        row-normalised Factor x Exercise crosstab (fractions 0-1) and
        ``fig`` plots its ``'yes'`` column as percentages.
    """
    # Split contributing factors and create pairs with exercise status
    df['Factors'] = df['Contributing Factors'].str.split(',')

    # One row per (factor, exercise) pair; entries without factors drop out.
    # The index is reset because explode() repeats the original row labels.
    factor_df = (
        df[['Factors', 'Exercise']]
        .explode('Factors')
        .dropna(subset=['Factors'])
        .rename(columns={'Factors': 'Factor'})
        .reset_index(drop=True)
    )
    factor_df['Factor'] = factor_df['Factor'].str.strip()
    # A trailing or doubled comma yields an empty name, which is not a factor.
    factor_df = factor_df[factor_df['Factor'] != '']

    factor_exercise = pd.crosstab(factor_df['Factor'], factor_df['Exercise'], normalize='index')
    # No 'yes' entry at all (or no factors at all) leaves the column out.
    if 'yes' not in factor_exercise.columns:
        factor_exercise['yes'] = 0.0

    # Create bar chart
    fig = go.Figure()
    fig.add_trace(go.Bar(
        x=factor_exercise.index,
        y=factor_exercise['yes'] * 100,
        name='Exercise Probability',
        marker_color='#ff7043'
    ))

    fig.update_layout(
        title='Exercise Probability by Contributing Factor',
        xaxis_title='Contributing Factor',
        yaxis_title='Exercise Probability (%)',
        height=500,
        xaxis_tickangle=-45
    )

    return fig, factor_exercise

# 4. Analyze feeling intensity and exercise
def analyze_feeling_intensity():
    """Compare feeling intensity between entries with and without exercise.

    Reads the module-level ``df`` and adds a ``Feeling_Intensity`` column to
    it (feelings mapped onto -3 .. 2, with Neutral at 0).

    Returns:
        tuple: ``(fig, exercise_intensity)`` where ``exercise_intensity``
        holds the rounded mean and standard deviation of the intensity per
        ``Exercise`` value and ``fig`` shows one box plot per group.
    """
    # Numeric scale, most unpleasant to most pleasant.
    intensity_scale = {
        'Very Unpleasant': -3,
        'Unpleasant': -2,
        'Slightly Unpleasant': -1,
        'Neutral': 0,
        'Pleasant': 1,
        'Very Pleasant': 2
    }
    df['Feeling_Intensity'] = df['Feeling'].map(intensity_scale)

    # Mean and spread of the intensity for each Exercise value.
    exercise_intensity = (
        df.groupby('Exercise')['Feeling_Intensity']
        .agg(['mean', 'std'])
        .round(2)
    )

    # One horizontal box per group: (Exercise value, trace name, colour).
    box_specs = (
        ('yes', 'Exercised', '#ff7043'),
        ('no', 'Did Not Exercise', '#81c784'),
    )
    fig = go.Figure()
    for answer, trace_name, colour in box_specs:
        fig.add_trace(go.Box(
            x=df.loc[df['Exercise'] == answer, 'Feeling_Intensity'],
            name=trace_name,
            marker_color=colour
        ))

    fig.update_layout(
        title='Feeling Intensity Distribution: Exercise vs Non-Exercise',
        xaxis_title='Feeling Intensity',
        height=400
    )

    return fig, exercise_intensity

# Generate all analyses
# Each analysis returns (figure, statistics); the statistics are printed below.
emotion_fig, emotion_stats = analyze_emotion_exercise_relationship()
timing_fig, timing_stats = analyze_exercise_timing()
factors_fig, factors_stats = analyze_contributing_factors()
intensity_fig, intensity_stats = analyze_feeling_intensity()

# Display visualizations
for _figure in (emotion_fig, timing_fig, factors_fig, intensity_fig):
    _figure.show()

# Print statistical insights
print("\nActivity Analysis Results:")
print("-" * 50)

print("\n1. Emotion-Based Exercise Probabilities:")
print(emotion_stats['yes'].sort_values(ascending=False) * 100)

print("\n2. Peak Exercise Hours:")
peak_hours = timing_stats['yes'].sort_values(ascending=False).head(3)
print(peak_hours * 100)

print("\n3. Contributing Factors Impact on Exercise:")
print(factors_stats['yes'].sort_values(ascending=False) * 100)

print("\n4. Feeling Intensity Analysis:")
print("Average feeling intensity when exercising vs not exercising:")
print(intensity_stats)

# Calculate conditional probabilities
print("\n5. Additional Insights:")
negative_emotions = ['Anxiety', 'Frustration', 'Anger', 'Sadness', 'Fear', 'Drained']
positive_emotions = ['Joy', 'Contentment', 'Love', 'Calm', 'Hope', 'Surprise', 'Nostalgia']


def _exercise_share(emotion_group):
    """Return the fraction of entries in *emotion_group* with Exercise == 'yes'.

    Returns NaN when no entry has one of the given emotions, instead of
    dividing by zero.
    """
    matching = df[df['Emotion'].isin(emotion_group)]
    if matching.empty:
        return float('nan')
    return (matching['Exercise'] == 'yes').sum() / len(matching)


neg_exercise_prob = _exercise_share(negative_emotions)
pos_exercise_prob = _exercise_share(positive_emotions)

print(f"\nExercise probability during negative emotions: {neg_exercise_prob:.2%}")
print(f"Exercise probability during positive emotions: {pos_exercise_prob:.2%}")