# Tool to visualize usage log

In [None]:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Voila Dashboard for Data Analysis
Run with: voila notebook_name.ipynb
"""

import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import datetime as dt
from IPython.display import display
import ipywidgets as widgets

# Load data from notebook_usage.log file
df_original = pd.read_csv('notebook_usage.log')

# Data preprocessing for original data
df_original['DateTime'] = pd.to_datetime(df_original['Date'] + ' ' + df_original['Time'])
df_original['DayOfWeek'] = df_original['DateTime'].dt.day_name()
df_original['Hour'] = df_original['DateTime'].dt.hour
df_original['DayOfWeekNum'] = df_original['DateTime'].dt.dayofweek  # For ordering

# Initialize filtered dataframe
df = df_original.copy()

# Create dashboard title
title_widget = widgets.HTML(
    value="<h1 style='text-align: center; color: #2E86AB;'>Data Usage Dashboard</h1>",
    layout=widgets.Layout(margin='20px 0')
)

# Create collapsible filter controls
filter_toggle_button = widgets.Button(
    description='▶ Show Filters',
    button_style='info',
    layout=widgets.Layout(width='150px', margin='10px 0')
)

filter_content = widgets.VBox(layout=widgets.Layout(display='none'))  # Hidden by default

# Get unique values
all_users = sorted(df_original['User'].unique().tolist())
all_apps = sorted(df_original['App'].unique().tolist())

# User exclusion filter (checkboxes)
user_exclude_title = widgets.HTML(value="<b>Exclude Users:</b>")
user_checkboxes = {}
user_checkbox_widgets = []

for user in all_users:
    checkbox = widgets.Checkbox(
        value=False,  # False means not excluded by default
        description=f'{user} ({df_original[df_original["User"] == user].shape[0]} entries)',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='300px')
    )
    user_checkboxes[user] = checkbox
    user_checkbox_widgets.append(checkbox)

user_filter_box = widgets.VBox([user_exclude_title] + user_checkbox_widgets[:len(user_checkbox_widgets)//2])
user_filter_box2 = widgets.VBox([widgets.HTML(value="<br>")] + user_checkbox_widgets[len(user_checkbox_widgets)//2:])

# App exclusion filter (checkboxes)  
app_exclude_title = widgets.HTML(value="<b>Exclude Apps:</b>")
app_checkboxes = {}
app_checkbox_widgets = []

for app in all_apps:
    checkbox = widgets.Checkbox(
        value=False,  # False means not excluded by default
        description=f'{app} ({df_original[df_original["App"] == app].shape[0]} entries)',
        style={'description_width': 'initial'},
        layout=widgets.Layout(width='300px')
    )
    app_checkboxes[app] = checkbox
    app_checkbox_widgets.append(checkbox)

app_filter_box = widgets.VBox([app_exclude_title] + app_checkbox_widgets)

# Date range filter
min_date = df_original['DateTime'].dt.date.min()
max_date = df_original['DateTime'].dt.date.max()

date_filter_title = widgets.HTML(value="<b>Date Range:</b>")
start_date_picker = widgets.DatePicker(
    description='From:',
    value=min_date,
    disabled=False
)

end_date_picker = widgets.DatePicker(
    description='To:',
    value=max_date,
    disabled=False
)

# Quick exclude buttons
quick_exclude_title = widgets.HTML(value="<b>Quick Actions:</b>")
exclude_stewards_btn = widgets.Button(
    description='Exclude Stewards',
    button_style='warning',
    layout=widgets.Layout(width='140px', margin='2px'),
    tooltip='Exclude edgar and michaelgoette'
)

# Control buttons
reset_button = widgets.Button(
    description='Reset All Filters',
    button_style='info',
    layout=widgets.Layout(width='150px', margin='10px 5px')
)

update_button = widgets.Button(
    description='Update Charts',
    button_style='primary',
    layout=widgets.Layout(width='150px', margin='10px 5px')
)

# Auto-update checkbox
auto_update_checkbox = widgets.Checkbox(
    value=True,
    description='Auto-update charts',
    style={'description_width': 'initial'}
)

# Populate filter content
filters_row1 = widgets.HBox([
    user_filter_box, 
    user_filter_box2, 
    app_filter_box
], layout=widgets.Layout(align_items='flex-start'))

date_section = widgets.VBox([
    date_filter_title,
    widgets.HBox([start_date_picker, end_date_picker])
])

quick_actions = widgets.VBox([
    quick_exclude_title,
    exclude_stewards_btn
])

controls_section = widgets.HBox([
    date_section,
    widgets.HTML(value="&nbsp;" * 10),  # spacer
    quick_actions
], layout=widgets.Layout(align_items='flex-start'))

filter_buttons = widgets.HBox([
    auto_update_checkbox,
    widgets.HTML(value="&nbsp;" * 5),  # spacer
    update_button, 
    reset_button
])

# Add all filter content to the collapsible container
filter_content.children = [
    filters_row1,
    controls_section,
    filter_buttons
]

# Create output widgets for each plot
out1 = widgets.Output()
out2 = widgets.Output()
out3 = widgets.Output()
out4 = widgets.Output()
out5 = widgets.Output()

# Create summary widget
summary_widget = widgets.HTML()

# Function to update data based on filters
def update_filtered_data():
    global df
    
    # Start with original data
    filtered_df = df_original.copy()
    
    # User exclusion filter
    excluded_users = [user for user, checkbox in user_checkboxes.items() if checkbox.value]
    if excluded_users:
        filtered_df = filtered_df[~filtered_df['User'].isin(excluded_users)]
    
    # App exclusion filter
    excluded_apps = [app for app, checkbox in app_checkboxes.items() if checkbox.value]
    if excluded_apps:
        filtered_df = filtered_df[~filtered_df['App'].isin(excluded_apps)]
    
    # Date range filter
    start_date = start_date_picker.value
    end_date = end_date_picker.value
    if start_date and end_date:
        filtered_df = filtered_df[
            (filtered_df['DateTime'].dt.date >= start_date) & 
            (filtered_df['DateTime'].dt.date <= end_date)
        ]
    
    df = filtered_df
    return df

# Function to create all charts
def create_charts():
    # Clear previous outputs
    for out in [out1, out2, out3, out4, out5]:
        out.clear_output()
    
    if len(df) == 0:
        with out1:
            print("No data matches the current filters.")
        return
    
    # 1. Histogram per days of the week
    day_counts = df.groupby(['DayOfWeek', 'DayOfWeekNum']).size().reset_index(name='Count')
    day_counts = day_counts.sort_values('DayOfWeekNum')

    fig1 = px.bar(
        day_counts, 
        x='DayOfWeek', 
        y='Count',
        title='Usage by Day of Week',
        color='Count',
        color_continuous_scale='Blues'
    )
    fig1.update_layout(
        title_x=0.5,
        height=400,
        xaxis_title="Day of Week",
        yaxis_title="Number of Uses"
    )

    # 2. Histogram per hour of the day
    hour_counts = df.groupby('Hour').size().reset_index(name='Count')

    fig2 = px.bar(
        hour_counts,
        x='Hour',
        y='Count',
        title='Usage by Hour of Day',
        color='Count',
        color_continuous_scale='Greens'
    )
    fig2.update_layout(
        title_x=0.5,
        height=400,
        xaxis_title="Hour of Day",
        yaxis_title="Number of Uses",
        xaxis=dict(tickmode='linear', tick0=0, dtick=2)
    )

    # 3. Column plot by person (ordered by decreasing)
    user_counts = df.groupby('User').size().reset_index(name='Count')
    user_counts = user_counts.sort_values('Count', ascending=False)

    fig3 = px.bar(
        user_counts,
        x='User',
        y='Count',
        title='Usage by Person (Decreasing Order)',
        color='Count',
        color_continuous_scale='Reds'
    )
    fig3.update_layout(
        title_x=0.5,
        height=400,
        xaxis_title="User",
        yaxis_title="Number of Uses"
    )

    # 4. Column plot by app
    app_counts = df.groupby('App').size().reset_index(name='Count')

    fig4 = px.bar(
        app_counts,
        x='App',
        y='Count',
        title='Usage by Application',
        color='Count',
        color_continuous_scale='Oranges'
    )
    fig4.update_layout(
        title_x=0.5,
        height=400,
        xaxis_title="Application",
        yaxis_title="Number of Uses"
    )

    # 5. Column plot by file (ordered by decreasing)
    file_counts = df.groupby('File').size().reset_index(name='Count')
    file_counts = file_counts.sort_values('Count', ascending=False)

    # Truncate long file names for better display
    file_counts['FileShort'] = file_counts['File'].apply(
        lambda x: x.split('/')[-1] if '/' in x else x
    )

    fig5 = px.bar(
        file_counts,
        x='FileShort',
        y='Count',
        title='Usage by File (Decreasing Order)',
        color='Count',
        color_continuous_scale='Purples',
        hover_data=['File']  # Show full path on hover
    )
    fig5.update_layout(
        title_x=0.5,
        height=400,
        xaxis_title="File",
        yaxis_title="Number of Uses",
        xaxis_tickangle=-45
    )

    # Display charts in output widgets
    with out1:
        fig1.show()
        
    with out2:
        fig2.show()
        
    with out3:
        fig3.show()
        
    with out4:
        fig4.show()
        
    with out5:
        fig5.show()
    
    # Update summary statistics
    update_summary_stats()

# Function to update summary statistics
def update_summary_stats():
    if len(df) > 0:
        user_counts = df.groupby('User').size().reset_index(name='Count').sort_values('Count', ascending=False)
        file_counts = df.groupby('File').size().reset_index(name='Count').sort_values('Count', ascending=False)
        
        summary_stats = f"""
        <div style='background-color: #f0f0f0; padding: 20px; margin: 20px 0; border-radius: 10px;'>
        <h3 style='color: #2E86AB;'>Summary Statistics (Filtered Data)</h3>
        <ul>
        <li><strong>Total Records:</strong> {len(df)} (of {len(df_original)} total)</li>
        <li><strong>Unique Users:</strong> {df['User'].nunique()}</li>
        <li><strong>Unique Apps:</strong> {df['App'].nunique()}</li>
        <li><strong>Unique Files:</strong> {df['File'].nunique()}</li>
        <li><strong>Date Range:</strong> {df['Date'].min()} to {df['Date'].max()}</li>
        <li><strong>Most Active User:</strong> {user_counts.iloc[0]['User']} ({user_counts.iloc[0]['Count']} uses)</li>
        <li><strong>Most Used File:</strong> {file_counts.iloc[0]['File'].split('/')[-1]} ({file_counts.iloc[0]['Count']} uses)</li>
        </ul>
        </div>
        """
    else:
        summary_stats = """
        <div style='background-color: #f0f0f0; padding: 20px; margin: 20px 0; border-radius: 10px;'>
        <h3 style='color: #2E86AB;'>Summary Statistics</h3>
        <p>No data matches the current filters.</p>
        </div>
        """
    
    summary_widget.value = summary_stats

# Function to toggle filter visibility
def toggle_filters(b):
    if filter_content.layout.display == 'none':
        filter_content.layout.display = 'block'
        filter_toggle_button.description = '▼ Hide Filters'
        filter_toggle_button.button_style = 'warning'
    else:
        filter_content.layout.display = 'none'
        filter_toggle_button.description = '▶ Show Filters'
        filter_toggle_button.button_style = 'info'

filter_toggle_button.on_click(toggle_filters)
# Event handlers
# Event handlers
def on_update_click(b):
    update_filtered_data()
    create_charts()

def on_reset_click(b):
    # Reset all user checkboxes
    for checkbox in user_checkboxes.values():
        checkbox.value = False
    # Reset all app checkboxes  
    for checkbox in app_checkboxes.values():
        checkbox.value = False
    # Reset date pickers
    start_date_picker.value = min_date
    end_date_picker.value = max_date
    update_filtered_data()
    create_charts()

def on_exclude_stewards_click(b):
    # Exclude edgar and michaelgoette (stewards)
    stewards = ['edgar', 'michaelgoette']
    for steward in stewards:
        if steward in user_checkboxes:
            user_checkboxes[steward].value = True
    if auto_update_checkbox.value:
        update_filtered_data()
        create_charts()

def on_checkbox_change(change):
    if auto_update_checkbox.value:
        update_filtered_data()
        create_charts()

def on_date_change(change):
    if auto_update_checkbox.value:
        update_filtered_data()
        create_charts()

# Bind event handlers
update_button.on_click(on_update_click)
reset_button.on_click(on_reset_click)
exclude_stewards_btn.on_click(on_exclude_stewards_click)

# Bind checkbox change events
for checkbox in user_checkboxes.values():
    checkbox.observe(on_checkbox_change, names='value')
for checkbox in app_checkboxes.values():
    checkbox.observe(on_checkbox_change, names='value')

# Bind date picker events
start_date_picker.observe(on_date_change, names='value')
end_date_picker.observe(on_date_change, names='value')

# Display dashboard
display(title_widget)
display(filter_toggle_button)
display(filter_content)

# Create a tabbed interface for better organization
tab_contents = [out1, out2, out3, out4, out5]
tab_titles = [
    'Days of Week', 
    'Hours of Day', 
    'By Person', 
    'By App', 
    'By File'
]

tab = widgets.Tab(children=tab_contents)
for i, title in enumerate(tab_titles):
    tab.set_title(i, title)

display(tab)
display(summary_widget)

# Initial setup
create_charts()

print("Dashboard loaded successfully!")
print("✓ Click '▶ Show Filters' to open the filter panel")
print("✓ Use 'Exclude Stewards' to quickly exclude edgar and michaelgoette")
print("✓ Auto-update enabled - changes apply immediately")
print("✓ Filters are hidden by default for cleaner interface")
print("To run with Voila: voila your_notebook_name.ipynb")

# Tool to save a zipfile with all the contents

In [3]:
import zipfile
import os
from IPython.display import FileLink
from datetime import datetime

def create_folder_zip(folder_path, zip_name):
    """Create a zip file of the entire folder"""
    if not os.path.exists(folder_path):
        print(f"Error: Folder '{folder_path}' does not exist!")
        return None
    
    file_count = 0
    with zipfile.ZipFile(zip_name, 'w', zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, files in os.walk(folder_path):
            for file in files:
                file_path = os.path.join(root, file)
                # Use relative path within the folder
                arcname = os.path.relpath(file_path, folder_path)
                zipf.write(file_path, arcname)
                file_count += 1
                if file_count % 5 == 0:  # Show progress every 5 files
                    print(f"Added {file_count} files...")
    
    print(f"\nCreated {zip_name} with {file_count} files")
    return FileLink(zip_name)

# Get today's date
today = datetime.now().strftime("%Y%m%d")
zip_filename = f"analysis_tools_backup_{today}.zip"

# Use current directory since you're already in the target folder
folder_path = "."

print("Creating backup of current directory...")
download_link = create_folder_zip(folder_path, zip_filename)
if download_link:
    download_link

Creating backup of current directory...
Added 5 files...
Added 10 files...
Added 15 files...
Added 20 files...
Added 25 files...
Added 30 files...
Added 35 files...
Added 40 files...
Added 45 files...
Added 50 files...
Added 55 files...
Added 60 files...
Added 65 files...
Added 70 files...
Added 75 files...
Added 80 files...
Added 85 files...
Added 90 files...
Added 95 files...
Added 100 files...
Added 105 files...
Added 110 files...
Added 115 files...
Added 120 files...
Added 125 files...
Added 130 files...
Added 135 files...
Added 140 files...
Added 145 files...
Added 150 files...
Added 155 files...
Added 160 files...
Added 165 files...
Added 170 files...
Added 175 files...
Added 180 files...
Added 185 files...
Added 190 files...
Added 195 files...
Added 200 files...
Added 205 files...
Added 210 files...
Added 215 files...
Added 220 files...
Added 225 files...
Added 230 files...

Created analysis_tools_backup_20250526.zip with 233 files
