# Visualization - Data

In [2]:

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px


# Comprehensive Accident Analysis (2000-2020): Historical Data Analysis

In [3]:
def full_analysis_plotly(raw_data_path):
    """
    Build, save and show a five-panel interactive Plotly figure of accident data.

    The CSV is reduced to rows with JAHR <= 2020, MONAT != 'Summe' and
    AUSPRAEGUNG == 'insgesamt'. MONAT is then parsed with the '%Y%m' format
    and one line per distinct MONATSZAHL value is drawn in each panel:

        row 1, col 1: WERT
        row 1, col 2: VORJAHRESWERT (dashed)
        row 2, col 1: VERAEND_VORMONAT_PROZENT
        row 2, col 2: VERAEND_VORJAHRESMONAT_PROZENT
        row 3, col 1: ZWOELF_MONATE_MITTELWERT

    The figure is written to '../results/interactive_accident_analysis.html'
    (Plotly.js referenced from the CDN) and then displayed with fig.show().

    Parameters:
    raw_data_path: Location of the CSV file, passed straight to pd.read_csv.
    """
    df = pd.read_csv(raw_data_path)

    # Apply all row filters in one mask and take an explicit copy, so that
    # adding the 'date' column below writes to an independent frame instead
    # of a slice of `df` (which triggers SettingWithCopyWarning).
    keep = (
        (df['JAHR'] <= 2020)
        & (df['MONAT'] != 'Summe')  # drop the per-year total rows
        & (df['AUSPRAEGUNG'] == 'insgesamt')
    )
    data_filtered = df[keep].copy()

    # MONAT is cast to str first so the '%Y%m' format can be applied.
    data_filtered['date'] = pd.to_datetime(
        data_filtered['MONAT'].astype(str), format='%Y%m'
    )

    fig = make_subplots(
        rows=3, cols=2,
        subplot_titles=(
            'Absolute Accident Counts (WERT)',
            'Comparison to Previous Year (VORJAHRESWERT)',
            'Monthly Change (%)',
            'Year-over-Year Change (%)',
            '12-Month Moving Average',
            ''
        ),
        vertical_spacing=0.1,
        horizontal_spacing=0.1,
        specs=[[{}, {}], [{}, {}], [{}, None]]  # Last subplot empty
    )

    colors = px.colors.qualitative.Plotly

    # Split the frame once per category instead of once per category per panel.
    categories = data_filtered['MONATSZAHL'].unique()
    subsets = [
        (category, data_filtered[data_filtered['MONATSZAHL'] == category])
        for category in categories
    ]

    # (row, col, value column, trace-name suffix, line dash, show in legend)
    # Only the first panel contributes legend entries, to avoid duplicates.
    panels = (
        (1, 1, 'WERT', '', None, True),
        (1, 2, 'VORJAHRESWERT', ' (prev year)', 'dash', False),
        (2, 1, 'VERAEND_VORMONAT_PROZENT', '', None, False),
        (2, 2, 'VERAEND_VORJAHRESMONAT_PROZENT', '', None, False),
        (3, 1, 'ZWOELF_MONATE_MITTELWERT', '', None, False),
    )

    for row, col, value_column, name_suffix, dash, in_legend in panels:
        for idx, (category, subset) in enumerate(subsets):
            # Wrap around the palette so that having more categories than
            # palette entries cannot raise IndexError.
            line_style = dict(color=colors[idx % len(colors)])
            if dash is not None:
                line_style['dash'] = dash
            fig.add_trace(
                go.Scatter(
                    x=subset['date'],
                    y=subset[value_column],
                    name=f"{category}{name_suffix}",
                    line=line_style,
                    showlegend=in_legend,
                ),
                row=row, col=col
            )
        # Show years only on every panel's time axis.
        fig.update_xaxes(tickformat="%Y", row=row, col=col)

    fig.update_layout(
        title_text='Comprehensive Accident Analysis (2000-2020)',
        height=1200,
        width=1400,
        showlegend=True,
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=-0.15,
            xanchor="center",
            x=0.5,
            font=dict(size=10)  # Smaller font
        ),
        hovermode="x unified",
        margin=dict(b=100)  # Bottom margin for legend
    )

    # Zero reference lines on the two percentage-change panels.
    fig.add_hline(y=0, row=2, col=1, line_dash="dot", line_color="gray")
    fig.add_hline(y=0, row=2, col=2, line_dash="dot", line_color="gray")

    fig.update_xaxes(title_text="Year", row=3, col=1)
    y_titles = {
        (1, 1): "Accident Count",
        (1, 2): "Accident Count",
        (2, 1): "% Change",
        (2, 2): "% Change",
        (3, 1): "Moving Average",
    }
    for (row, col), title in y_titles.items():
        fig.update_yaxes(title_text=title, row=row, col=col)

    # Save and show
    fig.write_html('../results/interactive_accident_analysis.html', include_plotlyjs='cdn')
    fig.show()

# Run the historical dashboard on the preprocessed accidents CSV.
full_analysis_plotly(
    raw_data_path='../data/preprocessed/complete_accidents_data.csv'
)

In [None]:
def create_visualizations_plotly(data):
    """
    Create interactive visualizations of alcohol-related accidents data using Plotly.

    Three stacked panels are drawn: the WERT series over time, WERT summed per
    calendar year, and WERT averaged per calendar month. The figure is written
    to '../results/interactive_alcohol_accidents.html' and then displayed.

    Parameters:
    data (pd.DataFrame): Preprocessed data containing 'time_series' and 'WERT' columns.
        'time_series' is parsed with pd.to_datetime when it is not already a
        datetime column. The frame passed in is left unmodified.

    Raises:
    ValueError: If 'time_series' or 'WERT' is missing from data.
    """
    # Input validation
    if 'time_series' not in data.columns or 'WERT' not in data.columns:
        raise ValueError("Data must contain 'time_series' and 'WERT' columns.")

    # Work on a private copy of the two columns that are used, so the
    # datetime conversion below never writes into the caller's DataFrame.
    frame = data[['time_series', 'WERT']].copy()
    if not pd.api.types.is_datetime64_any_dtype(frame['time_series']):
        frame['time_series'] = pd.to_datetime(frame['time_series'])

    # Create subplots
    fig = make_subplots(
        rows=3, cols=1,
        subplot_titles=(
            'Alcohol-Related Accidents Over Time',
            'Yearly Alcohol-Related Accidents',
            'Average Monthly Distribution'
        ),
        vertical_spacing=0.1
    )

    # 1. Monthly trend
    fig.add_trace(
        go.Scatter(
            x=frame['time_series'],
            y=frame['WERT'],
            mode='lines+markers',
            name='Monthly Accidents',
            line=dict(color='royalblue')),
        row=1, col=1
    )

    # 2. Yearly trend: total WERT per calendar year.
    yearly_data = frame.groupby(frame['time_series'].dt.year)['WERT'].sum().reset_index()
    fig.add_trace(
        go.Bar(
            x=yearly_data['time_series'],
            y=yearly_data['WERT'],
            name='Yearly Total',
            marker=dict(color='coral')),
        row=2, col=1
    )

    # 3. Monthly distribution: mean WERT per calendar month.
    # Reindex to months 1..12 so every value sits under its own label; a month
    # that is absent from the data becomes NaN instead of shifting the
    # remaining averages onto the wrong month names.
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    monthly_avg = (
        frame.groupby(frame['time_series'].dt.month)['WERT']
        .mean()
        .reindex(range(1, 13))
    )
    fig.add_trace(
        go.Bar(
            x=month_labels,
            y=monthly_avg,
            name='Monthly Average',
            marker=dict(color='lightseagreen')),
        row=3, col=1
    )

    # Update layout
    fig.update_layout(
        title_text='Alcohol-Related Accident Analysis',
        height=900,
        width=1200,
        showlegend=True,
        hovermode="x unified"
    )

    # Update axes
    fig.update_xaxes(title_text="Date", row=1, col=1)
    fig.update_xaxes(title_text="Year", row=2, col=1)
    fig.update_xaxes(title_text="Month", row=3, col=1)

    fig.update_yaxes(title_text="Number of Accidents", row=1, col=1)
    fig.update_yaxes(title_text="Total Accidents", row=2, col=1)
    fig.update_yaxes(title_text="Average Accidents", row=3, col=1)

    # Format date axes
    fig.update_xaxes(tickformat="%Y-%m", row=1, col=1)
    # NOTE(review): the x values of panel 2 are year numbers from .dt.year,
    # not datetimes - confirm this date-style tickformat has the intended
    # effect on that axis.
    fig.update_xaxes(tickformat="%Y", row=2, col=1)

    # Save and show
    fig.write_html('../results/interactive_alcohol_accidents.html')
    fig.show()


# Load the preprocessed alcohol-accident CSV and pass it to the plotting routine.
data = pd.read_csv(
    '../data/preprocessed/alcohol_accidents_preprocessed.csv'
)
create_visualizations_plotly(data=data)