In [None]:
import pandas as pd
import json
from google.colab import files
from datetime import datetime, timedelta
import numpy as np
from IPython.display import display, Markdown
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import calendar

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for the NumPy / pandas values found in the analysis results.

    The standard encoder only knows built-in Python types.  This subclass
    converts NumPy scalars and arrays to their built-in equivalents and
    date-times to ISO-8601 strings.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for *obj*.

        Called by ``json`` only for objects it cannot serialise natively.
        Anything not recognised here is passed to the base class, which
        raises ``TypeError``.
        """
        # np.bool_ is not a subclass of Python's bool, so json rejects it.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # A missing timestamp has no ISO representation; emit JSON null
        # instead of the literal text "NaT".
        if obj is pd.NaT:
            return None
        if isinstance(obj, (datetime, pd.Timestamp)):
            return obj.isoformat()
        return super().default(obj)

def load_and_clean_data():
    """Ask for an Excel upload, clean it and add the derived analysis columns.

    The first uploaded file is read with the header taken from the fourth
    row (``header=3``).  Timestamps are parsed, client aliases normalised,
    and resolution, calendar, location and time-of-day columns are added.

    Returns:
        ``(df, valid_resolutions, min_date, max_date)`` where
        ``valid_resolutions`` keeps only rows whose ``RESOLVED TIME`` is
        present and not earlier than ``EVENT DAY``.  On an upload/read error
        or when a required column is missing, a message is printed and
        ``(None, None, None, None)`` is returned.
    """
    failure = (None, None, None, None)

    print("Please upload your Excel file:")
    try:
        uploaded = files.upload()
        if not uploaded:
            raise ValueError("No file uploaded.")
        # Only the first uploaded file is used.
        first_name = next(iter(uploaded))
        df = pd.read_excel(first_name, header=3)
    except Exception as e:
        print(f"Error uploading or reading file: {e}")
        return failure

    # Refuse to continue unless every column the analysis reads is present.
    expected = [
        'EVENT DAY', 'RESOLVED TIME', 'SOURCE NAME', 'NAME',
        'PATH', 'CRITICALITY', 'CLIENT NAME',
    ]
    absent = [name for name in expected if name not in df.columns]
    if absent:
        print(f"Missing required columns: {', '.join(absent)}")
        return failure

    # Normalise raw values; unparseable timestamps become NaT (errors='coerce').
    client_aliases = {'americanauae': 'UAE', 'americanaksa': 'KSA'}
    df['EVENT DAY'] = pd.to_datetime(
        df['EVENT DAY'], format='%d %b %Y %H:%M:%S', errors='coerce', dayfirst=True
    )
    df['RESOLVED TIME'] = pd.to_datetime(df['RESOLVED TIME'], errors='coerce')
    df['CLIENT NAME'] = df['CLIENT NAME'].replace(client_aliases)
    df['SOURCE NAME'] = df['SOURCE NAME'].str.strip()

    # Minutes from event to resolution, plus the per-equipment mean of that value.
    raised_at = df['EVENT DAY']
    elapsed = df['RESOLVED TIME'] - raised_at
    df['resolution_minutes'] = elapsed.dt.total_seconds() / 60
    per_equipment = df.groupby('SOURCE NAME')['resolution_minutes']
    df['response_minutes'] = per_equipment.transform('mean')

    # Calendar breakdown of the event timestamp.
    when = raised_at.dt
    df['month'] = when.strftime('%B')
    df['week_of_month'] = (when.day - 1) // 7 + 1
    df['week'] = when.isocalendar().week
    df['day_of_week'] = when.day_name()
    df['hour'] = when.hour
    df['date'] = when.date

    # First two '/'-separated PATH segments are stored as country and city.
    path_parts = df['PATH'].str.split('/')
    df['country'] = path_parts.str[0]
    df['city'] = path_parts.str[1].fillna('Unknown')

    # Left-closed six-hour buckets: [0, 6) Night ... [18, 24) Evening.
    period_edges = [0, 6, 12, 18, 24]
    period_names = ['Night', 'Morning', 'Afternoon', 'Evening']
    df['time_of_day'] = pd.cut(df['hour'], bins=period_edges, labels=period_names, right=False)

    min_date = raised_at.min()
    max_date = raised_at.max()

    # Keep rows that have a resolution time not earlier than the event.
    has_resolution = df.dropna(subset=['RESOLVED TIME'])
    in_order = has_resolution['RESOLVED TIME'] >= has_resolution['EVENT DAY']
    valid_resolutions = has_resolution[in_order]

    return df, valid_resolutions, min_date, max_date

def visualize_data(df, min_date, max_date):
    """Render the interactive plotly dashboards for the cleaned alarm data.

    Five figures are shown in order: overall statistics, temporal patterns,
    a geography/equipment treemap, resolution-time analysis (only when at
    least one row has ``resolution_minutes``) and an equipment dashboard.

    Args:
        df: Frame carrying the columns read below ('CRITICALITY',
            'CLIENT NAME', 'SOURCE NAME', 'NAME', 'EVENT DAY', 'hour',
            'day_of_week', 'month', 'time_of_day', 'resolution_minutes' and,
            for the treemap, 'country' and 'city').
        min_date: Accepted for interface compatibility; not used here.
        max_date: Accepted for interface compatibility; not used here.

    Returns:
        None. Figures are displayed via ``fig.show()``.
    """
    print("\nGenerating Comprehensive Visualizations...")

    # Seaborn theme only. The original also opened an empty matplotlib
    # figure here that nothing drew on or closed; every chart below is a
    # plotly figure, so that stray figure is no longer created.
    sns.set(style="whitegrid")

    # 1. Overall Alarm Statistics
    # Row 1 holds two pies ('domain' cells), row 2 two bar charts, and
    # row 3 a single time series spanning both columns.
    fig = make_subplots(rows=3, cols=2,
                       specs=[[{'type':'domain'}, {'type':'domain'}],
                             [{'type':'xy'}, {'type':'xy'}],
                             [{'colspan':2}, None]],
                       subplot_titles=("Alarm Criticality Distribution", "Client Distribution",
                                      "Top 20 Equipment by Alarm Count", "Top 20 Alarm Conditions",
                                      "Alarms Over Time"))

    # Criticality pie chart
    crit_counts = df['CRITICALITY'].value_counts()
    fig.add_trace(go.Pie(labels=crit_counts.index, values=crit_counts.values, name="Criticality"), 1, 1)

    # Client distribution
    client_counts = df['CLIENT NAME'].value_counts()
    fig.add_trace(go.Pie(labels=client_counts.index, values=client_counts.values, name="Client"), 1, 2)

    # Top equipment
    top_equip = df['SOURCE NAME'].value_counts().nlargest(20)
    fig.add_trace(go.Bar(x=top_equip.values, y=top_equip.index, orientation='h', name="Equipment"), 2, 1)

    # Top conditions
    top_conds = df['NAME'].value_counts().nlargest(20)
    fig.add_trace(go.Bar(x=top_conds.values, y=top_conds.index, orientation='h', name="Conditions"), 2, 2)

    # Alarms over time: number of rows per calendar day of 'EVENT DAY'
    time_series = df.resample('D', on='EVENT DAY').size()
    fig.add_trace(go.Scatter(x=time_series.index, y=time_series.values, mode='lines', name="Daily Alarms"), 3, 1)

    fig.update_layout(height=1200, showlegend=True, title_text="Overall Alarm Statistics")
    fig.show()

    # 2. Temporal Patterns
    fig = make_subplots(rows=2, cols=2,
                       subplot_titles=("Alarms by Hour of Day", "Alarms by Day of Week",
                                      "Alarms by Month", "Alarms by Time of Day"))

    # Hourly distribution
    hour_counts = df['hour'].value_counts().sort_index()
    fig.add_trace(go.Bar(x=hour_counts.index, y=hour_counts.values, name="Hour"), 1, 1)

    # Day of week, forced into Monday..Sunday order
    day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    day_counts = df['day_of_week'].value_counts().reindex(day_order)
    fig.add_trace(go.Bar(x=day_counts.index, y=day_counts.values, name="Day"), 1, 2)

    # Monthly, forced into January..December order
    month_order = list(calendar.month_name)[1:]
    month_counts = df['month'].value_counts().reindex(month_order)
    fig.add_trace(go.Bar(x=month_counts.index, y=month_counts.values, name="Month"), 2, 1)

    # Time of day: value_counts() orders by frequency, so sort back into
    # index order to keep the bars chronological like the other panels.
    time_counts = df['time_of_day'].value_counts().sort_index()
    fig.add_trace(go.Bar(x=time_counts.index, y=time_counts.values, name="Time Period"), 2, 2)

    fig.update_layout(height=800, showlegend=False, title_text="Temporal Alarm Patterns")
    fig.show()

    # 3. Geographical Analysis
    if 'country' in df.columns:
        # NOTE(review): plotly express is understood to reject a hierarchy
        # row whose parent level is missing while a child level is set, so
        # missing country/city values are labelled 'Unknown' on a copy
        # (the caller's frame is left untouched).
        treemap_df = df.assign(
            country=df['country'].fillna('Unknown'),
            city=df['city'].fillna('Unknown'),
        )
        fig = px.treemap(treemap_df, path=['country', 'city', 'SOURCE NAME'],
                         title='Alarm Distribution by Geography and Equipment')
        fig.show()

    # 4. Resolution Time Analysis
    resolved_df = df[df['resolution_minutes'].notna()]
    if not resolved_df.empty:
        fig = make_subplots(rows=1, cols=2,
                           subplot_titles=("Resolution Time Distribution", "Resolution Time by Criticality"))

        # Histogram
        fig.add_trace(go.Histogram(x=resolved_df['resolution_minutes'], nbinsx=50, name="Resolution Time"), 1, 1)

        # Boxplot by criticality
        fig.add_trace(go.Box(x=resolved_df['CRITICALITY'], y=resolved_df['resolution_minutes'], name="By Criticality"), 1, 2)

        fig.update_layout(height=500, title_text="Alarm Resolution Time Analysis")
        fig.show()

    # 5. Equipment Performance Dashboard
    equip_stats = df.groupby('SOURCE NAME').agg(
        total_alarms=('NAME', 'count'),
        avg_resolution=('resolution_minutes', 'mean'),
        critical_alarms=('CRITICALITY', lambda x: (x == 'CRITICAL').sum()))

    fig = make_subplots(rows=1, cols=2,
                       specs=[[{'type':'xy'}, {'type':'xy'}]],
                       subplot_titles=("Equipment by Alarm Count", "Equipment by Resolution Time"))

    top_equip = equip_stats.sort_values('total_alarms', ascending=False).head(20)
    fig.add_trace(go.Bar(x=top_equip.index, y=top_equip['total_alarms'], name="Alarm Count"), 1, 1)

    # Equipment with no resolved alarm has no mean and is left out.
    slow_equip = equip_stats[equip_stats['avg_resolution'].notna()].sort_values('avg_resolution', ascending=False).head(20)
    fig.add_trace(go.Bar(x=slow_equip.index, y=slow_equip['avg_resolution'], name="Avg Resolution (min)"), 1, 2)

    fig.update_layout(height=600, title_text="Equipment Performance Dashboard")
    fig.show()

def analyze_equipment_performance(data):
    """Summarise alarm volume and resolution speed per piece of equipment.

    Args:
        data: Frame with 'SOURCE NAME', 'NAME', 'CRITICALITY',
            'resolution_minutes', 'PATH', 'CLIENT NAME' and 'EVENT DAY'.

    Returns:
        DataFrame with one row per 'SOURCE NAME', sorted by 'Total Alarms'
        descending, with columns 'Equipment', 'Total Alarms',
        'Unique Alarm Types', 'Critical Alarms', 'Mean Resolution (min)',
        'Median Resolution (min)', 'Resolution Std Dev', 'Path', 'Client'
        and 'Alarms per Day'.
    """
    # Named aggregation ties each output column to its source and function,
    # so the labels cannot drift out of step with the aggregation order.
    equip_stats = (
        data.groupby('SOURCE NAME')
        .agg(**{
            'Total Alarms': ('NAME', 'count'),
            'Unique Alarm Types': ('NAME', 'nunique'),
            'Critical Alarms': ('CRITICALITY', lambda s: (s == 'CRITICAL').sum()),
            'Mean Resolution (min)': ('resolution_minutes', 'mean'),
            'Median Resolution (min)': ('resolution_minutes', 'median'),
            'Resolution Std Dev': ('resolution_minutes', 'std'),
            'Path': ('PATH', 'first'),
            'Client': ('CLIENT NAME', 'first'),
        })
        .reset_index()
        .rename(columns={'SOURCE NAME': 'Equipment'})
    )

    # Alarm frequency over the whole-day span between first and last event.
    span = data['EVENT DAY'].max() - data['EVENT DAY'].min()
    if pd.isna(span):
        # No usable event timestamps: keep the previous "0" placeholder.
        equip_stats['Alarms per Day'] = 0
    else:
        # A dataset covering less than one full day still represents one
        # day of observation; dividing by at least 1 avoids reporting zero
        # alarms per day for equipment that clearly raised alarms.
        elapsed_days = max(span.days, 1)
        equip_stats['Alarms per Day'] = equip_stats['Total Alarms'] / elapsed_days

    return equip_stats.sort_values('Total Alarms', ascending=False)

def analyze_alarm_patterns(data):
    """Build per-type, temporal and (optionally) geographical alarm summaries.

    Returns a dict with:
        'alarm_types': records per 'NAME', most frequent first.
        'temporal_patterns': counts keyed by hour, day name and month name.
        'geographical_patterns': records per (country, city); present only
            when *data* has a 'country' column.
    """
    # Per alarm type: occurrences, distinct equipment, criticality mix, mean resolution.
    type_spec = {
        'SOURCE NAME': ['count', 'nunique'],
        'CRITICALITY': lambda x: x.value_counts().to_dict(),
        'resolution_minutes': 'mean',
    }
    by_type = data.groupby('NAME').agg(type_spec).reset_index()
    # Flatten the two-level column index produced by the aggregation.
    by_type.columns = [
        'Alarm Type',
        'Total Occurrences',
        'Affected Equipment Count',
        'Criticality Distribution',
        'Average Resolution (min)',
    ]
    ranked_types = by_type.sort_values('Total Occurrences', ascending=False)

    hourly = data['hour'].value_counts().sort_index()
    summary = {
        'alarm_types': ranked_types.to_dict('records'),
        'temporal_patterns': {
            'by_hour': hourly.to_dict(),
            'by_day': data['day_of_week'].value_counts().to_dict(),
            'by_month': data['month'].value_counts().to_dict(),
        },
    }

    # Without location columns there is nothing more to add.
    if 'country' not in data.columns:
        return summary

    # Alarm count and share of CRITICAL alarms for each country/city pair.
    geo_spec = {
        'SOURCE NAME': 'count',
        'CRITICALITY': lambda x: (x == 'CRITICAL').mean(),
    }
    by_place = data.groupby(['country', 'city']).agg(geo_spec).reset_index()
    by_place.columns = ['Country', 'City', 'Total Alarms', 'Critical Alarm Ratio']
    summary['geographical_patterns'] = by_place.to_dict('records')

    return summary

def analyze_maintenance_efficiency(data):
    """Summarise how quickly alarms get resolved and which ones keep recurring.

    Args:
        data: Frame with 'SOURCE NAME', 'NAME', 'resolution_minutes' and
            'RESOLVED TIME'.

    Returns:
        dict with:
            'resolution_times': records per alarm 'NAME' (mean, median, std
                and count of ``resolution_minutes``), slowest mean first.
                Always present; an empty list when no row has a resolution
                time, so consumers can index the key unconditionally.
            'recurring_issues': records for each (equipment, alarm) pair
                seen more than once, most frequent first.
            'unresolved_alarms': present only when some row lacks
                'RESOLVED TIME'; total count plus the top 20 equipment and
                alarm types among those rows.
    """
    # Seed the key up front: callers read it even when nothing is resolved.
    efficiency = {'resolution_times': []}

    # Resolution time analysis
    resolved = data[data['resolution_minutes'].notna()]
    if not resolved.empty:
        resolution_stats = (
            resolved.groupby('NAME')['resolution_minutes']
            .agg(['mean', 'median', 'std', 'count'])
            .reset_index()
        )
        resolution_stats.columns = [
            'Alarm Type', 'Mean Resolution', 'Median Resolution',
            'Resolution Std Dev', 'Count'
        ]
        efficiency['resolution_times'] = (
            resolution_stats.sort_values('Mean Resolution', ascending=False).to_dict('records')
        )

    # Recurring issues: the same alarm raised by the same equipment more than once
    recurring = data.groupby(['SOURCE NAME', 'NAME']).size().reset_index(name='count')
    recurring = recurring[recurring['count'] > 1].sort_values('count', ascending=False)
    efficiency['recurring_issues'] = recurring.to_dict('records')

    # Unresolved alarms
    unresolved = data[data['RESOLVED TIME'].isna()]
    if not unresolved.empty:
        efficiency['unresolved_alarms'] = {
            'count': len(unresolved),
            'by_equipment': unresolved['SOURCE NAME'].value_counts().head(20).to_dict(),
            'by_type': unresolved['NAME'].value_counts().head(20).to_dict()
        }

    return efficiency

def _json_safe(value):
    """Return *value* with NaN/inf floats replaced by None, recursing into containers."""
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if isinstance(value, (float, np.floating)) and not np.isfinite(value):
        return None
    return value


def save_and_download(data, filename_prefix):
    """Write *data* to a timestamped JSON file and trigger a browser download.

    Args:
        data: JSON-like structure (dicts/lists of scalars); NumPy and pandas
            scalars are handled by ``CustomJSONEncoder``.
        filename_prefix: Leading part of the file name; the current local
            time as ``YYYYmmdd_HHMMSS`` and ``.json`` are appended.

    Non-finite floats (for example the standard deviation of a single-alarm
    group) are written as ``null``: the default encoder would otherwise emit
    the bare token ``NaN``, which is not valid JSON.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{filename_prefix}_{timestamp}.json"
    # Explicit encoding so the file does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(_json_safe(data), f, indent=2, cls=CustomJSONEncoder)
    files.download(filename)
    print(f"Downloaded: {filename}")

def main():
    """Load the data, show the dashboards, then serve an interactive export menu.

    The three analyses are computed once up front on the full cleaned frame.
    The menu loop then displays and/or exports them until the user chooses
    option 5 or the input stream ends.
    """
    df, valid_resolutions, min_date, max_date = load_and_clean_data()
    if df is None or valid_resolutions is None:
        print("Exiting due to data loading error.")
        return

    # Visualize the data first
    visualize_data(df, min_date, max_date)

    # Perform comprehensive analysis
    equipment_analysis = analyze_equipment_performance(df)
    alarm_patterns = analyze_alarm_patterns(df)
    maintenance_efficiency = analyze_maintenance_efficiency(df)

    while True:
        print("\nMAIN MENU")
        print("1. Equipment Performance Analysis")
        print("2. Alarm Pattern Analysis")
        print("3. Maintenance Efficiency Analysis")
        print("4. Export All Results")
        print("5. Exit")

        try:
            # Strip so that "1 " or a stray newline still selects the option.
            main_choice = input("Select option: ").strip()
        except EOFError:
            # No more input available: leave the loop instead of crashing.
            print("Exiting program...")
            break

        if main_choice == "1":
            display(Markdown("## Equipment Performance Analysis"))
            display(equipment_analysis.head(20))
            save_and_download(equipment_analysis.to_dict('records'), "Equipment_Performance")

        elif main_choice == "2":
            display(Markdown("## Alarm Pattern Analysis"))
            display(pd.DataFrame(alarm_patterns['alarm_types']).head(20))
            save_and_download(alarm_patterns, "Alarm_Patterns")

        elif main_choice == "3":
            display(Markdown("## Maintenance Efficiency Analysis"))
            # The key may be missing when no alarm has a resolution time.
            resolution_rows = maintenance_efficiency.get('resolution_times', [])
            if resolution_rows:
                display(pd.DataFrame(resolution_rows).head(20))
            else:
                print("No resolved alarms to summarise.")
            save_and_download(maintenance_efficiency, "Maintenance_Efficiency")

        elif main_choice == "4":
            full_results = {
                "equipment_performance": equipment_analysis.to_dict('records'),
                "alarm_patterns": alarm_patterns,
                "maintenance_efficiency": maintenance_efficiency,
                "metadata": {
                    "date_range": f"{min_date} to {max_date}",
                    "total_alarms": len(df),
                    "total_equipment": df['SOURCE NAME'].nunique(),
                    "total_alarm_types": df['NAME'].nunique()
                }
            }
            save_and_download(full_results, "Full_Analysis_Results")
            print("All results exported")

        elif main_choice == "5":
            print("Exiting program...")
            break

        else:
            print("Invalid selection")

# Start the interactive workflow only when this module is the entry point
# (script run or notebook cell), not when it is imported.
if __name__ == "__main__":
    main()

Please upload your Excel file:


Saving combine.xlsx to combine.xlsx

Generating Comprehensive Visualizations...



MAIN MENU
1. Equipment Performance Analysis
2. Alarm Pattern Analysis
3. Maintenance Efficiency Analysis
4. Export All Results
5. Exit
