In [None]:
import pandas as pd
import json
from google.colab import files
from datetime import datetime
import numpy as np
from IPython.display import display, Markdown

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for the NumPy / pandas values found in analysis results.

    ``json`` only knows built-in Python types. This encoder converts NumPy
    scalars and arrays to their plain Python equivalents and datetimes to
    ISO 8601 strings.
    """

    def default(self, obj):
        """Return a JSON-serialisable equivalent of ``obj``.

        ``json`` calls this only for objects it cannot serialise natively.

        Raises:
            TypeError: If ``obj`` is of an unsupported type (raised by the
                base class).
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (datetime, pd.Timestamp)):
            return obj.isoformat()
        return super().default(obj)

def load_and_clean_data():
    """Prompt for an Excel upload, then parse and clean the alarm data.

    The first uploaded file is read with the column labels taken from the
    row at index 3 (``header=3``). Timestamps are parsed, ``SOURCE NAME`` is
    stripped of surrounding whitespace, and the derived columns
    ``resolution_minutes``, ``month``, ``week_of_month`` and ``week`` are
    added.

    Returns:
        A ``(df, valid_resolutions)`` tuple. ``df`` is the full cleaned
        frame; ``valid_resolutions`` keeps only rows that have a
        ``RESOLVED TIME`` not earlier than ``EVENT DAY``. Returns
        ``(None, None)`` if the upload or read fails, or if a required
        column is missing.
    """
    print("Please upload your Excel file:")
    try:
        uploaded = files.upload()
        if not uploaded:
            raise ValueError("No file uploaded.")
        filename = next(iter(uploaded))
        df = pd.read_excel(filename, header=3)
    except Exception as e:
        # Notebook boundary: report any upload/read failure instead of raising.
        print(f"Error uploading or reading file: {e}")
        return None, None

    # Validate required columns
    required_columns = ['EVENT DAY', 'RESOLVED TIME', 'SOURCE NAME', 'NAME', 'PATH']
    missing_cols = [col for col in required_columns if col not in df.columns]
    if missing_cols:
        print(f"Missing required columns: {', '.join(missing_cols)}")
        return None, None

    # Data cleaning; unparseable timestamps become NaT (errors='coerce').
    df['EVENT DAY'] = pd.to_datetime(df['EVENT DAY'], format='%d %b %Y %H:%M:%S', errors='coerce', dayfirst=True)
    df['RESOLVED TIME'] = pd.to_datetime(df['RESOLVED TIME'], errors='coerce')

    # CLIENT NAME is optional. DataFrame.get() would return the plain-string
    # default when the column is absent, and str.replace() cannot take a
    # mapping, so branch on the column's presence instead.
    if 'CLIENT NAME' in df.columns:
        df['CLIENT NAME'] = df['CLIENT NAME'].replace({'americanauae': 'UAE', 'americanaksa': 'KSA'})
    else:
        df['CLIENT NAME'] = ''

    df['SOURCE NAME'] = df['SOURCE NAME'].str.strip()
    df['resolution_minutes'] = (df['RESOLVED TIME'] - df['EVENT DAY']).dt.total_seconds() / 60
    # Use full month names
    df['month'] = df['EVENT DAY'].dt.strftime('%B')
    # Days 1-7 -> week 1, 8-14 -> week 2, and so on.
    df['week_of_month'] = (df['EVENT DAY'].dt.day - 1) // 7 + 1
    df['week'] = df['EVENT DAY'].dt.isocalendar().week

    # Filter valid resolutions: resolved, and not resolved before the event.
    valid_resolutions = df.dropna(subset=['RESOLVED TIME'])
    valid_resolutions = valid_resolutions[valid_resolutions['RESOLVED TIME'] >= valid_resolutions['EVENT DAY']]

    # Debug: Print month distribution
    print("All months in dataset:", df['month'].value_counts().to_dict())
    print("Months in valid_resolutions:", valid_resolutions['month'].value_counts().to_dict())

    return df, valid_resolutions

def analyze_equipment_source(data, full_data):
    """Summarise alarms per equipment (``SOURCE NAME``) over ``full_data``.

    Args:
        data: Not used by this function; kept so existing callers keep
            working.
        full_data: DataFrame with the columns ``SOURCE NAME``, ``NAME``,
            ``PATH``, ``month``, ``week_of_month`` and
            ``resolution_minutes``.

    Returns:
        A list of dicts, one per source, sorted by ``Total Count``
        descending. Keys: ``Source name``, ``Name`` (list of
        ``"<alarm>: <count>"`` strings), ``Path`` (first path seen),
        ``Month`` (month names in calendar order), ``Week of Month`` (list
        of ``"Week <n>"`` labels), ``Total Count``, ``Unresolved`` (rows
        with no ``resolution_minutes``), ``Resolved`` and
        ``Time taken to resolve in minutes`` (mean, rounded to 2 decimals).
    """
    def month_number(month_name):
        # Full month name -> 1..12, so months sort in calendar order.
        return pd.to_datetime(month_name, format='%B').month

    # Rows with an unparseable EVENT DAY have NaN month/week. Drop those
    # before listing, and cast weeks to int because a NaN in the column makes
    # it float (which would otherwise render as "Week 2.0" / "Week nan").
    name_count = full_data.groupby('SOURCE NAME').agg({
        'NAME': lambda s: dict(s.value_counts()),  # Count each alarm type
        'PATH': 'first',
        'month': lambda s: sorted(s.dropna().unique(), key=month_number),
        'week_of_month': lambda s: [f"Week {int(w)}" for w in sorted(s.dropna().unique())],
        'resolution_minutes': [
            'size',  # Total count (includes NaN)
            lambda s: s.isna().sum(),  # Unresolved
            lambda s: s.notna().sum(),  # Resolved
            'mean'  # Mean resolution time for resolved
        ]
    }).reset_index()

    # Flatten the two-level column index produced by the mixed aggregation.
    name_count.columns = ['Source name', 'Name', 'Path', 'Month', 'Week of Month', 'Total Count', 'Unresolved', 'Resolved', 'Time taken to resolve in minutes']

    # Format Name as list of "Alarm: Count"
    name_count['Name'] = name_count['Name'].apply(lambda counts: [f"{k}: {v}" for k, v in counts.items()])
    name_count['Time taken to resolve in minutes'] = name_count['Time taken to resolve in minutes'].round(2)
    name_count = name_count.sort_values('Total Count', ascending=False)

    return name_count.to_dict('records')

def analyze_specific_equipment(data, source_name):
    """Summarise the alarms of one equipment, selected by ``SOURCE NAME``.

    Args:
        data: DataFrame with the columns ``SOURCE NAME``, ``NAME``,
            ``PATH``, ``month``, ``week_of_month`` and
            ``resolution_minutes``.
        source_name: Equipment name to look up. Matching ignores case and
            surrounding whitespace.

    Returns:
        A list with one summary dict for the equipment (keys
        ``Source name``, ``Name``, ``Path``, ``Month``, ``Week of Month``,
        ``Total Count``, ``Unresolved``, ``Resolved``,
        ``Time taken to resolve in minutes``), or a
        ``"No data available ..."`` string when nothing matches.
    """
    def month_number(month_name):
        # Full month name -> 1..12, so months sort in calendar order.
        return pd.to_datetime(month_name, format='%B').month

    # Case-insensitive matching for SOURCE NAME. Missing or non-string cells
    # are skipped because they have no .lower().
    wanted = source_name.strip()
    known_sources = (s for s in data['SOURCE NAME'].dropna().unique() if isinstance(s, str))
    source_name = next((s for s in known_sources if s.lower() == wanted.lower()), wanted)

    filtered_data = data[data['SOURCE NAME'] == source_name]
    if filtered_data.empty:
        return f"No data available for equipment: {source_name}"

    # NaN month/week values (unparseable EVENT DAY) are dropped, and weeks
    # are cast to int so labels read "Week 2" rather than "Week 2.0".
    results = filtered_data.groupby('SOURCE NAME').agg({
        'NAME': lambda s: dict(s.value_counts()),  # Count each alarm type
        'PATH': 'first',
        'month': lambda s: sorted(s.dropna().unique(), key=month_number),
        'week_of_month': lambda s: [f"Week {int(w)}" for w in sorted(s.dropna().unique())],
        'resolution_minutes': [
            'size',  # Total count (includes NaN)
            lambda s: s.isna().sum(),  # Unresolved
            lambda s: s.notna().sum(),  # Resolved
            'mean'  # Mean resolution time for resolved
        ]
    }).reset_index()

    # Flatten the two-level column index produced by the mixed aggregation.
    results.columns = ['Source name', 'Name', 'Path', 'Month', 'Week of Month', 'Total Count', 'Unresolved', 'Resolved', 'Time taken to resolve in minutes']
    results['Name'] = results['Name'].apply(lambda counts: [f"{k}: {v}" for k, v in counts.items()])
    results['Time taken to resolve in minutes'] = results['Time taken to resolve in minutes'].round(2)
    results = results.sort_values('Total Count', ascending=False)

    return results.to_dict('records')

def analyze_condition_name(data):
    """Summarise every alarm condition (``NAME``) per equipment.

    Args:
        data: DataFrame with the columns ``NAME``, ``SOURCE NAME``, ``PATH``
            and ``resolution_minutes``.

    Returns:
        A dict keyed by the condition name lower-cased with spaces replaced
        by underscores. Each value is a list of per-source dicts with keys
        ``Source name``, ``Path``, ``count`` (non-null resolution times) and
        ``time taken`` (mean minutes, rounded to 2 decimals), sorted by
        ``count`` descending.
    """
    results = {}

    # Rows with a missing NAME are skipped: NaN has no .lower() and never
    # compares equal to itself, so it cannot form a condition group.
    for cond in data['NAME'].dropna().unique():
        cond_key = str(cond).lower().replace(' ', '_')
        cond_data = data[data['NAME'] == cond]

        cond_analysis = cond_data.groupby('SOURCE NAME').agg({
            'PATH': 'first',
            'resolution_minutes': ['count', 'mean']
        }).reset_index()
        # Flatten the two-level column index produced by the aggregation.
        cond_analysis.columns = ['Source name', 'Path', 'count', 'time taken']
        cond_analysis['time taken'] = cond_analysis['time taken'].round(2)
        cond_analysis = cond_analysis.sort_values('count', ascending=False)
        results[cond_key] = cond_analysis.to_dict('records')

    return results

def analyze_specific_condition(data, condition):
    """Summarise one alarm condition (``NAME``) per equipment.

    Args:
        data: DataFrame with the columns ``NAME``, ``SOURCE NAME``, ``PATH``
            and ``resolution_minutes``.
        condition: Condition name to look up. Matching ignores case and
            surrounding whitespace.

    Returns:
        A list of per-source dicts with keys ``Source name``, ``Path``,
        ``count`` (non-null resolution times) and ``time taken`` (mean
        minutes, rounded to 2 decimals), sorted by ``count`` descending; or
        a ``"No data available ..."`` string when nothing matches.
    """
    # Case-insensitive matching for condition. Missing or non-string cells
    # are skipped because they have no .lower().
    wanted = condition.strip()
    known_conditions = (c for c in data['NAME'].dropna().unique() if isinstance(c, str))
    condition = next((c for c in known_conditions if c.lower() == wanted.lower()), wanted)

    cond_data = data[data['NAME'] == condition]
    if cond_data.empty:
        return f"No data available for condition: {condition}"

    results = cond_data.groupby('SOURCE NAME').agg({
        'PATH': 'first',
        'resolution_minutes': ['count', 'mean']
    }).reset_index()
    # Flatten the two-level column index produced by the aggregation.
    results.columns = ['Source name', 'Path', 'count', 'time taken']
    results['time taken'] = results['time taken'].round(2)
    results = results.sort_values('count', ascending=False)

    return results.to_dict('records')

def save_and_download(data, filename_prefix):
    """Write ``data`` to a timestamped JSON file and trigger a download.

    Args:
        data: Analysis results (nested dicts / lists / scalars).
        filename_prefix: Leading part of the file name. Characters that are
            not valid in file names are replaced by ``_``.
    """
    def json_safe(value):
        # NaN / Infinity are not valid JSON tokens; write them as null so
        # strict parsers can read the file (e.g. the mean of a source with
        # no resolved alarms is NaN).
        if isinstance(value, dict):
            return {key: json_safe(item) for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [json_safe(item) for item in value]
        if isinstance(value, (float, np.floating)) and not np.isfinite(value):
            return None
        return value

    # The prefix may contain user-typed text; path separators and other
    # reserved characters would make open() fail or write elsewhere.
    safe_prefix = filename_prefix.translate({ord(ch): '_' for ch in '\\/:*?"<>|'})

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{safe_prefix}_{timestamp}.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(json_safe(data), f, indent=2, cls=CustomJSONEncoder)
    files.download(filename)
    print(f"✅ Downloaded: {filename}")

def _unique_labels(series):
    """Return the distinct non-null values of ``series`` as strings."""
    # str.join() rejects NaN, so missing cells are dropped before listing.
    return [str(value) for value in series.dropna().unique()]


def _equipment_menu(df, equipment_results):
    """Run the equipment sub-menu until the user picks Exit."""
    while True:
        print("\nALARM EQUIPMENT ANALYSIS")
        print("1. All equipment analysis")
        print("2. Type the specific equipment to analyze")
        print("3. Exit")

        equip_choice = input("Select analysis: ").strip()

        if equip_choice == "1":
            # Preview results
            display(Markdown("## All Equipment Analysis"))
            display(pd.DataFrame(equipment_results))
            save_and_download(equipment_results, "Equipment_All_Analysis")

        elif equip_choice == "2":
            print("Available equipment:", ", ".join(_unique_labels(df['SOURCE NAME'])))
            source_name = input("Enter equipment name (e.g., KFC Abdul Nasser New - 112750): ")
            specific_results = analyze_specific_equipment(df, source_name)
            # A string result is the "no data" message, not a table.
            if isinstance(specific_results, str):
                print(specific_results)
            else:
                display(Markdown(f"## Analysis for Equipment: {source_name}"))
                display(pd.DataFrame(specific_results))
                save_and_download(specific_results, f"Equipment_{source_name.replace(' ', '_')}_Analysis")

        elif equip_choice == "3":
            break
        else:
            print("Invalid selection. Please choose 1, 2, or 3.")


def _condition_menu(df, condition_results):
    """Run the alarm-condition sub-menu until the user picks Exit."""
    while True:
        print("\nALARM CONDITION NAME ANALYSIS")
        print("1. All alarm name condition analysis")
        print("2. Type available condition for analysis")
        print("3. Exit")

        cond_choice = input("Select condition: ").strip()

        if cond_choice == "1":
            display(Markdown("## All Alarm Condition Analysis"))
            for cond, result in condition_results.items():
                display(Markdown(f"### {cond.replace('_', ' ').title()}"))
                if isinstance(result, str):
                    print(result)
                else:
                    display(pd.DataFrame(result))
            save_and_download(condition_results, "Condition_All_Analysis")

        elif cond_choice == "2":
            print("Available conditions:", ", ".join(_unique_labels(df['NAME'])))
            condition = input("Enter condition name (e.g., Site Not Communicating, Door Open): ")
            specific_results = analyze_specific_condition(df, condition)
            # A string result is the "no data" message, not a table.
            if isinstance(specific_results, str):
                print(specific_results)
            else:
                display(Markdown(f"## Analysis for Condition: {condition}"))
                display(pd.DataFrame(specific_results))
                save_and_download(specific_results, f"Condition_{condition.replace(' ', '_')}_Analysis")

        elif cond_choice == "3":
            break
        else:
            print("Invalid selection. Please choose 1, 2, or 3.")


def main():
    """Load the alarm data, then drive the interactive analysis menu.

    The all-equipment and all-condition analyses are computed once up front.
    The menu loop then previews, exports, or drills into them until the user
    chooses Exit. Returns early if the data cannot be loaded.
    """
    df, valid_resolutions = load_and_clean_data()
    if df is None or valid_resolutions is None:
        print("Exiting due to data loading error.")
        return

    equipment_results = analyze_equipment_source(valid_resolutions, df)
    condition_results = analyze_condition_name(valid_resolutions)

    while True:
        print("\n🔍 MAIN MENU")
        print("1. Alarm Equipment Analysis")
        print("2. Alarm Condition Name Analysis")
        print("3. Export Full Results")
        print("4. Exit")

        main_choice = input("Select option: ").strip()

        if main_choice == "1":
            _equipment_menu(df, equipment_results)

        elif main_choice == "2":
            _condition_menu(df, condition_results)

        elif main_choice == "3":
            full_results = {
                "Equipment_Analysis": equipment_results,
                "Condition_Analysis": condition_results
            }
            save_and_download(full_results, "Full_Analysis_Results")

        elif main_choice == "4":
            print("Exiting program...")
            break
        else:
            print("Invalid selection. Please choose 1, 2, 3, or 4.")

# Start the interactive session only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Please upload your Excel file:


Saving Alarms 2001-3000.xlsx to Alarms 2001-3000.xlsx
All months in dataset: {'May': 1000}
Months in valid_resolutions: {'May': 1000}

🔍 MAIN MENU
1. Alarm Equipment Analysis
2. Alarm Condition Name Analysis
3. Export Full Results
4. Exit
Select option: 1

ALARM EQUIPMENT ANALYSIS
1. All equipment analysis
2. Type the specific equipment to analyze
3. Exit
Select analysis: 1


## All Equipment Analysis

Unnamed: 0,Source name,Name,Path,Month,Week of Month,Total Count,Unresolved,Resolved,Time taken to resolve in minutes
0,T 118709 CH 02,"[Door Open: 8, High Temperature: 2, Extremely ...",UAE/Dubai/TGIF Jumeirah - 118709,[May],[Week 2],11,0,11,55.89
1,H 114828 CH 01,"[Door Open: 7, High Temperature: 2, Extremely ...",UAE/Ajman/Hardees City LifeTallah Mall - 114828,[May],[Week 2],10,0,10,55.55
2,T 5518707 FZ 01,"[High Temperature: 4, Extremely High Temperatu...",KSA/KSA Western/TGIF Al Saneaya Yanbu - 5518707,[May],[Week 2],8,0,8,62.22
3,PH 116916 FZ 01,"[High Temperature: 3, Door Open: 3, Extremely ...",UAE/Dubai/Pizza Hut Nashama Town Square - 116916,[May],[Week 2],8,0,8,76.75
4,PH 116906 CH 01,"[Door Open: 5, Extremely High Temperature: 2, ...",UAE/Dubai/Pizza Hut Al Nahda Dubai - 116906,[May],[Week 2],8,0,8,58.87
...,...,...,...,...,...,...,...,...,...
349,PH 116859 CH 01,[Extremely High Temperature: 1],UAE/Dubai/Pizza Hut Al Warqa - 116859,[May],[Week 2],1,0,1,63.72
350,PH 116864 CH 02,[Door Open: 1],UAE/Ajman/Pizza Hut Mall Ajman - 116864,[May],[Week 2],1,0,1,23.87
351,PH 116864 FZ 01,[Door Open: 1],UAE/Ajman/Pizza Hut Mall Ajman - 116864,[May],[Week 2],1,0,1,17.18
352,PH 116916 CH 01,[Low Temperature: 1],UAE/Dubai/Pizza Hut Nashama Town Square - 116916,[May],[Week 2],1,0,1,56.70


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded: Equipment_All_Analysis_20250528_115216.json

ALARM EQUIPMENT ANALYSIS
1. All equipment analysis
2. Type the specific equipment to analyze
3. Exit
