In [None]:
import pandas as pd
import json
from google.colab import files
from datetime import datetime
import numpy as np
from IPython.display import display, Markdown

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy values and datetime objects.

    The standard encoder rejects NumPy scalars/arrays and datetimes. This
    subclass converts them to plain Python equivalents so analysis results
    built from pandas/NumPy can be dumped directly.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``.

        Conversions:
            * ``np.bool_``     -> ``bool``
            * ``np.integer``   -> ``int``
            * ``np.floating``  -> ``float``
            * ``np.ndarray``   -> (nested) ``list`` via ``tolist()``
            * ``datetime`` / ``pd.Timestamp`` -> ISO-8601 string

        Raises:
            TypeError: for any other unsupported type (raised by the base
                class implementation).
        """
        # np.bool_ is not an np.integer subclass, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            # Elements that are still not serializable are routed back
            # through default() by the encoder.
            return obj.tolist()
        if isinstance(obj, (datetime, pd.Timestamp)):
            return obj.isoformat()
        return super().default(obj)

def load_and_clean_data():
    """Prompt for an Excel upload, then parse and enrich it for analysis.

    The first uploaded file is read with the header taken from row index 3.
    'EVENT DAY' and 'RESOLVED TIME' are parsed as datetimes (unparseable
    values become NaT), two client aliases are renamed, source names are
    stripped of surrounding whitespace, and these columns are derived:

    * ``resolution_minutes`` - RESOLVED TIME minus EVENT DAY, in minutes
    * ``month``              - month name of EVENT DAY
    * ``week_of_month``      - days 1-7 -> 1, 8-14 -> 2, and so on
    * ``week``               - ISO calendar week of EVENT DAY

    Returns:
        tuple: ``(df, valid_resolutions)`` where ``df`` is the full cleaned
        frame and ``valid_resolutions`` keeps only rows that have a
        RESOLVED TIME that is not earlier than EVENT DAY.
    """
    print("Please upload your Excel file:")
    uploaded_files = files.upload()
    workbook_name = list(uploaded_files)[0]  # only the first upload is used

    df = pd.read_excel(workbook_name, header=3)

    # Parse both timestamp columns; bad cells turn into NaT instead of raising.
    for stamp_column in ('EVENT DAY', 'RESOLVED TIME'):
        df[stamp_column] = pd.to_datetime(df[stamp_column], errors='coerce')

    client_aliases = {'americanauae': 'UAE', 'americanaksa': 'KSA'}
    df['CLIENT NAME'] = df['CLIENT NAME'].replace(client_aliases)
    df['SOURCE NAME'] = df['SOURCE NAME'].str.strip()

    raised_at = df['EVENT DAY']
    resolved_at = df['RESOLVED TIME']
    df['resolution_minutes'] = (resolved_at - raised_at).dt.total_seconds() / 60
    df['month'] = raised_at.dt.month_name()
    df['week_of_month'] = (raised_at.dt.day - 1) // 7 + 1
    df['week'] = raised_at.dt.isocalendar().week

    # Keep rows that were resolved, and resolved no earlier than they occurred.
    resolved_rows = df[df['RESOLVED TIME'].notna()]
    resolved_in_order = resolved_rows['RESOLVED TIME'] >= resolved_rows['EVENT DAY']
    valid_resolutions = resolved_rows[resolved_in_order]

    def _month_histogram(frame):
        # Count of rows per EVENT DAY month name, as a plain dict.
        return frame['EVENT DAY'].dt.month_name().value_counts().to_dict()

    # Debug: month distribution before and after filtering
    print("All months in dataset:", _month_histogram(df))
    print("Months in valid_resolutions:", _month_histogram(valid_resolutions))

    return df, valid_resolutions

def analyze_equipment_source(data, full_data):
    """Summarise alarms per source, per month/week, and by resolution time.

    Args:
        data: DataFrame of alarms used for the per-source count and the
            resolution-time summaries. Reads the columns 'SOURCE NAME',
            'NAME', 'PATH' and 'resolution_minutes'.
        full_data: DataFrame used for the month/week trend. Reads the
            columns 'month', 'week_of_month', 'NAME', 'SOURCE NAME' and
            'PATH'.

    Returns:
        dict: three entries, each holding the same records in descending
        (``"dsc"``) and ascending (``"asc"``) order:

        * ``"name_count"`` - one record per source with its alarm count and
          the NAME/PATH of that source's first row.
        * ``"in_month"`` - one record per (month, week of month) with the
          total number of alarms in that week and its top three alarm
          types; ordered by the total.
        * ``"time_taken_to_resolve"`` - one record per source with its mean
          ``resolution_minutes`` and first NAME/PATH.
    """

    def _both_orders(frame, column):
        # The same frame as record lists, sorted both ways on `column`.
        return {
            "dsc": frame.sort_values(column, ascending=False).to_dict('records'),
            "asc": frame.sort_values(column, ascending=True).to_dict('records'),
        }

    results = {}

    # 1. Name Count Analysis
    name_count = data['SOURCE NAME'].value_counts().reset_index()
    # Rename by position: the raw column labels differ between pandas versions.
    name_count.columns = ['Source name', 'Count']
    source_info = data.drop_duplicates('SOURCE NAME').set_index('SOURCE NAME')[['NAME', 'PATH']]
    name_count = name_count.join(source_info, on='Source name')
    results["name_count"] = _both_orders(name_count, 'Count')

    # 2. In Month Analysis (all alarms, grouped by month / week / alarm type)
    month_analysis = full_data.groupby(['month', 'week_of_month', 'NAME']).agg({
        'SOURCE NAME': 'count',
        'PATH': lambda x: list(x.unique())  # Collect all unique paths
    }).reset_index()
    month_analysis.columns = ['Month', 'Week of Month', 'Alarm Type', 'Count', 'All Paths']

    week_keys = ['Month', 'Week of Month']

    # Totals must be taken BEFORE trimming to the top three alarm types;
    # summing the trimmed rows would undercount weeks with more than three.
    week_totals = month_analysis.groupby(week_keys)['Count'].sum()

    # Top three alarm types per month-week
    top_monthly = (month_analysis.sort_values(week_keys + ['Count'],
                                              ascending=[True, True, False])
                   .groupby(week_keys).head(3))

    monthly_results = []
    for (month, week), group in top_monthly.groupby(week_keys):
        monthly_results.append({
            'Month': month,
            # week_of_month is float when the source column contained NaT;
            # int() keeps the label as "Week 1" rather than "Week 1.0".
            'Week of Month': f"Week {int(week)}",
            'Total Alarms': int(week_totals.loc[(month, week)]),
            'Top Alarms': group[['Alarm Type', 'Count', 'All Paths']].to_dict('records'),
        })

    results["in_month"] = {
        "dsc": sorted(monthly_results, key=lambda x: x['Total Alarms'], reverse=True),
        "asc": sorted(monthly_results, key=lambda x: x['Total Alarms']),
    }

    # 3. Resolution Time Analysis
    resolve_time = data.groupby('SOURCE NAME').agg({
        'resolution_minutes': 'mean',
        'NAME': 'first',
        'PATH': 'first'
    }).reset_index()
    resolve_time.columns = ['Source name', 'Time taken to resolve in minutes', 'Alarm Type', 'Path']
    results["time_taken_to_resolve"] = _both_orders(resolve_time, 'Time taken to resolve in minutes')

    return results

def analyze_condition_name(data):
    """Build per-condition alarm counts and mean resolution times by source.

    Args:
        data: DataFrame with the columns 'NAME', 'SOURCE NAME', 'PATH' and
            'resolution_minutes'. Rows are matched to a condition by exact
            equality on 'NAME'.

    Returns:
        dict: keyed by condition identifier. For a condition with matching
        rows the value is ``{'count': {...}, 'time_taken': {...}}`` where
        each inner dict holds ``'dsc'`` and ``'asc'`` record lists. For a
        condition without rows the value is an explanatory string.
    """
    labels_by_key = {
        'door_open': 'Door Open',
        'extremely_high_temp': 'Extremely High Temperature',
        'high_temp': 'High Temperature',
        'low_temp': 'Low Temperature',
        'no_power': 'No power',
        'site_not_communicating': 'Site Not Communicating'
    }

    def _both_orders(frame, column):
        # Record lists of `frame` sorted descending and ascending on `column`.
        return {
            'dsc': frame.sort_values(column, ascending=False).to_dict('records'),
            'asc': frame.sort_values(column, ascending=True).to_dict('records')
        }

    summary = {}
    for key, label in labels_by_key.items():
        subset = data[data['NAME'] == label]
        if subset.empty:
            summary[key] = "No data available for this condition"
            continue

        # Alarms per source, with the PATH from each source's first row.
        per_source = subset['SOURCE NAME'].value_counts().reset_index()
        per_source.columns = ['Source name', 'Count']
        first_paths = subset.drop_duplicates('SOURCE NAME').set_index('SOURCE NAME')[['PATH']]
        per_source = per_source.join(first_paths, on='Source name')

        # Mean resolution time per source.
        mean_minutes = subset.groupby('SOURCE NAME').agg({
            'resolution_minutes': 'mean',
            'PATH': 'first'
        }).reset_index()
        mean_minutes.columns = ['Source name', 'Time taken', 'Path']

        summary[key] = {
            'count': _both_orders(per_source, 'Count'),
            'time_taken': _both_orders(mean_minutes, 'Time taken')
        }

    return summary

def save_and_download(data, filename):
    """Write ``data`` to ``filename`` as indented JSON and trigger a download.

    Args:
        data: Object to serialize; values the standard encoder rejects are
            handled by ``CustomJSONEncoder``.
        filename: Path of the JSON file to create and then download.
    """
    with open(filename, 'w') as out_file:
        json.dump(obj=data, fp=out_file, cls=CustomJSONEncoder, indent=2)

    files.download(filename)
    print(f"✅ Downloaded: {filename}")

def _export_sorted_choice(title, prompt, by_order, file_stem):
    """Show the DSC/ASC/Back sub-menu once and export the chosen ordering.

    Args:
        title: Heading printed above the options.
        prompt: Text passed to ``input`` for this sub-menu.
        by_order: Mapping with ``"dsc"`` and ``"asc"`` result lists.
        file_stem: Output filename without the ``_DSC.json`` / ``_ASC.json``
            suffix.
    """
    print(f"\n{title}")
    print("1. Descending (DSC)")
    print("2. Ascending (ASC)")
    print("3. Back")

    order = {"1": "dsc", "2": "asc"}.get(input(prompt))
    # "3" (Back) and any unrecognised input both return without exporting.
    if order is not None:
        save_and_download(by_order[order], f"{file_stem}_{order.upper()}.json")


def _equipment_menu(equipment_results):
    """Run the equipment-source sub-menu until the user picks 'Back'."""
    # menu choice -> (heading, input prompt, results key, output file stem)
    analyses = {
        "1": ("NAME COUNT", "Select sort: ",
              "name_count", "Equipment_Name_Count"),
        "2": ("MONTHLY/WEEKLY TRENDS", "Select option: ",
              "in_month", "Equipment_Monthly_Trends"),
        "3": ("TIME TAKEN TO RESOLVE", "Select sort: ",
              "time_taken_to_resolve", "Equipment_Time_Taken"),
    }

    while True:
        print("\nEQUIPMENT SOURCE ANALYSIS")
        print("1. Name Count")
        print("2. Monthly/Weekly Trends")
        print("3. Time Taken to Resolve")
        print("4. Back to Main Menu")

        equip_choice = input("Select analysis: ")

        if equip_choice == "4":
            return
        if equip_choice in analyses:
            title, prompt, result_key, file_stem = analyses[equip_choice]
            _export_sorted_choice(title, prompt, equipment_results[result_key], file_stem)


def _condition_menu(condition_results):
    """Run the condition-name sub-menu until the user picks 'Back'."""
    conditions = [
        ("1", "door_open", "Door Open"),
        ("2", "extremely_high_temp", "Extremely High Temperature"),
        ("3", "high_temp", "High Temperature"),
        ("4", "low_temp", "Low Temperature"),
        ("5", "no_power", "No Power"),
        ("6", "site_not_communicating", "Site Not Communicating"),
        ("7", "", "Back to Main Menu")
    ]

    while True:
        print("\nCONDITION NAME ANALYSIS")
        for num, _, name in conditions:
            print(f"{num}. {name}")

        cond_choice = input("Select condition: ")

        if cond_choice == "7":
            return

        selected_cond, cond_name = next(
            ((key, name) for num, key, name in conditions if cond_choice == num and key),
            (None, ""),
        )

        # A condition without data is stored as a message string rather than
        # a results dict; indexing it would raise TypeError, so only dict
        # entries are allowed into the export sub-menu.
        cond_results = condition_results.get(selected_cond)
        if not isinstance(cond_results, dict):
            print("Invalid selection or no data available")
            continue

        file_prefix = cond_name.replace(' ', '_')
        while True:
            print(f"\n{cond_name.upper()}")
            print("1. Count Analysis")
            print("2. Time Taken Analysis")
            print("3. Back")

            analysis_choice = input("Select analysis: ")

            if analysis_choice == "1":
                _export_sorted_choice("COUNT ANALYSIS", "Select sort: ",
                                      cond_results["count"], f"{file_prefix}_Count")
            elif analysis_choice == "2":
                _export_sorted_choice("TIME TAKEN ANALYSIS", "Select sort: ",
                                      cond_results["time_taken"], f"{file_prefix}_Time_Taken")
            elif analysis_choice == "3":
                break


def main():
    """Load the uploaded data, run both analyses, and serve the export menu.

    The analyses are computed once up front; the menu loop then lets the
    user download individual result sets or the full combined results as
    JSON until 'Exit' is chosen. Unrecognised menu input simply redisplays
    the current menu.
    """
    df, valid_resolutions = load_and_clean_data()
    equipment_results = analyze_equipment_source(valid_resolutions, df)
    condition_results = analyze_condition_name(valid_resolutions)

    while True:
        print("\n🔍 MAIN MENU")
        print("1. Equipment Source Analysis")
        print("2. Condition Name Analysis")
        print("3. Export Full Results")
        print("4. Exit")

        main_choice = input("Select option: ")

        if main_choice == "1":
            _equipment_menu(equipment_results)

        elif main_choice == "2":
            _condition_menu(condition_results)

        elif main_choice == "3":
            filename = "Full_Analysis_Results.json"
            full_results = {
                "Equipment_Source_Analysis": equipment_results,
                "Condition_Name_Analysis": condition_results
            }
            save_and_download(full_results, filename)

        elif main_choice == "4":
            print("Exiting program...")
            break

# Start the interactive workflow only when this code is executed directly
# (script or notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Please upload your Excel file:


Saving test1.xlsx to test1 (1).xlsx
All months in dataset: {'May': 25, 'September': 1, 'October': 1, 'August': 1, 'June': 1, 'April': 1, 'February': 1}
Months in valid_resolutions: {'May': 25}

🔍 MAIN MENU
1. Equipment Source Analysis
2. Condition Name Analysis
3. Export Full Results
4. Exit
Select option: 1

EQUIPMENT SOURCE ANALYSIS
1. Name Count
2. Monthly/Weekly Trends
3. Time Taken to Resolve
4. Back to Main Menu
Select analysis: 1

NAME COUNT
1. Descending (DSC)
2. Ascending (ASC)
3. Back
Select sort: 3

EQUIPMENT SOURCE ANALYSIS
1. Name Count
2. Monthly/Weekly Trends
3. Time Taken to Resolve
4. Back to Main Menu
Select analysis: 2

MONTHLY/WEEKLY TRENDS
1. Descending (DSC)
2. Ascending (ASC)
3. Back
Select option: 3

EQUIPMENT SOURCE ANALYSIS
1. Name Count
2. Monthly/Weekly Trends
3. Time Taken to Resolve
4. Back to Main Menu
Select analysis: 3

TIME TAKEN TO RESOLVE
1. Descending (DSC)
2. Ascending (ASC)
3. Back
Select sort: 3

EQUIPMENT SOURCE ANALYSIS
1. Name Count
2. Monthly

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ Downloaded: Full_Analysis_Results.json

🔍 MAIN MENU
1. Equipment Source Analysis
2. Condition Name Analysis
3. Export Full Results
4. Exit
Select option: 4
Exiting program...
