In [None]:
import pandas as pd
import json
from google.colab import files
from datetime import datetime
import numpy as np

class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder that also accepts NumPy values and date-time objects.

    The standard encoder raises ``TypeError`` for these types; this subclass
    converts them to plain Python equivalents first:

    * ``np.bool_``               -> ``bool``
    * ``np.integer`` subclasses  -> ``int``
    * ``np.floating`` subclasses -> ``float``
    * ``np.ndarray``             -> (nested) ``list`` via ``tolist()``
    * ``datetime`` / ``pd.Timestamp`` -> ISO-8601 string via ``isoformat()``

    Anything else is passed to the base class, which raises ``TypeError``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Called by the ``json`` machinery only for objects it cannot encode
        on its own.
        """
        # np.bool_ is not a subclass of np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (datetime, pd.Timestamp)):
            return obj.isoformat()
        return super().default(obj)

# Upload and read file
uploaded = files.upload()
if not uploaded:
    # Fail with a clear message instead of an IndexError on the lookup below.
    raise ValueError("No file was uploaded; cannot run the analysis.")
filename = next(iter(uploaded))  # first uploaded file name
# header=3: column labels are read from the fourth row of the sheet (0-indexed).
df = pd.read_excel(filename, header=3)

# Data cleaning
# Unparseable timestamps become NaT (errors='coerce') rather than raising.
df['EVENT DAY'] = pd.to_datetime(df['EVENT DAY'], errors='coerce')
df['RESOLVED TIME'] = pd.to_datetime(df['RESOLVED TIME'], errors='coerce')
# Map the two known client identifiers to short labels; other values are kept.
df['CLIENT NAME'] = df['CLIENT NAME'].replace({'americanauae': 'UAE', 'americanaksa': 'KSA'})
df['SOURCE NAME'] = df['SOURCE NAME'].str.strip()
# Elapsed time between the event and its resolution, in minutes.
df['resolution_minutes'] = (df['RESOLVED TIME'] - df['EVENT DAY']).dt.total_seconds() / 60
df['week'] = df['EVENT DAY'].dt.isocalendar().week

# Filter valid resolutions
# Keep rows that have a resolution time that is not earlier than the event.
resolved_rows = df.dropna(subset=['RESOLVED TIME'])
# .copy() makes the result an independent frame, so later column assignments
# on it do not trigger SettingWithCopyWarning.
valid_resolutions = resolved_rows[
    resolved_rows['RESOLVED TIME'] >= resolved_rows['EVENT DAY']
].copy()

# Create analysis functions with NAME and PATH included
def _most_common_value(values):
    """Return the most frequent non-null entry of *values*, or None if there is none."""
    frequencies = values.value_counts()
    return frequencies.index[0] if not frequencies.empty else None


def analyze_equipment_source(data):
    """Summarise alarms per source: counts, per-week top alarms, resolve time.

    Args:
        data: DataFrame providing the columns 'EVENT DAY' (datetime),
            'SOURCE NAME', 'NAME', 'PATH' and 'resolution_minutes'.
            The frame is not modified.

    Returns:
        A dict with three entries, each holding the same records sorted
        descending ('dsc') and ascending ('asc'):

        * 'name_count': one record per source name with its row count and
          the NAME/PATH of the first row seen for that source.
        * 'in_month': one record per (month, week-of-month) with up to three
          alarm types having the highest counts, sorted by 'Total Alarms'.
        * 'time_taken_to_resolve': one record per source name with the mean
          of 'resolution_minutes' and the first NAME/PATH of the group.
    """
    results = {}

    # Add month and week-of-month columns on a new frame so the caller's
    # DataFrame is left untouched. Days 1-7 map to week 1, 8-14 to week 2, ...
    data = data.assign(
        month=data['EVENT DAY'].dt.month_name(),
        week_of_month=(data['EVENT DAY'].dt.day - 1) // 7 + 1,
    )

    # 1. Name count
    name_count = data['SOURCE NAME'].value_counts().reset_index()
    name_count.columns = ['Source name', 'Count']
    source_info = data.drop_duplicates('SOURCE NAME').set_index('SOURCE NAME')[['NAME', 'PATH']]
    name_count = name_count.join(source_info, on='Source name')
    results['name_count'] = {
        'dsc': name_count.sort_values('Count', ascending=False).to_dict('records'),
        'asc': name_count.sort_values('Count', ascending=True).to_dict('records')
    }

    # 2. "In month" analysis: alarm counts per month / week-of-month / alarm type
    month_analysis = data.groupby(['month', 'week_of_month', 'NAME']).agg({
        'SOURCE NAME': 'count',
        # Guarded helper: a group whose PATH values are all missing yields
        # None instead of raising IndexError.
        'PATH': _most_common_value
    }).reset_index()
    month_analysis.columns = ['Month', 'Week of Month', 'Alarm Type', 'Count', 'Most Common Path']

    # Get top alarms per month-week (at most three, highest count first)
    top_monthly = (month_analysis.sort_values(['Month', 'Week of Month', 'Count'],
                                             ascending=[True, True, False])
                  .groupby(['Month', 'Week of Month']).head(3))

    # Format for JSON output
    # NOTE(review): 'Total Alarms' sums only the retained top rows, not every
    # alarm recorded in that week -- confirm this is the intended meaning.
    monthly_results = [
        {
            'Month': month,
            'Week of Month': f"Week {week}",
            # Plain int so the value is JSON-serializable without a custom encoder.
            'Total Alarms': int(group['Count'].sum()),
            'Top Alarms': group[['Alarm Type', 'Count', 'Most Common Path']]
                          .to_dict('records')
        }
        for (month, week), group in top_monthly.groupby(['Month', 'Week of Month'])
    ]

    results['in_month'] = {
        'dsc': sorted(monthly_results,
                     key=lambda x: x['Total Alarms'],
                     reverse=True),
        'asc': sorted(monthly_results,
                     key=lambda x: x['Total Alarms'])
    }

    # 3. Time taken to resolve (mean minutes per source)
    resolve_time = data.groupby('SOURCE NAME').agg({
        'resolution_minutes': 'mean',
        'NAME': 'first',
        'PATH': 'first'
    }).reset_index()
    resolve_time.columns = ['Source name', 'Time taken to resolve in minutes', 'NAME', 'PATH']

    results['time_taken_to_resolve'] = {
        'dsc': resolve_time.sort_values('Time taken to resolve in minutes', ascending=False).to_dict('records'),
        'asc': resolve_time.sort_values('Time taken to resolve in minutes', ascending=True).to_dict('records')
    }

    return results

def analyze_condition_name(data):
    """Break alarms down by condition, then by source within each condition.

    Args:
        data: DataFrame providing the columns 'NAME', 'SOURCE NAME', 'PATH'
            and 'resolution_minutes'.

    Returns:
        A dict keyed by condition key. For a condition with matching rows the
        value holds 'count' (rows per source) and 'time_taken' (mean
        'resolution_minutes' per source), each as 'dsc'/'asc' record lists
        that also carry the source's PATH. For a condition without rows the
        value is a fixed explanatory string.
    """
    # Output key -> exact value expected in the 'NAME' column.
    label_by_key = {
        'door_open': 'Door Open',
        'extremely_high_temp': 'Extremely High Temperature',
        'high_temp': 'High Temperature',
        'low_temp': 'Low Temperature',
        'no_power': 'No power',
        'site_not_communicating': 'Site Not Communicating'
    }

    def both_orders(frame, column):
        # Same records, sorted by `column` in descending and ascending order.
        return {
            'dsc': frame.sort_values(column, ascending=False).to_dict('records'),
            'asc': frame.sort_values(column, ascending=True).to_dict('records')
        }

    results = {}
    for cond_key, label in label_by_key.items():
        subset = data[data['NAME'] == label]
        if subset.empty:
            results[cond_key] = "No data available for this condition"
            continue

        # Rows per source, with the PATH of the first row seen for each source.
        occurrences = subset['SOURCE NAME'].value_counts().reset_index()
        occurrences.columns = ['Source name', 'Count']
        first_paths = subset.drop_duplicates('SOURCE NAME').set_index('SOURCE NAME')[['PATH']]
        occurrences = occurrences.join(first_paths, on='Source name')

        # Mean resolution time per source, with the group's first PATH.
        durations = subset.groupby('SOURCE NAME').agg({
            'resolution_minutes': 'mean',
            'PATH': 'first'
        }).reset_index()
        durations.columns = ['Source name', 'Time taken', 'PATH']

        results[cond_key] = {
            'count': both_orders(occurrences, 'Count'),
            'time_taken': both_orders(durations, 'Time taken')
        }

    return results

# Run both analyses on the filtered rows; the equipment analysis runs first.
analysis_results = dict(
    Equipment_source_name=analyze_equipment_source(valid_resolutions),
    Condition_name=analyze_condition_name(valid_resolutions),
)

# Write the combined results (NAME and PATH included) as indented JSON,
# then hand the file to the browser for download.
with open('full_analysis_results.json', 'w') as f:
    json.dump(
        analysis_results,
        f,
        indent=2,
        cls=CustomJSONEncoder,  # converts NumPy scalars and timestamps
    )
files.download('full_analysis_results.json')

Saving combine.xlsx to combine.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>