In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from google.colab import files
import json
from datetime import datetime
import numpy as np

# Custom JSON encoder
class CustomJSONEncoder(json.JSONEncoder):
    """JSON encoder for the NumPy / pandas values that appear in the analysis results.

    The standard encoder rejects NumPy scalars, NumPy arrays and pandas
    timestamps. This subclass converts them to plain Python equivalents and
    defers to the base class (which raises ``TypeError``) for anything else.
    """

    def default(self, obj):
        """Return a JSON-serialisable replacement for ``obj``.

        Args:
            obj: A value the standard encoder could not serialise.

        Returns:
            ``bool``/``int``/``float`` for NumPy scalars, a (nested) list for
            NumPy arrays, or an ISO-8601 string for datetimes and timestamps.

        Raises:
            TypeError: If ``obj`` is of an unsupported type.
        """
        # np.bool_ is neither a Python bool nor an np.integer, so it needs its own branch.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            # tolist() converts the elements to native Python scalars as well.
            return obj.tolist()
        if isinstance(obj, (datetime, pd.Timestamp)):
            return obj.isoformat()
        return super().default(obj)

# Upload and read file
uploaded = files.upload()
if not uploaded:
    # Fail with an explicit message instead of an opaque IndexError when the upload is cancelled
    raise ValueError("No file was uploaded; upload the alarm export and re-run this cell.")
filename = next(iter(uploaded))
# header=3: the column names are on the fourth row of the sheet (row index is zero-based)
df = pd.read_excel(filename, header=3)

# Data cleaning
# errors='coerce' turns unparseable timestamps into NaT instead of raising
df['EVENT DAY'] = pd.to_datetime(df['EVENT DAY'], errors='coerce')
df['RESOLVED TIME'] = pd.to_datetime(df['RESOLVED TIME'], errors='coerce')
df['CLIENT NAME'] = df['CLIENT NAME'].replace({'americanauae': 'UAE', 'americanaksa': 'KSA'})
# SOURCE NAME is treated as optional by every later section, so only clean it when present
if 'SOURCE NAME' in df.columns:
    df['SOURCE NAME'] = df['SOURCE NAME'].str.strip()

# Calculate resolution time in minutes
df['resolution_minutes'] = (df['RESOLVED TIME'] - df['EVENT DAY']).dt.total_seconds() / 60
# Keep only alarms that have a resolution timestamp that is not earlier than the event itself
valid_resolutions = df.dropna(subset=['RESOLVED TIME'])
valid_resolutions = valid_resolutions[valid_resolutions['RESOLVED TIME'] >= valid_resolutions['EVENT DAY']]

# Container that every analysis section below writes its result into
analysis_results = {}

# 1-4. Alarm counts per client, source type, group and alarm name.
# CLIENT NAME and NAME are used unconditionally; the other two columns are optional.
client_counts = df['CLIENT NAME'].value_counts()
analysis_results['alarm_by_client'] = client_counts.to_dict()
for optional_column, optional_key in (
    ('SOURCE TYPE NAME', 'alarm_by_source_type'),
    ('GROUP', 'alarm_by_group'),
):
    if optional_column not in df.columns:
        continue
    analysis_results[optional_key] = df[optional_column].value_counts().to_dict()
name_counts = df['NAME'].value_counts()
analysis_results['alarm_by_name'] = name_counts.to_dict()

# 5. Weekly and monthly trends with PATH
if 'SOURCE NAME' in df.columns and 'PATH' in df.columns:

    def _most_common_path(paths):
        """Return the most frequent PATH of one source, or 'N/A' when every value is missing."""
        modes = paths.mode()
        # mode() is empty when the group holds only NaN; indexing it would raise
        return modes.iloc[0] if not modes.empty else 'N/A'

    source_path_mapping = df.groupby('SOURCE NAME')['PATH'].agg(_most_common_path).to_dict()

    # Rows whose EVENT DAY could not be parsed (NaT) have no week or month; leaving them in
    # would create a bogus "<NA>" / "nan" period once the labels are cast to str.
    dated_events = df.dropna(subset=['EVENT DAY'])

    def _counts_per_period(period_labels):
        """Return {source: {'path': ..., 'counts': {period: n}}} keeping only non-zero counts."""
        table = (dated_events.groupby(['SOURCE NAME', period_labels])
                 .size().unstack(fill_value=0))
        table = table.loc[(table > 0).any(axis=1)]
        return {
            source: {
                'path': source_path_mapping.get(source, 'N/A'),
                'counts': {period: count for period, count in period_data.items() if count > 0}
            }
            for source, period_data in table.to_dict('index').items()
        }

    # Weekly analysis (ISO week number, stored as a string key)
    analysis_results['alarm_by_source_weekly'] = _counts_per_period(
        dated_events['EVENT DAY'].dt.isocalendar().week.astype(str))

    # Monthly analysis (month name)
    analysis_results['alarm_by_source_monthly'] = _counts_per_period(
        dated_events['EVENT DAY'].dt.month_name().astype(str))

# 6. Hourly distribution
# Work on integer hours so that (a) rows without a valid EVENT DAY are left out instead of
# becoming a "nan" bucket, (b) labels stay "20" rather than "20.0" when NaT values are present,
# and (c) the result is ordered 0..23 numerically rather than lexicographically ("0", "1", "10", ...).
event_hours = df['EVENT DAY'].dt.hour.dropna().astype(int)
# Index-aligned assignment: rows without a valid EVENT DAY get NaN in the 'hour' column
df['hour'] = event_hours.astype(str)
hourly_counts = event_hours.value_counts().sort_index()
analysis_results['alarm_by_hour'] = {str(hour): int(count) for hour, count in hourly_counts.items()}

# 7. The 20 most and 20 least frequent alarm sources, each with its PATH
if 'SOURCE NAME' in df.columns and 'PATH' in df.columns:
    # value_counts() is ordered by descending count, so head/tail give the two extremes
    source_counts = df['SOURCE NAME'].value_counts()
    for ranking_key, ranked_sources in (
        ('top20_sources', source_counts.head(20)),
        ('bottom20_sources', source_counts.tail(20)),
    ):
        ranking = {}
        for source, count in ranked_sources.items():
            ranking[source] = {
                'count': int(count),
                'path': source_path_mapping.get(source, 'N/A'),
            }
        analysis_results[ranking_key] = ranking

# 8. Mean / median / count of resolution minutes per alarm name, rounded to two decimals
resolution_by_alarm = valid_resolutions.groupby('NAME')['resolution_minutes']
resolution_summary = resolution_by_alarm.agg(['mean', 'median', 'count']).round(2)
analysis_results['resolution_time_stats'] = resolution_summary.to_dict('index')

# 9. The 20 slowest and 20 fastest resolutions, each with source, PATH and timestamps
if 'SOURCE NAME' in df.columns and 'PATH' in df.columns:
    extreme_columns = ['SOURCE NAME', 'PATH', 'resolution_minutes', 'EVENT DAY', 'RESOLVED TIME']

    def _resolution_records(rows):
        """Turn each row of ``rows`` into the dict layout used in the report."""
        records = []
        for _, row in rows.iterrows():
            records.append({
                'source': row['SOURCE NAME'],
                'path': row['PATH'],
                'time_minutes': round(row['resolution_minutes'], 2),
                'event_day': row['EVENT DAY'].isoformat(),
                'resolved_time': row['RESOLVED TIME'].isoformat()
            })
        return records

    # Longest resolutions first
    top20_longest = valid_resolutions.nlargest(20, 'resolution_minutes')[extreme_columns]
    analysis_results['top20_longest_resolutions'] = _resolution_records(top20_longest)

    # Fastest resolutions first
    top20_fastest = valid_resolutions.nsmallest(20, 'resolution_minutes')[extreme_columns]
    analysis_results['top20_fastest_resolutions'] = _resolution_records(top20_fastest)

# Reorganize results with resolution extremes before temporal trends
total_alarm_count = len(df)
resolved_alarm_count = len(valid_resolutions)
# Guard the division so an empty sheet reports 0.0% instead of raising ZeroDivisionError
resolution_rate = round(resolved_alarm_count / total_alarm_count * 100, 2) if total_alarm_count else 0.0


def _share_resolved_within(limit_minutes):
    """Return the percentage of resolved alarms closed within ``limit_minutes``.

    Returns None when there are no resolved alarms: the mean of an empty
    selection is NaN, which json.dump would write as a non-standard ``NaN`` token.
    """
    if valid_resolutions.empty:
        return None
    return round((valid_resolutions['resolution_minutes'] <= limit_minutes).mean()*100, 2)


final_results = {
    "summary": {
        "total_alarms": total_alarm_count,
        "resolved_alarms": resolved_alarm_count,
        "resolution_rate in Percentage": str(resolution_rate) + "%",
        "time_period": {
            "start": df['EVENT DAY'].min().isoformat(),
            "end": df['EVENT DAY'].max().isoformat()
        }
    },
    "distributions": {
        "by_client": analysis_results['alarm_by_client'],
        # Optional columns may be absent, hence .get() with an empty default
        "by_type": analysis_results.get('alarm_by_source_type', {}),
        "by_group": analysis_results.get('alarm_by_group', {}),
        "by_name": analysis_results['alarm_by_name'],
        "by_hour": analysis_results['alarm_by_hour']
    },
    "source_analysis": {
        "top_20": analysis_results.get('top20_sources', {}),
        "bottom_20": analysis_results.get('bottom20_sources', {})
    },
    "performance": {
        "resolution_times": analysis_results['resolution_time_stats'],
        "sla_compliance/Resolved within the time limit": {
            "within_15min": _share_resolved_within(15),
            "within_30min": _share_resolved_within(30),
            "within_1hour": _share_resolved_within(60)
        }
    },
    "resolution_extremes": {
        "longest_20_resolutions": analysis_results.get('top20_longest_resolutions', []),
        "fastest_20_resolutions": analysis_results.get('top20_fastest_resolutions', [])
    },
    "temporal_trends": {
        "weekly": analysis_results.get('alarm_by_source_weekly', {}),
        "monthly": analysis_results.get('alarm_by_source_monthly', {})
    }
}

# Write the organised report to disk, then hand it to the browser as a download
output_filename = 'alarm_analysis_results.json'
with open(output_filename, 'w') as report_file:
    # CustomJSONEncoder takes care of NumPy scalars and timestamps inside the report
    json.dump(final_results, report_file, indent=2, cls=CustomJSONEncoder)

files.download(output_filename)

In [1]:
# Install required libraries
!pip install transformers -q

import json
from google.colab import files
from transformers import pipeline
from datetime import datetime

# Sample JSON data
# A small built-in report that main() falls back to when the user chooses the sample
# or uploads nothing. It contains the "summary", "distributions", "source_analysis",
# "performance" and "temporal_trends" sections that the query handlers below read;
# only one source and one alarm name are included in the nested sections.
sample_json = '''
{
  "summary": {
    "total_alarms": 10380,
    "resolved_alarms": 9999,
    "resolution_rate in Percentage": "96.33%",
    "time_period": {
      "start": "2025-05-01T20:00:34",
      "end": "2025-05-12T03:11:35"
    }
  },
  "distributions": {
    "by_client": {
      "KSA": 5701,
      "UAE": 4672,
      "Bahrain": 7
    },
    "by_type": {
      "Freezer": 5400,
      "Chiller": 4713,
      "LV Panel Meter": 183,
      "Commercial Tower": 67,
      "Thermostat": 16,
      "Sub Community": 1
    },
    "by_group": {
      "ENVIRONMENTAL": 10129,
      "ELECTRICAL": 183,
      "PREVENTIVE": 68
    },
    "by_name": {
      "Door Open": 4714,
      "Extremely High Temperature": 2251,
      "High Temperature": 2201,
      "Low Temperature": 963,
      "No power": 183,
      "Site Not Communicating": 68
    }
  },
  "source_analysis": {
    "top_20": {
      "T 118709 CH 02": {
        "count": 77,
        "path": "UAE/Dubai/TGIF Jumeirah - 118709"
      }
    }
  },
  "performance": {
    "resolution_times": {
      "Door Open": {
        "mean": 50.42,
        "median": 14.68,
        "count": 4561
      }
    },
    "sla_compliance/Resolved within the time limit": {
      "within_15min": 42.89,
      "within_30min": 59.09,
      "within_1hour": 73.33
    }
  },
  "temporal_trends": {
    "weekly": {
      "T 118709 CH 02": {
        "path": "UAE/Dubai/TGIF Jumeirah - 118709",
        "counts": {
          "19": 77
        }
      }
    }
  }
}
'''

def parse_json_to_text(json_data, filename="Unknown"):
    """Convert a JSON alarm report into a short text context for the chatbot.

    Args:
        json_data: The report as a JSON string.
        filename: Name shown in the generated text and in error messages.

    Returns:
        A ``(text, data)`` tuple. ``text`` is a newline-terminated summary and
        ``data`` the parsed report. When the input is not valid JSON, or its
        top level is not an object, the tuple is
        ``("Invalid JSON in <filename>.", {})``.
    """
    try:
        data = json.loads(json_data)
    except json.JSONDecodeError:
        return f"Invalid JSON in {filename}.", {}
    # Valid JSON that is not an object (list, number, ...) has none of the sections read below
    if not isinstance(data, dict):
        return f"Invalid JSON in {filename}.", {}

    def date_part(value):
        # Keep only the YYYY-MM-DD prefix; anything that is not a string counts as missing
        return value[:10] if isinstance(value, str) else 'N/A'

    lines = [f"Data from {filename}:"]

    # Summary
    summary = data.get("summary") or {}
    lines.append(
        f"Total Alarms: {summary.get('total_alarms', 'N/A')}, "
        f"Resolved: {summary.get('resolved_alarms', 'N/A')}, "
        f"Rate: {summary.get('resolution_rate in Percentage', 'N/A')}"
    )
    time_period = summary.get("time_period") or {}
    lines.append(
        f"Period: {date_part(time_period.get('start', 'N/A'))} "
        f"to {date_part(time_period.get('end', 'N/A'))}"
    )

    # Distributions (top items only): list up to two entries, however many exist
    distributions = data.get("distributions") or {}
    for dist_type in ("by_client", "by_type", "by_name"):
        counts = distributions.get(dist_type)
        if not counts:
            continue
        top_items = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:2]
        listing = ", ".join(f"{label} ({count})" for label, count in top_items)
        lines.append(f"{dist_type.replace('_', ' ').title()}: {listing}")

    # Top Source: the first entry of top_20
    top_sources = (data.get("source_analysis") or {}).get("top_20") or {}
    for source, details in top_sources.items():
        lines.append(f"Top Source: {source} ({details.get('count', 'N/A')} alarms)")
        break

    # Performance
    performance = data.get("performance") or {}
    door_open = (performance.get("resolution_times") or {}).get("Door Open") or {}
    lines.append(f"Door Open Resolution: Mean {door_open.get('mean', 'N/A')} min")
    sla = performance.get("sla_compliance/Resolved within the time limit") or {}
    lines.append(f"SLA: 15min {sla.get('within_15min', 'N/A')}%")

    return "\n".join(lines) + "\n", data

def _merged_counts(json_data_list, key):
    """Merge ``distributions[key]`` of every dataset into one dict (later datasets win on clashes)."""
    merged = {}
    for json_data in json_data_list:
        merged.update(json_data.get("distributions", {}).get(key, {}))
    return merged


def _top_items_text(counts, limit):
    """Format the ``limit`` largest entries as "name (count), ..." or "N/A" when there are none."""
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:limit]
    return ", ".join(f"{name} ({count})" for name, count in ranked) or "N/A"


def _date_part(value):
    """Return the YYYY-MM-DD prefix of a timestamp string, or "N/A" for non-string values."""
    return value[:10] if isinstance(value, str) else "N/A"


def _period_days(time_period):
    """Return the inclusive day count of ``time_period``, or None when it cannot be computed."""
    try:
        start_date = datetime.strptime(_date_part(time_period.get("start", "N/A")), "%Y-%m-%d")
        end_date = datetime.strptime(_date_part(time_period.get("end", "N/A")), "%Y-%m-%d")
    except ValueError:
        # Missing ("N/A") or malformed dates
        return None
    days = (end_date - start_date).days + 1
    return days if days > 0 else None


def _is_number(value):
    """True for int/float values (bool excluded), i.e. something an average can be computed from."""
    return isinstance(value, (int, float)) and not isinstance(value, bool)


def _overview_answer(json_data):
    """Build the one-paragraph overview of a single dataset, using "N/A" for missing parts."""
    summary = json_data.get("summary", {})
    distributions = json_data.get("distributions", {})
    top_20 = json_data.get("source_analysis", {}).get("top_20", {})
    if top_20:
        source, details = next(iter(top_20.items()))
        source_count = details.get("count", "N/A")
    else:
        source = source_count = "N/A"
    performance = json_data.get("performance", {})
    door_open = performance.get("resolution_times", {}).get("Door Open", {})
    sla = performance.get("sla_compliance/Resolved within the time limit", {})
    return (f"{summary.get('total_alarms', 'N/A')} alarms, {summary.get('resolved_alarms', 'N/A')} resolved "
            f"({summary.get('resolution_rate in Percentage', 'N/A')}). "
            f"Top clients: {_top_items_text(distributions.get('by_client', {}), 2)}. "
            f"Top types: {_top_items_text(distributions.get('by_type', {}), 2)}. "
            f"Top alarm: {_top_items_text(distributions.get('by_name', {}), 1)}. "
            f"Top source: {source} ({source_count} alarms). "
            f"Door Open resolution: {door_open.get('mean', 'N/A')} min avg. "
            f"SLA: {sla.get('within_15min', 'N/A')}% in 15 min.")


def _least_counts_answer(json_data):
    """Name the smallest entry of each distribution (type, name, client, group) of one dataset."""
    distributions = json_data.get("distributions", {})
    sentences = []
    for label in ("type", "name", "client", "group"):
        counts = distributions.get(f"by_{label}", {})
        if not counts:
            continue
        lowest = min(counts, key=counts.get)
        if label == "type":
            sentences.append(f"By type, {lowest} has the least alarms with {counts[lowest]}.")
        else:
            sentences.append(f"By {label}, {lowest} has {counts[lowest]} alarms.")
    if not sentences:
        return "No data available for least alarm counts."
    return " ".join(sentences) + " Specify a category (e.g., type, name) for more focus."


def _weekly_answer(user_query, weekly, query_numbers):
    """Answer from per-source weekly counts, or return None when the query names no usable week."""
    entries = [
        (source, week, count)
        for source, details in weekly.items()
        for week, count in details.get("counts", {}).items()
    ]
    if not entries:
        return None
    # Match week numbers as whole numbers so that week "1" is not found inside "19" or "2025"
    named = [entry for entry in entries if str(entry[1]) in query_numbers]
    if "most" in user_query or "highest" in user_query:
        # Report the real maximum (within the named week, if any), not simply the first entry
        source, week, count = max(named or entries, key=lambda entry: entry[2])
    elif named:
        source, week, count = named[0]
    else:
        return None
    return f"{source} had {count} alarms in week {week}."


def _distribution_answer(user_query, counts, most_template, least_template, named_template):
    """Answer a most / least / named-item question about one distribution, or return None."""
    if not counts:
        return None
    if "highest" in user_query or "most" in user_query:
        name = max(counts, key=counts.get)
        return most_template.format(name=name, count=counts[name])
    if "fewest" in user_query or "least" in user_query:
        name = min(counts, key=counts.get)
        return least_template.format(name=name, count=counts[name])
    for name, count in counts.items():
        if name.lower() in user_query:
            return named_template.format(name=name, count=count)
    return None


def _comparison_answer(user_query, counts):
    """Compare the first two entries of ``counts`` that the query mentions, or return None."""
    # Look for longer names first and blank them out, so "High Temperature" is not counted
    # as mentioned merely because "Extremely High Temperature" is.
    remaining = user_query
    found = set()
    for name in sorted(counts, key=len, reverse=True):
        lowered = name.lower()
        if lowered and lowered in remaining:
            found.add(name)
            remaining = remaining.replace(lowered, " ")
    mentioned = [name for name in counts if name in found]
    if len(mentioned) < 2:
        return None
    item1, item2 = mentioned[0], mentioned[1]
    count1, count2 = counts[item1], counts[item2]
    if count1 == count2:
        return f"{item1} and {item2} both had {count1} alarms."
    return f"{item1} had {count1} alarms, {'more' if count1 > count2 else 'fewer'} than {item2} with {count2} alarms."


def preprocess_query(user_query, json_data_list, previous_query=None):
    """Answer a query directly from the parsed JSON reports using keyword rules.

    The query is lower-cased and checked against a fixed sequence of keyword
    rules; the first rule that can produce an answer wins.

    Args:
        user_query: The question typed by the user.
        json_data_list: Parsed reports (dicts), one per loaded file.
        previous_query: The previous question, used to resolve a
            "not client ... type" clarification. May be None.

    Returns:
        The answer as a string, or None when no rule applies (the caller then
        falls back to the language model).
    """
    import re  # local import: only this function needs it

    user_query = user_query.lower().strip()

    # Handle clarifications: re-ask the previous question about types instead of clients.
    # previous_query is the raw user input, so normalise it like the current query.
    if "not client" in user_query and previous_query and "type" in user_query:
        user_query = previous_query.lower().strip().replace("client", "type")

    query_numbers = set(re.findall(r"\d+", user_query))

    # Broad queries (answered from the first dataset)
    if any(phrase in user_query for phrase in ["details", "about alarms", "tell me about", "overview"]):
        if json_data_list:
            return _overview_answer(json_data_list[0])

    # Least alarm count (answered from the first dataset)
    if any(phrase in user_query for phrase in ["least alarm", "lowest alarm", "least count", "lowest count"]):
        if json_data_list:
            return _least_counts_answer(json_data_list[0])

    # Total alarms
    if "total" in user_query and "alarm" in user_query:
        for json_data in json_data_list:
            total_alarms = json_data.get("summary", {}).get("total_alarms", "N/A")
            if total_alarms != "N/A":
                return f"The total number of alarms is {total_alarms}."

    # Resolved alarms
    if "resolved" in user_query and "alarm" in user_query:
        for json_data in json_data_list:
            resolved_alarms = json_data.get("summary", {}).get("resolved_alarms", "N/A")
            if resolved_alarms != "N/A":
                return f"{resolved_alarms} alarms were resolved."

    # Resolution rate
    if "resolution rate" in user_query:
        for json_data in json_data_list:
            resolution_rate = json_data.get("summary", {}).get("resolution_rate in Percentage", "N/A")
            if resolution_rate != "N/A":
                return f"The resolution rate is {resolution_rate}."

    # Time period
    if any(phrase in user_query for phrase in ["time period", "when", "date"]):
        for json_data in json_data_list:
            time_period = json_data.get("summary", {}).get("time_period", {})
            start = _date_part(time_period.get("start", "N/A"))
            end = _date_part(time_period.get("end", "N/A"))
            if start != "N/A" and end != "N/A":
                return f"The alarms were recorded from {start} to {end}."

    # Daily alarms (average only; skipped when the total or the dates are unusable)
    if "day" in user_query and "alarm" in user_query:
        for json_data in json_data_list:
            summary = json_data.get("summary", {})
            total_alarms = summary.get("total_alarms", "N/A")
            days = _period_days(summary.get("time_period", {}))
            if _is_number(total_alarms) and days:
                avg_alarms = round(total_alarms / days)
                return f"The data doesn’t give daily breakdowns, but with {total_alarms} alarms over {days} days, that’s about {avg_alarms} alarms per day."

    # Weekly alarms or frequency
    if ("week" in user_query or "frequency" in user_query) and "alarm" in user_query:
        for json_data in json_data_list:
            weekly = json_data.get("temporal_trends", {}).get("weekly", {})
            weekly_answer = _weekly_answer(user_query, weekly, query_numbers)
            if weekly_answer:
                return weekly_answer
            summary = json_data.get("summary", {})
            total_alarms = summary.get("total_alarms", "N/A")
            days = _period_days(summary.get("time_period", {}))
            if _is_number(total_alarms) and days:
                avg_weekly = round(total_alarms * 7 / days)
                return f"The data doesn’t give full weekly breakdowns, but with {total_alarms} alarms over {days} days, that’s about {avg_weekly} alarms per week."

    # Comparative queries. Checked before the per-category rules below: those rules return as
    # soon as a single known name is mentioned, which would make a comparison unreachable.
    if any(word in user_query for word in ["more", "less", "compare", "than"]):
        for key in ("by_type", "by_client", "by_name"):
            comparison = _comparison_answer(user_query, _merged_counts(json_data_list, key))
            if comparison:
                return comparison

    # Alarm type queries
    if "type" in user_query or any(type_name.lower() in user_query for type_name in ["freezer", "chiller", "lv panel meter", "commercial tower", "thermostat", "sub community"]):
        answer = _distribution_answer(
            user_query, _merged_counts(json_data_list, "by_type"),
            "The most common alarm type is {name} with {count} alarms.",
            "The least common alarm type is {name} with {count} alarms.",
            "There were {count} {name} alarms.")
        if answer:
            return answer

    # Client queries
    if "client" in user_query or any(client.lower() in user_query for client in ["ksa", "uae", "bahrain"]):
        answer = _distribution_answer(
            user_query, _merged_counts(json_data_list, "by_client"),
            "The client with the most alarms is {name} with {count} alarms.",
            "The client with the fewest alarms is {name} with {count} alarms.",
            "{name} had {count} alarms.")
        if answer:
            return answer

    # Group queries
    if "group" in user_query or any(group.lower() in user_query for group in ["environmental", "electrical", "preventive"]):
        answer = _distribution_answer(
            user_query, _merged_counts(json_data_list, "by_group"),
            "The most common alarm group is {name} with {count} alarms.",
            "The least common alarm group is {name} with {count} alarms.",
            "There were {count} {name} alarms.")
        if answer:
            return answer

    # Alarm name queries
    if "name" in user_query or any(name.lower() in user_query for name in ["door open", "extremely high temperature", "high temperature", "low temperature", "no power", "site not communicating"]):
        answer = _distribution_answer(
            user_query, _merged_counts(json_data_list, "by_name"),
            "The most common alarm name is {name} with {count} alarms.",
            "The least common alarm name is {name} with {count} alarms.",
            "There were {count} {name} alarms.")
        if answer:
            return answer

    # Source queries
    if "source" in user_query or "t 118709 ch 02" in user_query:
        for json_data in json_data_list:
            top_20 = json_data.get("source_analysis", {}).get("top_20", {})
            for source, details in top_20.items():
                if source.lower() in user_query or "most" in user_query or "highest" in user_query:
                    return f"{source} at {details.get('path', 'N/A')} had {details.get('count', 'N/A')} alarms."

    # Resolution time queries
    if "resolution time" in user_query and "door open" in user_query:
        for json_data in json_data_list:
            resolution_times = json_data.get("performance", {}).get("resolution_times", {}).get("Door Open", {})
            mean = resolution_times.get("mean", "N/A")
            median = resolution_times.get("median", "N/A")
            if mean != "N/A":
                return f"Door Open alarms have an average resolution time of {mean} minutes and a median of {median} minutes."

    # SLA compliance queries
    if "sla" in user_query or "within" in user_query:
        for json_data in json_data_list:
            sla = json_data.get("performance", {}).get("sla_compliance/Resolved within the time limit", {})
            if "15" in user_query:
                return f"{sla.get('within_15min', 'N/A')}% of alarms were resolved within 15 minutes."
            if "30" in user_query:
                return f"{sla.get('within_30min', 'N/A')}% of alarms were resolved within 30 minutes."
            if "1 hour" in user_query or "1hr" in user_query:
                return f"{sla.get('within_1hour', 'N/A')}% of alarms were resolved within 1 hour."

    # Hourly data: reports written by the analysis cell carry distributions.by_hour,
    # so use it when present and only apologise when it is really missing.
    if "hour" in user_query:
        by_hour = _merged_counts(json_data_list, "by_hour")
        for hour, count in by_hour.items():
            if str(hour) in query_numbers:
                return f"There were {count} alarms during hour {hour}."
        if by_hour:
            peak_hour = max(by_hour, key=by_hour.get)
            return f"The busiest hour is {peak_hour}:00 with {by_hour[peak_hour]} alarms."
        return "Sorry, the data doesn't include hourly alarm breakdowns. Try asking about weekly trends or other data."

    # Ambiguous queries
    if "not" in user_query and not previous_query:
        return "Can you clarify what you mean? For example, are you asking about alarm types or names?"

    return None

# Holds the T5 pipeline once it has been built, so it is loaded at most once per session
_GENERATOR_CACHE = {}


def _get_generator():
    """Return the shared t5-small text2text pipeline, building it on first use."""
    if "t5" not in _GENERATOR_CACHE:
        _GENERATOR_CACHE["t5"] = pipeline('text2text-generation', model='t5-small')
    return _GENERATOR_CACHE["t5"]


def generate_response(user_query, context, json_data_list, previous_query=None):
    """Answer a query, preferring the rule-based lookup and falling back to T5.

    Args:
        user_query: The question typed by the user.
        context: Text summary of the loaded reports, embedded in the model prompt.
        json_data_list: Parsed reports passed to ``preprocess_query``.
        previous_query: The previous question, forwarded to ``preprocess_query``.

    Returns:
        The answer string. A generic apology is returned when the model gives
        no usable answer or fails for any reason.
    """
    fallback = "Sorry, I couldn't find an answer. Can you be more specific?"

    direct_answer = preprocess_query(user_query, json_data_list, previous_query)
    if direct_answer:
        return direct_answer

    try:
        # Reuse the cached pipeline instead of reloading the model for every question
        generator = _get_generator()
        prompt = f"Answer concisely using only this data. Do not invent numbers or details: {context} Query: {user_query}"
        response = generator(prompt, max_length=50, num_return_sequences=1)
        answer = response[0]['generated_text'].strip()
    except Exception:
        # Deliberate best-effort: a model download or inference failure must not end the chat loop
        return fallback

    if not answer or any(word in answer.lower() for word in ["unknown", "no data"]):
        return fallback
    return answer

def main():
    """Run the interactive chatbot: load reports, then answer questions until 'exit'.

    Reports come from an upload or from the built-in ``sample_json``. Uploaded
    files that cannot be decoded or parsed are skipped with a message, and the
    sample is used when no usable report remains.
    """
    print("Upload JSON files or use sample data.")
    use_sample = input("Use sample JSON? (y/n): ").lower() == 'y'

    contexts = []
    json_data_list = []
    previous_query = None

    if not use_sample:
        uploaded = files.upload()
        if not uploaded:
            print("No files uploaded. Using sample JSON.")
        for filename, content in uploaded.items():
            try:
                json_text = content.decode('utf-8')
            except UnicodeDecodeError:
                print(f"Skipping {filename}: not valid UTF-8 text.")
                continue
            context, parsed_data = parse_json_to_text(json_text, filename)
            if not parsed_data:
                # An empty result means invalid JSON or an empty report; neither can answer anything
                print(f"Skipping {filename}: no usable alarm data.")
                continue
            contexts.append(context)
            json_data_list.append(parsed_data)
        if uploaded and not json_data_list:
            print("No usable JSON files. Using sample JSON.")

    # Single place for the sample fallback: chosen explicitly, nothing uploaded, or nothing usable
    if not json_data_list:
        context, json_data = parse_json_to_text(sample_json, "sample.json")
        contexts.append(context)
        json_data_list.append(json_data)

    print("\nData loaded. Ask away!")

    while True:
        user_query = input("Ask a question (or type 'exit' to quit): ")
        if user_query.strip().lower() == 'exit':
            print("Goodbye!")
            break
        if not user_query.strip():
            # Nothing to answer; do not send an empty prompt to the model
            continue

        response = generate_response(user_query, "\n".join(contexts), json_data_list, previous_query)
        print(f"Answer: {response}")
        previous_query = user_query

if __name__ == "__main__":
    main()

Upload JSON files or use sample data.


Saving alarm_analysis_results (20).json to alarm_analysis_results (20).json

Data loaded. Ask away!
Answer: The data doesn’t give full weekly breakdowns, but with 10380 alarms over 12 days, that’s about 6055 alarms per week.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


Answer: : 5747), Freezer (5400), Chiller (4713) By Name: Door Open (4714), Extremely High Temperature (2251) Top Source: T 118709 CH 02 (77 alarms) Door
Ask a question (or type 'exit' to quit): exit
Goodbye!
