In [1]:
import os
import sys
from google.colab import drive
from google.colab import userdata

# Mount Google Drive under /content/drive so the project folder is reachable.
drive.mount("/content/drive")

# Fetch the Hugging Face token via google.colab.userdata and export it as an
# environment variable; only a confirmation message is printed, never the token.
HF_TOKEN = userdata.get("HF_TOKEN")
os.environ["HF_TOKEN"] = HF_TOKEN

print("Hugging Face Token successfully set.")

# Switch the working directory to the project's data folder; later cells open
# files through relative 'processed/...' paths.
%cd /content/drive/MyDrive/ES-CSA/data/
# Add the project's src folder to the module search path.
# NOTE(review): this path spells the folder "My Drive" (with a space) while the
# %cd line above uses "MyDrive" - confirm both resolve to the same location.
sys.path.append('/content/drive/My Drive/ES-CSA/src')

Mounted at /content/drive
Hugging Face Token successfully set.
/content/drive/MyDrive/ES-CSA/data


In [2]:
!pip install pandas numpy



In [3]:
import json
import pandas as pd
import numpy as np

In [4]:
# Load Consumer Data
# The path is relative to the current working directory.

consumer_data_path = 'processed/consumer_data.json'

with open(consumer_data_path, mode="r", encoding="utf-8") as file:
    # Read the whole document as text, then parse it as JSON.
    consumer_data = json.loads(file.read())

# Structured Summaries for Embeddings

def generate_structured_summary(user):
    """Reshape one raw consumer record into a summary with readable labels.

    Args:
        user: Mapping for a single consumer. "Name", "City", "User_Type" and
            "MSISDN" are required; the "CDRS", "Purchases" and "Tickets"
            lists are optional and treated as empty when the key is missing.

    Returns:
        dict with the keys "MSISDN", "User Profile", "CDRS", "Purchases" and
        "Tickets" (in that order), each nested entry relabelled.

    Raises:
        KeyError: if a required field is missing from the record or from one
            of its nested entries.
    """
    profile = {
        "Name": user["Name"],
        "City": user["City"],
        "Plan Type": user["User_Type"],
    }

    # Usage records: value and lower-cased resource type are merged into one label.
    usage_entries = [
        {
            "Date": record["Datetime_Charged"],
            "Resource Used": f"{record['Resource_Value']} {record['Resource_Type'].lower()}",
            "Charge (PKR)": record["Amount_Charged"],
        }
        for record in user.get("CDRS", [])
    ]

    # Bundle purchases with their per-resource allowances.
    purchase_entries = [
        {
            "Date": bought["Datetime"],
            "Amount Spent (PKR)": bought["Amount"],
            "Data Browsing (MB)": bought["Data_Browsing_Allowance"],
            "Data Social (MB)": bought["Data_Social_Allowance"],
            "SMS": bought["SMS_Allowance"],
            "Voice On-Net (min)": bought["Voice_On-Net_Allowance"],
            "Voice Off-Net (min)": bought["Voice_Off-Net_Allowance"],
        }
        for bought in user.get("Purchases", [])
    ]

    # Support tickets with their log/resolution details.
    ticket_entries = [
        {
            "Ticket ID": issue["Ticket_ID"],
            "Category": issue["Category"],
            "Description": issue["Description"],
            "Logged Time": issue["Log_Time"],
            "Resolved Time": issue["Resolution_Time"],
            "Resolution": issue["Resolution"],
        }
        for issue in user.get("Tickets", [])
    ]

    return {
        "MSISDN": user["MSISDN"],
        "User Profile": profile,
        "CDRS": usage_entries,
        "Purchases": purchase_entries,
        "Tickets": ticket_entries,
    }

# Summarise every consumer record, then print the first summary as a spot check.
summarized_data = list(map(generate_structured_summary, consumer_data))

print(summarized_data[0])

{'MSISDN': 9230610000463, 'User Profile': {'Name': 'User 1', 'City': 'Lahore', 'Plan Type': 'Prepaid'}, 'CDRS': [{'Date': '2024-10-26T22:44:08', 'Resource Used': '211 voice', 'Charge (PKR)': 10}, {'Date': '2023-05-23T07:54:33', 'Resource Used': '34 data', 'Charge (PKR)': 0}, {'Date': '2023-10-12T09:06:27', 'Resource Used': '36 voice', 'Charge (PKR)': 0}, {'Date': '2024-01-21T13:20:44', 'Resource Used': '25 data', 'Charge (PKR)': 0}, {'Date': '2023-07-31T16:27:19', 'Resource Used': '18 data', 'Charge (PKR)': 11}, {'Date': '2023-12-24T01:07:33', 'Resource Used': '229 voice', 'Charge (PKR)': 5}, {'Date': '2024-11-09T17:08:02', 'Resource Used': '12 sms', 'Charge (PKR)': 0}], 'Purchases': [{'Date': '2024-11-06T15:50:55', 'Amount Spent (PKR)': 912, 'Data Browsing (MB)': 4572, 'Data Social (MB)': 836, 'SMS': 564, 'Voice On-Net (min)': 416, 'Voice Off-Net (min)': 168}, {'Date': '2024-01-13T08:17:43', 'Amount Spent (PKR)': 639, 'Data Browsing (MB)': 4545, 'Data Social (MB)': 2373, 'SMS': 488, '

In [5]:
# Save Consumer Data Summary as JSON

output_path = "processed/consumer_data_summary.json"
# Open with an explicit UTF-8 encoding, consistent with how the other JSON files
# in this notebook are read and written, rather than the platform default.
with open(output_path, "w", encoding="utf-8") as file:
    json.dump(summarized_data, file, indent=4)

print("Saved Consumer Data Summary.")

Saved Consumer Data Summary.


In [6]:
# Load Consumer Insights
# The path is relative to the current working directory.

consumer_insights_path = 'processed/consumer_insights.json'

with open(consumer_insights_path, mode="r", encoding="utf-8") as file:
    # Read the whole document as text, then parse it as JSON.
    consumer_insights = json.loads(file.read())

# Structured Summaries for Embeddings

def generate_insight_summary(user_id, insights):
    """
    Convert user-specific insights into a structured format for easy retrieval.

    Underscores in the top-level insight names are replaced with spaces. When a
    value is itself a dict, underscores in its keys are replaced as well; every
    other value (lists, scalars) is stored unchanged.

    Args:
        user_id: Identifier stored under the "MSISDN" key of the result.
        insights: Mapping of insight name to value for this user.

    Returns:
        dict of the form {"MSISDN": user_id, "User Insights": {...}}.
    """
    summary = {
        "MSISDN": user_id,
        "User Insights": {}
    }
    # Every branch writes into "User Insights", the only container the summary
    # defines; writing to any other key would raise KeyError.
    user_insights = summary["User Insights"]

    for key, value in insights.items():
        cleaned_key = key.replace("_", " ")
        if isinstance(value, dict):
            user_insights[cleaned_key] = {k.replace("_", " "): v for k, v in value.items()}
        else:
            # Lists and scalar values are stored as-is.
            user_insights[cleaned_key] = value

    return summary

# User Insights

# One record per user id; the aggregate "General_Insights" entry is skipped.
user_insight_summary = [
    {
        "MSISDN": msisdn,
        "User Insights": {label.replace("_", " "): stat for label, stat in user_stats.items()},
    }
    for msisdn, user_stats in consumer_insights.items()
    if msisdn != "General_Insights"
]

# General Insights

# Underscores are replaced with spaces in the top-level keys only.
general_insights_summary = {
    "General Insights": {
        label.replace("_", " "): stat
        for label, stat in consumer_insights["General_Insights"].items()
    }
}

# The aggregate record becomes the final element of the per-user list.
user_insight_summary += [general_insights_summary]

# Spot-check the first record and the aggregate record, one per line.
print(user_insight_summary[0], general_insights_summary, sep="\n")

{'MSISDN': '9230101039883', 'User Insights': {'Total Data Consumed': 167, 'Total SMS Consumed': 79, 'Total Voice Consumed': 262, 'Total Amount Spent on Data': 24, 'Total Amount Spent on SMS': 24, 'Total Amount Spent on Voice': 33, 'Total Amount Charged': 81, 'Transaction Count': 10, 'Total Amount Purchases': 9287, 'Total Purchase Transactions': 16, 'Total Tickets': 15}}
{'General Insights': {'Regional Popularity': [{'City': 'Quetta', 'User_Count': 75}, {'City': 'Lahore', 'User_Count': 73}, {'City': 'Peshawar', 'User_Count': 62}, {'City': 'Faisalabad', 'User_Count': 61}, {'City': 'Rawalpindi', 'User_Count': 60}, {'City': 'Islamabad', 'User_Count': 59}, {'City': 'Multan', 'User_Count': 58}, {'City': 'Karachi', 'User_Count': 52}], 'User Type Distribution': [{'User_Type': 'Postpaid', 'Count': 263}, {'User_Type': 'Prepaid', 'Count': 237}], 'Regional User Type Distribution': [{'Postpaid': 33, 'Prepaid': 28}, {'Postpaid': 32, 'Prepaid': 27}, {'Postpaid': 24, 'Prepaid': 28}, {'Postpaid': 42, '

In [7]:
# Re-read the consumer insights JSON (path relative to the working directory).
consumer_insights_path = "processed/consumer_insights.json"

with open(consumer_insights_path, mode="r", encoding="utf-8") as file:
    # Read the whole document as text, then parse it as JSON.
    consumer_insights = json.loads(file.read())

# Extract Individual User Insights (Excluding "General_Insights")
# Underscores in each insight name are replaced with spaces.
user_insight_summaries = [
    {
        "MSISDN": msisdn,
        "User Insights": {label.replace("_", " "): stat for label, stat in user_stats.items()},
    }
    for msisdn, user_stats in consumer_insights.items()
    if msisdn != "General_Insights"
]

# Extract General Insights Separately
# Kept as its own structure rather than being mixed into the per-user list.
general_insights_summary = {
    "General Insights": {
        label.replace("_", " "): stat
        for label, stat in consumer_insights["General_Insights"].items()
    }
}

# Save Individual Insights, then General Insights, each to its own JSON file.
# Each tuple is (destination path, object to serialise, confirmation message).
for target_path, payload, done_message in (
    (
        "processed/consumer_insights_summary.json",
        user_insight_summaries,
        "Saved Individual Consumer Insights Summary.",
    ),
    (
        "processed/general_insights_summary.json",
        general_insights_summary,
        "Saved General Consumer Insights Summary.",
    ),
):
    with open(target_path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=4)

    print(done_message)

Saved Individual Consumer Insights Summary.
Saved General Consumer Insights Summary.
