In [49]:
import random
import json
from datetime import datetime, timedelta
# from datasets import Dataset

# Define possible issues with log templates
ISSUES = [
    {
        "name": "RRC Connection Failure",
        "log_template": "[RRC] [ERROR] RRC Connection Setup Failure\nCause: {cause}\nUE ID: {ue_id}\nCell ID: {cell_id}\nPCI: {pci}\nSINR: {sinr} dB\nRSRP: {rsrp} dBm",
        "causes": ["Radio Link Failure (RLF)", "Network Congestion", "Authentication Failure"],
        "resolution": "Check signal strength, reduce congestion, and ensure proper authentication handling. If RLF occurs frequently, consider adjusting handover parameters and increasing transmission power. Authentication failures should be debugged by reviewing security credentials and verifying UE identity."
    },
    {
        "name": "Handover Failure",
        "log_template": "[HO] [ERROR] Handover Failure\nCause: {cause}\nSource Cell ID: {source_cell}\nTarget Cell ID: {target_cell}\nUE ID: {ue_id}\nSINR: {sinr} dB\nRSRP Source: {rsrp_source} dBm\nRSRP Target: {rsrp_target} dBm",
        "causes": ["Interference", "Timing Advance Issue", "Resource Unavailability"],
        "resolution": "Analyze interference levels, adjust timing advance, and ensure target cell has available resources. If resource unavailability is the issue, increase cell capacity or implement dynamic spectrum sharing to improve handover success rates."
    },
    {
        "name": "Interference Issue",
        "log_template": "[INTERFERENCE] [WARNING] High Interference Detected\nUE ID: {ue_id}\nCell ID: {cell_id}\nInterference Level: {interference_level} dBm",
        "causes": ["External Interference", "Hardware Failure", "Neighboring Cell Congestion"],
        "resolution": "Identify interference sources by conducting spectrum analysis. Adjust antenna parameters, modify frequency reuse patterns, and apply filtering techniques to mitigate external noise. If hardware issues are detected, perform immediate maintenance or replacement."
    },
    {
        "name": "Network Slicing Misconfiguration",
        "log_template": "[SLICING] [ERROR] Slice Configuration Mismatch\nSlice Type: {slice_type}\nSlice ID: {slice_id}\nUE ID: {ue_id}\nCell ID: {cell_id}",
        "causes": ["Misconfigured Slice Parameters", "Improper Resource Allocation", "UPF Routing Issue"],
        "resolution": "Verify slice parameters to match the network's QoS requirements. Ensure correct slice allocation per service type (eMBB, URLLC, mMTC) and confirm UPF routes traffic correctly through network elements."
    },
    {
        "name": "Transport Network Failure",
        "log_template": "[TRANSPORT] [ERROR] Packet Loss Detected\nUPF IP: {upf_ip}\nPacket Loss: {packet_loss}%\nUE ID: {ue_id}\nCell ID: {cell_id}",
        "causes": ["Link Failure", "High Latency", "Router Congestion"],
        "resolution": "Check transport link status, reduce congestion, and optimize routing paths. Use QoS prioritization for critical traffic, and deploy redundancy mechanisms like secondary transport paths to prevent failures."
    },
    {
        "name": "Massive UE Drops",
        "log_template": "[OVERLOAD] [CRITICAL] High UE Drop Rate\nDropped UEs: {dropped_ues}\nBefore Drop: {before_drop}\nCPU Utilization: {cpu_utilization}%\nCell ID: {cell_id}",
        "causes": ["CPU Overload", "High Traffic Volume", "Resource Exhaustion"],
        "resolution": "Optimize resource allocation, increase CPU capacity, and implement load balancing strategies. If the issue persists, consider dynamic scaling and network slicing to distribute load effectively."
    },
    {
        "name": "Synchronization Issue",
        "log_template": "[SYNC] [ERROR] Synchronization Failure Detected\nSuspect Cell: {suspect_cell}\nUE ID: {ue_id}\nCell ID: {cell_id}",
        "causes": ["Timing Offset", "GPS Signal Loss", "Synchronization Source Failure"],
        "resolution": "Verify synchronization source, check GPS signal, and recalibrate timing settings. Deploy alternative sync sources like IEEE 1588 PTP in case of GPS failures."
    },
    {
        "name": "Paging Failure",
        "log_template": "[PAGING] [ERROR] Paging Message Not Delivered\nUE ID: {ue_id}\nCell ID: {cell_id}\nPaging Cause: {cause}",
        "causes": ["UE Unreachable", "Paging Channel Congestion", "Core Network Issue"],
        "resolution": "Increase paging retries, optimize paging channel, and check core network connectivity. If congestion is persistent, consider increasing paging capacity or dynamically allocating resources based on network load."
    }
]

def random_time():
    return (datetime.now() - timedelta(minutes=random.randint(0, 1440))).strftime("%Y-%m-%d %H:%M:%S")

def random_id():
    return f"UE{random.randint(100000, 999999)}"
def generate_log_entry():
    issue = random.choice(ISSUES)
    log_entry = issue["log_template"].format(
        ue_id=random_id(),
        cell_id=random.randint(1001, 1099),
        pci=random.randint(1, 512),
        sinr=random.randint(-20, 10),
        rsrp=random.randint(-130, -80),
        source_cell=random.randint(1001, 1099),
        target_cell=random.randint(1001, 1099),
        rsrp_source=random.randint(-130, -80),
        rsrp_target=random.randint(-130, -80),
        interference_level=random.randint(-120, -60),
        suspect_cell=random.randint(1001, 1099),
        upf_ip=f"192.168.{random.randint(0, 255)}.{random.randint(0, 255)}",
        packet_loss=random.randint(0, 100),
        before_drop=random.randint(1000, 5000),
        dropped_ues=random.randint(50, 1000),
        cpu_utilization=random.randint(50, 100),
        slice_type=random.choice(["eMBB", "URLLC", "mMTC"]),
        slice_id=random.randint(1, 1000),
        cause=random.choice(issue["causes"])
    )

    # Check if "Cause: " exists before splitting
    

    return {
        "instruction": "Analyze the following 5G network log trace and provide the root cause and resolution steps.",
        "input": "{} {}".format(random_time(), log_entry),
        
        "output": "Issue: {}\nRoot Cause: {}".format(
            issue["name"], issue["causes"]
        ),
    }
 

# Generate dataset
data = [generate_log_entry() for _ in range(10)]
# dataset = Dataset.from_list(data)
# dataset.push_to_hub("5g_network_debugging_dataset")

In [35]:
data

[{'instruction': 'Analyze the following 5G network log trace and provide the root cause and resolution steps.',
  'input': '2025-03-03 11:59:11 [HO] [ERROR] Handover Failure\nCause: Interference\nSource Cell ID: 1054\nTarget Cell ID: 1085\nUE ID: UE683783\nSINR: -18 dB\nRSRP Source: -122 dBm\nRSRP Target: -95 dBm',
  'output': "Issue: Handover Failure\nRoot Cause: ['Interference', 'Timing Advance Issue', 'Resource Unavailability']"},
 {'instruction': 'Analyze the following 5G network log trace and provide the root cause and resolution steps.',
  'input': '2025-03-03 11:40:11 [SLICING] [ERROR] Slice Configuration Mismatch\nSlice Type: URLLC\nSlice ID: 188\nUE ID: UE776322\nCell ID: 1040',
  'output': "Issue: Network Slicing Misconfiguration\nRoot Cause: ['Misconfigured Slice Parameters', 'Improper Resource Allocation', 'UPF Routing Issue']"},
 {'instruction': 'Analyze the following 5G network log trace and provide the root cause and resolution steps.',
  'input': '2025-03-03 05:59:11 [P

In [None]:
import random
from datasets import Dataset

def random_id():
    return f"UE{random.randint(100000, 999999)}"

# List of 10 very complex 5G issues
COMPLEX_ISSUES = [
    {
        "name": "Multi-step RRC and Handover Failure",
        "log_template": """
        [RRC] [ERROR] RRC Connection Setup Failure
        Cause: {rrc_cause}
        UE ID: {ue_id}
        Cell ID: {cell_id}
        PCI: {pci}
        SINR: {sinr} dB
        RSRP: {rsrp} dBm

        [HO] [ERROR] Handover Failure
        Cause: {ho_cause}
        Source Cell ID: {source_cell}
        Target Cell ID: {target_cell}
        UE ID: {ue_id}
        RSRP (Source): {rsrp_source} dBm
        RSRP (Target): {rsrp_target} dBm
        """,
        "causes": ["Radio Link Failure", "Network Congestion", "Weak Target Cell Signal"],
        "resolution": "Optimize radio parameters, reduce congestion, and tune handover thresholds."
    },
    {
        "name": "Correlated Transport and UE Drop",
        "log_template": """
        [TRANSPORT] [CRITICAL] S1-U Path Failure
        Cause: {transport_cause}
        gNB ID: {gnb_id}
        UPF IP: {upf_ip}
        Packet Loss: {packet_loss}%

        [UE_MGMT] [ALERT] Massive UE Drop Detected
        Total Connected UEs Before Drop: {before_drop}
        Dropped UEs: {dropped_ues}
        Cause: {ue_drop_cause}
        CPU Utilization: {cpu_utilization}%
        """,
        "causes": ["Fiber Link Failure", "Backhaul Congestion", "CPU Overload"],
        "resolution": "Check fiber integrity, optimize backhaul routing, and scale CPU resources."
    },
    {
        "name": "Interference-Induced Handover Loop",
        "log_template": """
        [PHY] [WARNING] High Interference Detected
        Cell ID: {cell_id}
        PCI: {pci}
        Interference Level: {interference_level} dBm
        Suspected Source: {suspect_cell}

        [HO] [ERROR] Handover Failure Due to Interference
        UE ID: {ue_id}
        Source Cell ID: {source_cell}
        Target Cell ID: {target_cell}
        RSRP (Source): {rsrp_source} dBm
        RSRP (Target): {rsrp_target} dBm
        """,
        "causes": ["External RF Interference", "Power Control Issues"],
        "resolution": "Adjust frequency planning, implement interference cancellation techniques."
    },
    {
        "name": "Slice Admission Failure and UE Registration Drop",
        "log_template": """
        [SLICE] [ERROR] Slice Admission Failure
        Slice Type: {slice_type}
        Slice ID: {slice_id}
        Cause: {slice_cause}

        [UE_REG] [ALERT] UE Registration Drop
        UE ID: {ue_id}
        Cause: {ue_drop_cause}
        """,
        "causes": ["Insufficient Slice Resources", "Incorrect Slice Mapping", "Slice Policy Violation"],
        "resolution": "Reconfigure slice policies and ensure resource allocation is optimized."
    },
    {
        "name": "Clock Sync Failure and Paging Congestion",
        "log_template": """
        [SYNC] [CRITICAL] Clock Sync Failure
        gNB ID: {gnb_id}
        Cause: {sync_cause}

        [PAGING] [WARNING] High Paging Load Detected
        Load: {paging_load}%
        Cause: {paging_cause}
        """,
        "causes": ["Clock Drift", "Backhaul Latency", "Sync Signal Loss"],
        "resolution": "Synchronize network clocks and optimize paging configurations."
    },
    {
        "name": "Signaling Storm and Control Plane Congestion",
        "log_template": """
        [CONTROL] [ERROR] Control Plane Congestion
        gNB ID: {gnb_id}
        Cause: {control_cause}

        [CORE] [CRITICAL] Excessive Signaling Messages
        Cause: {signaling_cause}
        """,
        "causes": ["Excessive UE Registrations", "Frequent Session Modifications", "Signaling Storm"],
        "resolution": "Mitigate excessive signaling by adjusting session timers and load balancing."
    },
    {
        "name": "Backhaul Packet Loss and Service Degradation",
        "log_template": """
        [BACKHAUL] [ERROR] High Packet Loss Detected
        Cause: {backhaul_cause}
        Packet Loss: {packet_loss}%
        
        [SERVICE] [CRITICAL] Service Degradation Detected
        UE ID: {ue_id}
        Slice ID: {slice_id}
        SINR: {sinr} dB
        """,
        "causes": ["Fluctuating Fiber Quality", "Routing Instability", "Packet Loss Burst"],
        "resolution": "Stabilize backhaul routing and implement quality-of-service monitoring."
    },
    {
        "name": "Core Network Overload and Call Setup Failure",
        "log_template": """
        [CORE] [ERROR] Core Network Overload
        Load: {cpu_utilization}%
        
        [CALL] [CRITICAL] Call Setup Failure
        Cause: {call_cause}
        """,
        "causes": ["Network Load", "Interference", "Resource Preemption"],
        "resolution": "Balance core network traffic and prioritize critical calls."
    },
    {
        "name": "Unstable Carrier Aggregation and UE Throughput Drop",
        "log_template": """
        [CA] [WARNING] Carrier Aggregation Instability
        Primary Cell: {cell_id}
        Secondary Cell: {source_cell}
        Cause: {ca_cause}

        [THROUGHPUT] [ALERT] UE Throughput Drop
        UE ID: {ue_id}
        RSRP: {rsrp} dBm
        SINR: {sinr} dB
        """,
        "causes": ["Poor Secondary Cell Quality", "Carrier Scheduling Conflict"],
        "resolution": "Optimize carrier aggregation parameters and improve scheduling algorithms."
    },
    {
        "name": "Frequent RLFs and Poor Mobility Performance",
        "log_template": """
        [RLF] [ERROR] Repeated Radio Link Failures
        UE ID: {ue_id}
        Cell ID: {cell_id}
        Cause: {rlf_cause}

        [MOBILITY] [WARNING] Poor Mobility Performance
        Handover Attempts: {ho_attempts}
        Successful Handovers: {ho_success}
        """,
        "causes": ["Coverage Gaps", "Interference", "Improper Power Control"],
        "resolution": "Enhance coverage planning and fine-tune handover parameters."
    }
]

# Generate log samples
def generate_samples(n=5000):
    samples = []
    for _ in range(n):
        issue = random.choice(COMPLEX_ISSUES)
        log = issue["log_template"].format(
            ue_id=random_id(),
            cell_id=random.randint(1001, 1099),
            pci=random.randint(1, 512),
            sinr=random.randint(-20, 10),
            rsrp=random.randint(-130, -80),
            source_cell=random.randint(1001, 1099),
            target_cell=random.randint(1001, 1099),
            rsrp_source=random.randint(-100, -80),
            rsrp_target=random.randint(-130, -90),
            interference_level=random.randint(-90, -60),
            suspect_cell=random.randint(1001, 1099),
            gnb_id=random.randint(5000, 5999),
            upf_ip=f"10.{random.randint(1, 255)}.{random.randint(1, 255)}.{random.randint(1, 255)}",
            packet_loss=random.randint(50, 99),
            before_drop=random.randint(300, 800),
            dropped_ues=random.randint(50, 200),
            cpu_utilization=random.randint(80, 99),
            ho_attempts=random.randint(10, 50),
            ho_success=random.randint(5, 30),
            rrc_cause=random.choice(["Radio Link Failure", "Network Congestion", "Weak Signal Strength"]),
            ho_cause=random.choice(["Handover Timeout", "Target Cell Rejection", "High Latency"]),
            transport_cause=random.choice(["Fiber Link Failure", "Router Overload", "Packet Buffer Overflow"]),
            ue_drop_cause=random.choice(["Unexpected UE Detach", "Core Network Signaling Failure", "Authentication Timeout"]),
            slice_cause=random.choice(["Insufficient Slice Resources", "Incorrect Slice Mapping", "Slice Policy Violation"]),
            sync_cause=random.choice(["Clock Drift", "Backhaul Latency", "Sync Signal Loss"]),
            paging_load=random.randint(70, 100),
            paging_cause=random.choice(["UE Misbehavior", "Core Network Overload", "Paging Rate Exceeded"]),
            control_cause=random.choice(["Excessive UE Registrations", "Frequent Session Modifications", "Signaling Storm"]),
            backhaul_cause=random.choice(["Fluctuating Fiber Quality", "Routing Instability", "Packet Loss Burst"]),
            call_cause=random.choice(["Network Load", "Interference", "Resource Preemption"])
        )
        samples.append({"log": log, "issue": issue["name"], "resolution": issue["resolution"]})
    return samples

In [None]:
import google.generativeai as genai

GOOGLE_API_KEY=
genai.configure(api_key=GOOGLE_API_KEY)
def get_model_resp(prompt):




    try:
      model = genai.GenerativeModel('models/gemini-1.5-flash')
      response = model.generate_content(
          prompt,
          generation_config = genai.GenerationConfig(

              temperature=0.1,
              top_p=0.90,
          )
      )
      return response.text
    except Exception as e:
      print(e)
      return False


response = get_model_resp("Explain how AI works")
if response:

  print(response)

Artificial intelligence (AI) is a broad field encompassing many techniques, but at its core, it aims to create systems that can perform tasks that typically require human intelligence.  These tasks include learning, reasoning, problem-solving, perception, and natural language understanding.  There's no single "how it works" answer, as different AI approaches use different methods. However, we can break it down into key concepts:

**1. Data is King:**  AI systems learn from data.  The more relevant and high-quality data they are trained on, the better they perform. This data can be anything from images and text to sensor readings and financial transactions.

**2. Algorithms are the Engine:**  Algorithms are sets of rules and statistical techniques that AI systems use to process data and learn patterns.  These algorithms are the "brains" of the system, determining how it analyzes information and makes decisions.  Different algorithms are suited for different tasks.  Examples include:

* 

In [76]:
def get_prompt(log_trace,root_cause):
    prompt  = f''' You are an expert network engineer tasked with debugging a 5G network issue. 
    Analyze the following 5G network log trace and the root cause.
    Think step by step and  do clear reasoning to  provide clear  to the point  resolution steps on basis of the log trace and root cause in 100 words.
    
    
     Output guidelines:
    -Please always respond with a valid well-formed JSON object with the following format.
    -Do not include newline charecters in output. Dont include word json in output.

    {{
      "resolution":<<output the resolution steps here>>
      "chain_of_thoughts":<<output the step by step by thinking to arrive at resolution here>>
    }}
    log trace: {log_trace}
    root cause: {root_cause}
    '''
    
    return prompt


In [77]:
print(get_prompt(data[0]["input"],data[0]["output"]))

 You are an expert network engineer tasked with debugging a 5G network issue. 
    Analyze the following 5G network log trace and the root cause.
    Think step by step and  do clear reasoning to  provide clear  to the point  resolution steps on basis of the log trace and root cause in 100 words.
    
    
     Output guidelines:
    -Please always respond with a valid well-formed JSON object with the following format.
    -Do not include newline charecters in output. Dont include word json in output.

    {
      "resolution":<<output the resolution steps here>>
      "chain_of_thoughts":<<output the step by step by thinking to arrive at resolution here>>
    }
UE ID: UE730893
Cell ID: 1002
Interference Level: -61 dBm
    root cause: Issue: Interference Issue
Root Cause: ['External Interference', 'Hardware Failure', 'Neighboring Cell Congestion']
    


In [85]:
for rows in data:
    log_trace = rows['input']
    root_cause = rows['output']
    prompt = get_prompt(log_trace,root_cause)
    print(log_trace)
    print(root_cause)
    response = get_model_resp(prompt)
    if response:
        # print(response)
        resp   = json.loads(response)
        print("reasoning:",resp['chain_of_thoughts'])
        print("resolution:",resp['resolution'])
        
        print("------------------------\n\n")
    else:
        print("Failed to get response")
    

UE ID: UE730893
Cell ID: 1002
Interference Level: -61 dBm
Issue: Interference Issue
Root Cause: ['External Interference', 'Hardware Failure', 'Neighboring Cell Congestion']
reasoning: The log shows high interference (-61 dBm) affecting UE730893 on Cell ID 1002. The root cause analysis points to external interference, hardware failure, or neighboring cell congestion.  To resolve, we systematically investigate each possibility. First, check for external sources like microwaves or other radio emitters near the cell site. Second, inspect UE730893 and the base station equipment for any hardware problems. Finally, analyze network load on Cell ID 1002 and its neighbors to identify potential congestion. Based on the findings, we'll adjust antenna positioning, replace faulty hardware, or optimize cell planning to mitigate interference and congestion.
resolution: Verify external interference sources near Cell ID 1002. Check UE730893's hardware for faults. Analyze neighboring cell load for conges