In [11]:
# -------------------------
# 📌 Step 1: Import libraries
# -------------------------
import pandas as pd
import uuid
import json

# -------------------------
# 📌 Step 2: Define function to load CSV with flexible column mapping
# -------------------------
def load_csv_with_flex(file_path):
    # Read CSV
    df = pd.read_csv(r"C:\Users\-ARPIT-\Downloads\lemonrice_with_phone.csv")

    # Normalize column names (lowercase, strip spaces)
    df.columns = df.columns.str.strip().str.lower()

    # Define expected mappings (aliasing for flexibility)
    col_map = {
        "timestamp": ["timestamp", "time", "date"],
        "session_id": ["session_id", "sid"],
        "src_ip": ["src_ip", "source_ip", "ip_src", "ip_a"],
        "src_port": ["src_port", "source_port", "sport"],
        "dst_ip": ["dst_ip", "dest_ip", "destination_ip", "ip_dst", "ip_b"],
        "dst_port": ["dst_port", "dest_port", "dport"],
        "protocol": ["protocol", "proto"],
        "duration_sec": ["duration_sec", "duration"],
        "bytes": ["bytes", "size", "traffic"],
        "cell_tower_lat": ["cell_tower_lat", "latitude", "lat"],
        "cell_tower_lon": ["cell_tower_lon", "longitude", "lon"],
        "anomalystatus": ["anomalystatus", "anomaly", "status"],
        "phone a": ["phone a", "phone_a", "src_phone", "phone number a"],
        "phone b": ["phone b", "phone_b", "dst_phone", "phone number b"]

    }

    # Create reverse lookup
    norm_map = {}
    for key, aliases in col_map.items():
        for alias in aliases:
            if alias in df.columns:
                norm_map[alias] = key

    # Rename columns consistently
    df = df.rename(columns=norm_map)

    return df

# -------------------------
# 📌 Step 3: Load dataset
# -------------------------
file_path = r"C:\Users\-ARPIT-\Downloads\ACTUAL DATA.csv"  # ✅ raw string
df = load_csv_with_flex(file_path)

print("✅ Data loaded with columns:", df.columns.tolist())

# -------------------------
# 📌 Step 4: Create unique node mapping (ip + phone → uuid)
# -------------------------
node_dict = {}
for _, row in df.iterrows():
    for ip, phone in [(row["src_ip"], row.get("phone a", None)),
                      (row["dst_ip"], row.get("phone b", None))]:
        key = (ip, phone)
        if key not in node_dict:
            node_dict[key] = str(uuid.uuid4())

print("✅ Total unique nodes:", len(node_dict))

# -------------------------
# 📌 Step 5: Build session response array
# -------------------------
response = []
for _, row in df.iterrows():
    src_key = (row["src_ip"], row.get("phone a", None))
    dst_key = (row["dst_ip"], row.get("phone b", None))

    src_obj = {
        "node_id": node_dict[src_key],
        "ip": row["src_ip"],
        "port": row["src_port"],
        "phone": row.get("phone a", None),
        "tower_lat": row.get("cell_tower_lat", None),
        "tower_lon": row.get("cell_tower_lon", None),
    }

    des_obj = {
        "node_id": node_dict[dst_key],
        "ip": row["dst_ip"],
        "port": row["dst_port"],
        "phone": row.get("phone b", None),
        "tower_lat": row.get("cell_tower_lat", None),
        "tower_lon": row.get("cell_tower_lon", None),
    }

    response.append({
        "session_id": row.get("session_id", None),
        "protocol": row.get("protocol", None),
        "duration": row.get("duration_sec", None),
        "bytes": row.get("bytes", None),
        "anomalystatus": row.get("anomalystatus", None),
        "src": src_obj,
        "des": des_obj
    })

# -------------------------
# 📌 Step 6: Save session dictionary
# -------------------------
session = {"sessions": response}

with open("session_output.json", "w") as f:
    json.dump(session, f, indent=4)

print("✅ Session data saved to session_output.json")


✅ Data loaded with columns: ['timestamp', 'session_id', 'src_ip', 'src_port', 'dst_ip', 'dst_port', 'protocol', 'duration_sec', 'bytes', 'cell_tower_lat', 'cell_tower_lon', 'anomalystatus', 'phone a', 'phone b']
✅ Total unique nodes: 3452
✅ Session data saved to session_output.json


In [12]:
# ✅ Print first 10 nodes (IP + phone + UUID)
print("\n🔹 Sample Nodes (Top 10):")
for i, ((ip, phone), uid) in enumerate(node_dict.items()):
    print(f"{i+1}. IP: {ip}, Phone: {phone}, UUID: {uid}")
    if i >= 30:  # Only first 10
        break

# ✅ Print first 5 sessions
print("\n🔹 Sample Sessions (Top 5):")
for i, sess in enumerate(response[:5]):
    print(json.dumps(sess, indent=4))  # Pretty print JSON



🔹 Sample Nodes (Top 10):
1. IP: 115.96.187.198, Phone: 7806964299, UUID: 778adac6-d884-44eb-bbef-cefb1d307bd6
2. IP: 115.96.225.98, Phone: 7616051499, UUID: 1dd3eb3d-c058-4d8c-9070-434ea8a5d0ba
3. IP: 47.8.167.247, Phone: 8187667228, UUID: 2c636c7b-54d2-4058-9dbd-9ffad8e70b05
4. IP: 47.8.203.46, Phone: 9744503964, UUID: 4d8fa8c1-33cd-4b1c-b978-eb1658975ab7
5. IP: 117.194.115.59, Phone: 9988846648, UUID: 54feaddc-9496-4b93-9974-b17b31c7e808
6. IP: 106.51.62.205, Phone: 8573932561, UUID: c5a709ec-1d4b-4551-90ce-42433e0744f3
7. IP: 115.96.7.230, Phone: 9141894718, UUID: 0d7b2929-41bc-44c3-8ae2-7b804b6d0392
8. IP: 106.51.108.193, Phone: 9273452574, UUID: d8ad32bf-33a3-45fc-b95d-edf8888cd167
9. IP: 47.8.76.48, Phone: 9995415428, UUID: 79c60d7f-734d-47d7-bc7b-58288944abb3
10. IP: 47.8.57.166, Phone: 9778019931, UUID: 18c9b635-7d4c-4c06-a8bc-7d3f99802637
11. IP: 117.194.210.64, Phone: 8396362843, UUID: c863ce4a-b84b-49e9-89ad-9e0be45f5f55
12. IP: 115.96.245.43, Phone: 8758079832, UUID: b8621