# PhonePe Project

## Convert Aggregated Data from JSON to CSV
- data
  - aggregated
    - transaction
    - insurance
    - user

In [8]:
import os
import json
import pandas as pd

# Root directory of the "aggregated" dataset. This is an absolute,
# machine-specific path; process_aggregated_json reads it as a global.
# NOTE: later cells rebind base_folder and output_folder to other datasets.
base_folder = r"C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\aggregated"

# CSVs go into a "csv_outputs" subfolder of the dataset root; it is created
# up front (no error if it already exists).
output_folder = os.path.join(base_folder, "csv_outputs")
os.makedirs(output_folder, exist_ok=True)

def _iter_quarter_files(root):
    """Yield ``(state, year, quarter, payload)`` for every JSON file under *root*.

    *root* is walked as ``<state>/<year>/<quarter>.json``. Entries that are not
    directories at the state/year level, and files that do not end in
    ``.json``, are skipped. Names are visited in sorted order because
    ``os.listdir`` returns them in arbitrary order, which would otherwise make
    the row order of the resulting CSV differ between runs.
    """
    for state in sorted(os.listdir(root)):
        state_path = os.path.join(root, state)
        if not os.path.isdir(state_path):
            continue
        for year in sorted(os.listdir(state_path)):
            year_path = os.path.join(state_path, year)
            if not os.path.isdir(year_path):
                continue
            for file_name in sorted(os.listdir(year_path)):
                if not file_name.endswith(".json"):
                    continue
                # Decode explicitly as UTF-8 instead of the platform's
                # locale encoding (cp1252 on many Windows setups).
                with open(os.path.join(year_path, file_name), "r", encoding="utf-8") as f:
                    payload = json.load(f)
                yield state, year, os.path.splitext(file_name)[0], payload


def process_aggregated_json(category, base_dir=None, out_dir=None):
    """Flatten the aggregated JSON files of one category into a single CSV.

    Reads ``<base_dir>/<category>/country/india/state/<state>/<year>/<q>.json``
    and writes ``<out_dir>/<category>_data.csv``.

    Args:
        category: ``"transaction"`` or ``"insurance"`` (one row per payment
            instrument) or ``"user"`` (one totals row per file plus one row
            per device brand). Any other value produces no rows.
        base_dir: Dataset root. Defaults to the module-level ``base_folder``.
        out_dir: Output directory, created if missing. Defaults to the
            module-level ``output_folder`` when *base_dir* is also omitted,
            otherwise to ``<base_dir>/csv_outputs``.

    Raises:
        FileNotFoundError: If the category's state folder does not exist.
    """
    # Resolve out_dir first: its default depends on whether base_dir was given.
    if out_dir is None:
        out_dir = output_folder if base_dir is None else os.path.join(base_dir, "csv_outputs")
    if base_dir is None:
        base_dir = base_folder

    folder_path = os.path.join(base_dir, category, "country", "india", "state")
    all_rows = []

    for state, year, quarter, data in _iter_quarter_files(folder_path):
        # ``or {}`` / ``or []`` also cover keys that are present but null,
        # which ``dict.get(key, default)`` alone does not.
        section = data.get("data") or {}

        # Aggregated Transaction & Insurance
        if category in ("transaction", "insurance"):
            for item in section.get("transactionData") or []:
                for instr in item.get("paymentInstruments") or []:
                    all_rows.append({
                        "state": state,
                        "year": year,
                        "quarter": quarter,
                        "name": item.get("name"),
                        "type": instr.get("type"),
                        "count": instr.get("count"),
                        "amount": instr.get("amount"),
                        "from": section.get("from"),
                        "to": section.get("to"),
                    })

        # Aggregated User
        elif category == "user":
            aggregated = section.get("aggregated") or {}
            # One totals row per file ...
            all_rows.append({
                "state": state,
                "year": year,
                "quarter": quarter,
                "registeredUsers": aggregated.get("registeredUsers"),
                "appOpens": aggregated.get("appOpens"),
            })
            # ... followed by one row per device brand, when present.
            for device in section.get("usersByDevice") or []:
                all_rows.append({
                    "state": state,
                    "year": year,
                    "quarter": quarter,
                    "device_brand": device.get("brand"),
                    "device_count": device.get("count"),
                    "device_percentage": device.get("percentage"),
                })

    # Convert to DataFrame and write the CSV
    df = pd.DataFrame(all_rows)
    os.makedirs(out_dir, exist_ok=True)
    output_csv = os.path.join(out_dir, f"{category}_data.csv")
    df.to_csv(output_csv, index=False)
    print(f"CSV created for {category} at: {output_csv}, total rows: {len(df)}")

# Convert each aggregated category in turn; every call writes
# "<category>_data.csv" into output_folder and prints the row count.
categories = ["transaction", "user", "insurance"]
for cat in categories:
    process_aggregated_json(cat)


CSV created for transaction at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\aggregated\csv_outputs\transaction_data.csv, total rows: 5034
CSV created for user at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\aggregated\csv_outputs\user_data.csv, total rows: 7740
CSV created for insurance at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\aggregated\csv_outputs\insurance_data.csv, total rows: 682


## Convert Map Data from JSON to CSV
- data
  - map
    - transaction
    - insurance
    - user

In [9]:
import os
import json
import pandas as pd

# Root directory of the "map" dataset. This is an absolute, machine-specific
# path; process_map_json reads it as a global.
base_folder = r"C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\map"

# CSVs go into a "csv_outputs" subfolder of the dataset root; it is created
# up front (no error if it already exists).
output_folder = os.path.join(base_folder, "csv_outputs")
os.makedirs(output_folder, exist_ok=True)

def _iter_quarter_files(root):
    """Yield ``(state, year, quarter, payload)`` for every JSON file under *root*.

    *root* is walked as ``<state>/<year>/<quarter>.json``. Entries that are not
    directories at the state/year level, and files that do not end in
    ``.json``, are skipped. Names are visited in sorted order because
    ``os.listdir`` returns them in arbitrary order, which would otherwise make
    the row order of the resulting CSV differ between runs.
    """
    for state in sorted(os.listdir(root)):
        state_path = os.path.join(root, state)
        if not os.path.isdir(state_path):
            continue
        for year in sorted(os.listdir(state_path)):
            year_path = os.path.join(state_path, year)
            if not os.path.isdir(year_path):
                continue
            for file_name in sorted(os.listdir(year_path)):
                if not file_name.endswith(".json"):
                    continue
                # Decode explicitly as UTF-8 instead of the platform's
                # locale encoding (cp1252 on many Windows setups).
                with open(os.path.join(year_path, file_name), "r", encoding="utf-8") as f:
                    payload = json.load(f)
                yield state, year, os.path.splitext(file_name)[0], payload


def process_map_json(category, base_dir=None, out_dir=None):
    """Flatten the map ("hover") JSON files of one category into a single CSV.

    Reads ``<base_dir>/<category>/hover/country/india/state/<state>/<year>/<q>.json``
    and writes ``<out_dir>/<category>_data.csv``. Two payload layouts are
    handled, whichever is present in a file:

    * ``data.hoverDataList`` - one row per (item, metric) with ``type``,
      ``count`` and ``amount``;
    * ``data.hoverData`` - one row per key with ``registeredUsers`` and
      ``appOpens``.

    Args:
        category: Sub-folder name, e.g. ``"transaction"``, ``"user"`` or
            ``"insurance"``.
        base_dir: Dataset root. Defaults to the module-level ``base_folder``.
        out_dir: Output directory, created if missing. Defaults to the
            module-level ``output_folder`` when *base_dir* is also omitted,
            otherwise to ``<base_dir>/csv_outputs``.

    Raises:
        FileNotFoundError: If the category's state folder does not exist.
    """
    # Resolve out_dir first: its default depends on whether base_dir was given.
    if out_dir is None:
        out_dir = output_folder if base_dir is None else os.path.join(base_dir, "csv_outputs")
    if base_dir is None:
        base_dir = base_folder

    folder_path = os.path.join(base_dir, category, "hover", "country", "india", "state")
    all_rows = []

    for state, year, quarter, data in _iter_quarter_files(folder_path):
        # ``or {}`` / ``or []`` also cover keys that are present but null,
        # which ``dict.get(key, default)`` alone does not.
        section = data.get("data") or {}

        # hoverDataList (transaction/insurance)
        for item in section.get("hoverDataList") or []:
            for metric in item.get("metric") or []:
                all_rows.append({
                    "state": state,
                    "year": year,
                    "quarter": quarter,
                    "name": item.get("name"),
                    "type": metric.get("type"),
                    "count": metric.get("count"),
                    "amount": metric.get("amount"),
                })

        # hoverData dict (user)
        for name, value in (section.get("hoverData") or {}).items():
            value = value or {}
            all_rows.append({
                "state": state,
                "year": year,
                "quarter": quarter,
                "name": name,
                "registeredUsers": value.get("registeredUsers"),
                "appOpens": value.get("appOpens"),
            })

    # Convert to DataFrame and write the CSV
    df = pd.DataFrame(all_rows)
    os.makedirs(out_dir, exist_ok=True)
    output_csv = os.path.join(out_dir, f"{category}_data.csv")
    df.to_csv(output_csv, index=False)
    print(f" CSV created for {category} at: {output_csv}, total rows: {len(df)}")

# Convert each map category in turn; every call writes
# "<category>_data.csv" into output_folder and prints the row count.
map_categories = ["transaction", "user", "insurance"]
for cat in map_categories:
    process_map_json(cat)


 CSV created for transaction at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\map\csv_outputs\transaction_data.csv, total rows: 20604
 CSV created for user at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\map\csv_outputs\user_data.csv, total rows: 20608
 CSV created for insurance at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\map\csv_outputs\insurance_data.csv, total rows: 13876


## Convert Top Data from JSON to CSV
- data
  - top
    - transaction
    - insurance
    - user

In [11]:
import os
import json
import pandas as pd

# Root directory of the "top" dataset. This is an absolute, machine-specific
# path; process_top_json reads it as a global.
base_folder = r"C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\top"

# CSVs go into a "csv_outputs" subfolder of the dataset root; it is created
# up front (no error if it already exists).
output_folder = os.path.join(base_folder, "csv_outputs")
os.makedirs(output_folder, exist_ok=True)

def _iter_quarter_files(root):
    """Yield ``(state, year, quarter, payload)`` for every JSON file under *root*.

    *root* is walked as ``<state>/<year>/<quarter>.json``. Entries that are not
    directories at the state/year level, and files that do not end in
    ``.json``, are skipped. Names are visited in sorted order because
    ``os.listdir`` returns them in arbitrary order, which would otherwise make
    the row order of the resulting CSV differ between runs.
    """
    for state in sorted(os.listdir(root)):
        state_path = os.path.join(root, state)
        if not os.path.isdir(state_path):
            continue
        for year in sorted(os.listdir(state_path)):
            year_path = os.path.join(state_path, year)
            if not os.path.isdir(year_path):
                continue
            for file_name in sorted(os.listdir(year_path)):
                if not file_name.endswith(".json"):
                    continue
                # Decode explicitly as UTF-8 instead of the platform's
                # locale encoding (cp1252 on many Windows setups).
                with open(os.path.join(year_path, file_name), "r", encoding="utf-8") as f:
                    payload = json.load(f)
                yield state, year, os.path.splitext(file_name)[0], payload


def _top_rows(entries, level, state, year, quarter, category):
    """Build one CSV row per entry of a top list (states/districts/pincodes).

    An entry that has a ``"metric"`` key produces a row with ``type``,
    ``count`` and ``amount``, named by ``entityName``; any other entry
    produces a row with ``registeredUsers``, named by ``name``.
    """
    rows = []
    for entry in entries:
        if "metric" in entry:
            metric = entry["metric"] or {}  # tolerate an explicit null
            row = {
                "level": level,
                "entity": entry.get("entityName"),
                "type": metric.get("type"),
                "count": metric.get("count"),
                "amount": metric.get("amount"),
            }
        else:
            row = {
                "level": level,
                "entity": entry.get("name"),
                "registeredUsers": entry.get("registeredUsers"),
            }
        # "state" is added last so the pre-existing columns keep their positions.
        row.update({
            "year": year,
            "quarter": quarter,
            "dataset": category,
            "state": state,
        })
        rows.append(row)
    return rows


def process_top_json(category, base_dir=None, out_dir=None):
    """Flatten the "top" JSON files of one category into a single CSV.

    Reads ``<base_dir>/<category>/country/india/state/<state>/<year>/<q>.json``
    and writes ``<out_dir>/<category>_data.csv`` with one row per entry of the
    ``states``, ``districts`` and ``pincodes`` lists, in that order per file.
    Each row records the state folder it was read from in a trailing
    ``state`` column, so district and pincode rows can be attributed.

    Args:
        category: Sub-folder name, e.g. ``"transaction"``, ``"user"`` or
            ``"insurance"``; also stored in each row's ``dataset`` column.
        base_dir: Dataset root. Defaults to the module-level ``base_folder``.
        out_dir: Output directory, created if missing. Defaults to the
            module-level ``output_folder`` when *base_dir* is also omitted,
            otherwise to ``<base_dir>/csv_outputs``.

    Raises:
        FileNotFoundError: If the category's state folder does not exist.
    """
    # Resolve out_dir first: its default depends on whether base_dir was given.
    if out_dir is None:
        out_dir = output_folder if base_dir is None else os.path.join(base_dir, "csv_outputs")
    if base_dir is None:
        base_dir = base_folder

    folder_path = os.path.join(base_dir, category, "country", "india", "state")
    levels = (("state", "states"), ("district", "districts"), ("pincode", "pincodes"))
    all_rows = []

    for state, year, quarter, data in _iter_quarter_files(folder_path):
        # ``or`` also covers keys that are present but null.
        top_data = data.get("data") or {}
        for level, key in levels:
            entries = top_data.get(key) or []
            all_rows.extend(_top_rows(entries, level, state, year, quarter, category))

    # Convert to DataFrame and write the CSV
    df = pd.DataFrame(all_rows)
    os.makedirs(out_dir, exist_ok=True)
    output_csv = os.path.join(out_dir, f"{category}_data.csv")
    df.to_csv(output_csv, index=False)
    print(f" CSV created for {category} at: {output_csv}, total rows: {len(df)}")

# Convert each top category in turn; every call writes
# "<category>_data.csv" into output_folder and prints the row count.
top_categories = ["transaction", "user", "insurance"]
for cat in top_categories:
    process_top_json(cat)



 CSV created for transaction at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\top\csv_outputs\transaction_data.csv, total rows: 18295
 CSV created for user at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\top\csv_outputs\user_data.csv, total rows: 18296
 CSV created for insurance at: C:\Users\YAZHINI.YAZHNI-EM-SYS\Downloads\pulse-master\phonepe_project\data\top\csv_outputs\insurance_data.csv, total rows: 12276
