In [147]:
import os
import json
import pandas as pd
from glob import glob
import translators as ts

In [156]:
# Helper: translate UTF-8 encoded Nepali text to English
def utfToEng(text):
    # If the input is in bytes, decode it first
    print(f"{text} -- {type(text)}")
    if isinstance(text, bytes):
        try:
            text = text.decode('utf-8')
        except UnicodeDecodeError as e:
            print(f"Decoding error: {e} -> bytes: {text}")
            return text

    # Now process if it's a proper non-empty string
    if isinstance(text, str):
        try:
            return ts.translate_text(query_text=text, from_language='ne', to_language='en', translator='google')
        except Exception as e:
            print(f"Translation error: {e} -> text: {text}")
            return text
    return text


In [157]:
utfToEng("\u093f\u0935\u0927\u093e\u0932\u092f \u092e\u093e\u0917\u0930\u094d")

िवधालय मागर् -- <class 'str'>


'Duration'

In [150]:
json_folder=r"C:\Users\ACER\Desktop\python projects\jsondatapan\data"
all_data=[]

In [151]:
# --- Helper: Flatten panRegistrationDetail ---
def flatten_pan_registration(details):
    flattened={}
    for i, entry in enumerate(details, start=1):
        flattened[f"pan_{i}"] = entry.get("pan")
        flattened[f"acctType_{i}"] = entry.get("acctType")
        flattened[f"registrationDate_{i}"] = entry.get("registrationDate")
        flattened[f"accountStatus_{i}"] = entry.get("accountStatus")
        flattened[f"filing_Period_{i}"] = entry.get("filing_Period")
    return flattened
    

In [152]:
#looping through the json files
for file_path in glob(os.path.join(json_folder, "*.json")):
    print(f"Reading: {file_path}")
    entry = {"source_file": os.path.basename(file_path)}
    with open(file_path, "r", encoding="utf-8") as f:
        try:
            raw = json.load(f)
            if raw.get("panDetails"):
    
                pan = raw["panDetails"][0]
                entry.update({
                "pan": pan.get("pan"),
                
                "trade_Name_Eng": pan.get("trade_Name_Eng"),
                "ward_No": pan.get("ward_No"),
                "street_Name": utfToEng(pan.get("street_Name")),

                "vdc_Town": pan.get("vdc_Town"),
                "telephone": pan.get("telephone"),
                "mobile": pan.get("mobile"),
                "eff_Reg_Date": pan.get("eff_Reg_Date"),
                "acctType": pan.get("acctType"),
                "office_Name": pan.get("office_Name"),
                #  "office_nam_Eng": utftoeng(pan.get("office_Name"))
                "account_Status": pan.get("account_Status"),
                "is_Personal": pan.get("is_Personal")
                })
                
            if raw.get("panRegistrationDetail"):
                entry.update(flatten_pan_registration(raw["panRegistrationDetail"]))
            
            if raw.get("panTaxClearance"):
                if raw["panTaxClearance"]:  # Check if array is not empty
                    tax = raw["panTaxClearance"][0]
        
                    entry["tax_clearance_exists"] = tax.get("exists_Yn")
                    entry["tax_clearance_fy"] = tax.get("fiscal_Year")
                    
            all_data.append(entry)
            print(f"✅ Processed: {os.path.basename(file_path)} - {len(entry)} fields extracted")
            
        except json.JSONDecodeError as e:
            print(f"Error reading {file_path}: {e}")
            continue
        

Reading: C:\Users\ACER\Desktop\python projects\jsondatapan\data\106796229_17-07-2025-16-04.json
मैत्रीनगर -- <class 'str'>
Translation error: module 'translators' has no attribute 'google' -> text: मैत्रीनगर
✅ Processed: 106796229_17-07-2025-16-04.json - 25 fields extracted
Reading: C:\Users\ACER\Desktop\python projects\jsondatapan\data\111935785_17-07-2025-18-37.json
िवधालय मागर् -- <class 'str'>
Translation error: module 'translators' has no attribute 'google' -> text: िवधालय मागर्
✅ Processed: 111935785_17-07-2025-18-37.json - 20 fields extracted
Reading: C:\Users\ACER\Desktop\python projects\jsondatapan\data\response_1752984286318.json
ह्युमत् -- <class 'str'>
Translation error: module 'translators' has no attribute 'google' -> text: ह्युमत्
✅ Processed: response_1752984286318.json - 20 fields extracted
Reading: C:\Users\ACER\Desktop\python projects\jsondatapan\data\response_1752984624722.json
कोलही टोल -- <class 'str'>
Translation error: module 'translators' has no attribute 'goog

In [153]:


# Create DataFrame
df = pd.DataFrame(all_data)

# Show sample
print(df.head())

# Save to CSV (optional)
df.to_csv("flattenedpandata.csv", index=False, encoding="utf-8")

                       source_file        pan                  trade_Name_Eng  \
0  106796229_17-07-2025-16-04.json  106796229            Shrestha Liquar Stor   
1  111935785_17-07-2025-18-37.json  111935785                   Bikram  Duwal   
2      response_1752984286318.json  115853324                    Saroj  ahahi   
3      response_1752984624722.json  133774304  New K.C. Electricals Suppliers   

  ward_No   street_Name                 vdc_Town   telephone      mobile  \
0       2     मैत्रीनगर     कीर्तिपुर, नगरपालिका  9801146000  9801146000   
1       5  िवधालय मागर्  मध्यपुर थिमी, नगरपालिका        None  9840011999   
2      12       ह्युमत्   काठमाडौँ, महानगरपालिका        None  9802935100   
3       5     कोलही टोल          लमही, नगरपालिका        None  9810960996   

  eff_Reg_Date acctType  ... registrationDate_1  \
0   2072.05.27       10  ...         2072.05.27   
1   2076.04.06       30  ...         2076.04.06   
2   2076.05.16       30  ...         2076.05.16   
3   2080.