In [5]:
import pandas as pd
import re

# Load the HSN master list. Every column is read as text (dtype=str) so that
# pandas' numeric type inference can never turn a code such as "01" into the
# integer 1 and silently drop its leading zero.
df = pd.read_csv('/content/HSN_SAC.xlsx - HSN_MSTR.csv', dtype=str)

# Header cells may carry stray whitespace; trim it before looking columns up.
df.columns = df.columns.str.strip()

print(" Columns detected in CSV:", df.columns.tolist())

# Accept the spaced spelling of the code column and normalise it to 'HSNCode'.
if 'HSNCode' not in df.columns and 'HSN Code' in df.columns:
    df.rename(columns={'HSN Code': 'HSNCode'}, inplace=True)

# Codes are compared as exact strings later on, so strip padding from values.
df['HSNCode'] = df['HSNCode'].astype(str).str.strip()

# ASCII-only character class: on str patterns a bare ``\d`` also accepts
# non-ASCII Unicode decimal digits (e.g. Arabic-Indic), which would pass the
# format check even though they are not plain 0-9 digits.
_HSN_FORMAT = re.compile(r'[0-9]{2,8}')


def is_valid_format(code):
    """Return True when *code* is made of 2 to 8 ASCII digits and nothing else.

    Args:
        code: Candidate HSN code as a string.

    Returns:
        bool: True if the entire string is 2-8 characters from ``0``-``9``.
    """
    return _HSN_FORMAT.fullmatch(code) is not None

def exists_in_master(code):
    """Report whether *code* appears in the ``HSNCode`` column of ``df``.

    Args:
        code: HSN code string to look up.

    Returns:
        bool: True if at least one value in ``df['HSNCode']`` equals *code*.
    """
    master_codes = df['HSNCode'].values
    matches = master_codes == code
    return bool(matches.any())

def hierarchical_check(code):
    """Collect the parent prefixes of *code* that are absent from ``df``.

    The parents examined are the prefixes of length 2, 4, 6, ... that are
    strictly shorter than *code* itself.

    Args:
        code: HSN code string whose ancestors should be looked up.

    Returns:
        list[str]: Missing prefixes, shortest first. Empty when every parent
        is present or when *code* is too short to have any parent.
    """
    absent = []
    for end in range(2, len(code), 2):
        prefix = code[:end]
        if prefix not in df['HSNCode'].values:
            absent.append(prefix)
    return absent

def validate_hsn_code(code):
    """Validate a single HSN code and describe the outcome as a dict.

    The code is stripped of surrounding whitespace, checked for format via
    ``is_valid_format``, and then looked up via ``exists_in_master``.

    Args:
        code: HSN code string to validate.

    Returns:
        dict: Always contains ``'HSNCode'`` and ``'Status'``. Rejected codes
        also carry ``'Reason'``. Accepted codes carry ``'Description'`` (taken
        from the first matching row of ``df``) and, when
        ``hierarchical_check`` reports missing parents, ``'HierarchyWarning'``.
    """
    code = code.strip()

    # Guard 1: malformed input never reaches the master lookup.
    if not is_valid_format(code):
        return {
            'HSNCode': code,
            'Status': ' Invalid Format',
            'Reason': 'Code must be 2-8 numeric digits',
        }

    # Guard 2: well-formed but unknown code.
    if not exists_in_master(code):
        return {
            'HSNCode': code,
            'Status': ' Invalid Code',
            'Reason': 'Code not found in master dataset',
        }

    matching_rows = df[df['HSNCode'] == code]
    report = {
        'HSNCode': code,
        'Status': ' Valid',
        'Description': matching_rows['Description'].values[0],
    }

    absent_parents = hierarchical_check(code)
    if absent_parents:
        report['HierarchyWarning'] = f" Missing parent codes: {', '.join(absent_parents)}"

    return report

# Demo run: exercise the validator on a handful of sample codes and print
# each result dict on its own line.
codes_to_test = ['01', '0101', '01011010', '9999', '01A']
print("\n Validation Results:")
for code in codes_to_test:
    outcome = validate_hsn_code(code)
    print(outcome)


👉 Columns detected in CSV: ['HSNCode', 'Description']

🔎 Validation Results:
{'HSNCode': '01', 'Status': '✅ Valid', 'Description': 'LIVE ANIMALS'}
{'HSNCode': '0101', 'Status': '✅ Valid', 'Description': 'LIVE HORSES, ASSES, MULES AND HINNIES.'}
{'HSNCode': '9999', 'Status': '❌ Invalid Code', 'Reason': 'Code not found in master dataset'}
{'HSNCode': '01A', 'Status': '❌ Invalid Format', 'Reason': 'Code must be 2-8 numeric digits'}
