In [17]:
import gc
import json
import os
from datetime import datetime, timedelta, timezone

import pandas as pd

# Folder containing the demand-detail CSV parts (consumed later in this cell)
folder_path_demanddetails = '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/'

# Properties: load only the columns used downstream, then keep ACTIVE rows
property_columns = ['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory']
property_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/eg_pt_property.csv',
    usecols=property_columns,
)
is_active_property = property_df['status'].eq('ACTIVE')
property_df = property_df.loc[is_active_property].copy()

# Units: only the property reference and the occupancy type
unit_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/eg_pt_unit.csv',
    usecols=['propertyid', 'occupancytype'],
)

# Demands: consumercode is forced to str; keep ACTIVE rows only
demand_columns = ['id', 'taxperiodfrom', 'taxperiodto', 'consumercode', 'status']
demand_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/egbs_demand_v1.csv',
    usecols=demand_columns,
    dtype={"consumercode": str},
    low_memory=False,
)
is_active_demand = demand_df['status'].eq('ACTIVE')
demand_df = demand_df.loc[is_active_demand].copy()


# read demand details: one CSV per export part, restricted to the needed columns.
# Note: every part is held in memory and concatenated once; only the column
# pruning (usecols) keeps the footprint down.
needed_cols = ['demandid', 'taxamount', 'collectionamount', 'taxheadcode']

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# of demand_details_df reproducible from run to run.
detail_files = sorted(
    name for name in os.listdir(folder_path_demanddetails) if name.endswith('.csv')
)
if not detail_files:
    # pd.concat([]) would otherwise fail with an unhelpful "No objects to concatenate"
    raise FileNotFoundError(
        f'No demand-detail CSV files found in {folder_path_demanddetails}'
    )

all_chunks = []
for filename in detail_files:
    file_path = os.path.join(folder_path_demanddetails, filename)
    print(f'Loading: {file_path}')
    all_chunks.append(pd.read_csv(file_path, usecols=needed_cols))
demand_details_df = pd.concat(all_chunks, ignore_index=True)

# Drop the per-file frames now that they have been combined
del all_chunks; gc.collect()

print("✅ Loaded data")

Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_4.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_34.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_79.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_38.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_6.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_60.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_44.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_83.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_89.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/abohar/output_demand_details/output_17.csv
Loading: /home/prerna/

In [18]:
# Row counts, in order: active properties, units, active demands, demand details
for loaded_table in (property_df, unit_df, demand_df, demand_details_df):
    print(len(loaded_table))

43158
103319
178240
5217011


In [19]:
# join pt and unit: left join, so properties without any unit keep NaN unit columns
joined_pt_unit = pd.merge(
    property_df,
    unit_df,
    how='left',
    left_on='id',
    right_on='propertyid',
    suffixes=('_property', '_unit'),
)

# Free the two source frames now that the join result exists
del property_df, unit_df
gc.collect()

# Number of distinct properties present in the join
print(joined_pt_unit['id'].nunique())

43158


In [20]:
# Preview the first five rows of the property–unit join
joined_pt_unit.head(n=5)

Unnamed: 0,id,propertyid_property,tenantid,status,ownershipcategory,usagecategory,createdtime,additionaldetails,propertyid_unit,occupancytype
0,7eafe3da-ad69-441a-a982-899a80561e9b,PT-601-2154555,pb.abohar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1749139607554,,,
1,d4572a6e-e993-4245-a7cd-7ff83d324e51,PT-601-2154556,pb.abohar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1749139609529,,,
2,2fb0fe75-4e5b-43d2-b681-7b7b9e6251be,PT-601-2110140,pb.abohar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1748845265950,"{""yearConstruction"": null}",2fb0fe75-4e5b-43d2-b681-7b7b9e6251be,SELFOCCUPIED
3,2fb0fe75-4e5b-43d2-b681-7b7b9e6251be,PT-601-2110140,pb.abohar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1748845265950,"{""yearConstruction"": null}",2fb0fe75-4e5b-43d2-b681-7b7b9e6251be,SELFOCCUPIED
4,86fa4b2d-3a27-4b7c-b5bb-c822cb1a6a80,PT-601-2154557,pb.abohar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1749139611492,,,


In [21]:
# join demand and demand details: left join keeps demands that have no detail rows
joined_demand = pd.merge(
    demand_df,
    demand_details_df,
    how='left',
    left_on='id',
    right_on='demandid',
    suffixes=('_demand', '_detail'),
)

# Number of distinct demands present in the join
print(joined_demand['id'].nunique())

# Free the two source frames now that the join result exists
del demand_details_df, demand_df
gc.collect()

joined_demand.head()

178240


Unnamed: 0,id,consumercode,taxperiodfrom,taxperiodto,status,demandid,taxheadcode,taxamount,collectionamount
0,18839,PT-601-010237,1459468800000,1491004799000,ACTIVE,18839,PT_TIME_REBATE,0.0,0.0
1,18839,PT-601-010237,1459468800000,1491004799000,ACTIVE,18839,PT_TIME_PENALTY,158.16,158.16
2,18839,PT-601-010237,1459468800000,1491004799000,ACTIVE,18839,PT_TIME_INTEREST,279.62,279.62
3,18839,PT-601-010237,1459468800000,1491004799000,ACTIVE,18839,PT_ADHOC_REBATE,-51.0,-51.0
4,18839,PT-601-010237,1459468800000,1491004799000,ACTIVE,18839,PT_ROUNDOFF,-0.38,-0.38


In [22]:
import pytz

# Tax-period bounds are epoch milliseconds: parse them as UTC instants, then
# express them in IST (Asia/Kolkata).
ist = pytz.timezone('Asia/Kolkata')
for period_col in ('taxperiodfrom', 'taxperiodto'):
    period_utc = pd.to_datetime(joined_demand[period_col], unit='ms', utc=True)
    joined_demand[period_col] = period_utc.dt.tz_convert(ist)

# Financial year calculation
def get_fy(date):
    """Return the April–March financial-year label ('YYYY-YY') containing `date`.

    `date` only needs `.year` and `.month` attributes. Months April onward
    belong to the year starting in that calendar year; January–March belong
    to the year that started in the previous calendar year.
    """
    fy_start = date.year if date.month >= 4 else date.year - 1
    fy_end = fy_start + 1
    # Only the last two digits of the closing year appear in the label
    return f"{fy_start}-{str(fy_end)[-2:]}"

# Label every demand-detail row with the financial year of its period start
joined_demand['fy'] = joined_demand['taxperiodfrom'].apply(get_fy)

# Earliest / latest financial year per consumercode.
# 'YYYY-YY' labels order chronologically as plain strings, so min/max work directly.
result = (
    joined_demand.groupby('consumercode')['fy']
    .agg(earliest_fy='min', latest_fy='max')
    .reset_index()
)

print(result)

        consumercode earliest_fy latest_fy
0      PT-601-010237     2016-17   2024-25
1      PT-601-018942     2014-15   2024-25
2      PT-601-019040     2014-15   2024-25
3      PT-601-019057     2014-15   2024-25
4      PT-601-019216     2014-15   2025-26
...              ...         ...       ...
46328  PT-601-999774     2017-18   2024-25
46329  PT-601-999830     2014-15   2025-26
46330  PT-601-999883     2014-15   2024-25
46331  PT-601-999888     2016-17   2024-25
46332  PT-601-999942     2020-21   2024-25

[46333 rows x 3 columns]


In [23]:
# Attach each consumer's latest financial year to every demand-detail row
latest_fy_lookup = result[['consumercode', 'latest_fy']]
joined = pd.merge(joined_demand, latest_fy_lookup, how='left', on='consumercode')

# Keep only the rows that belong to that latest financial year
in_latest_fy = joined['fy'].eq(joined['latest_fy'])
latest_demand = joined.loc[in_latest_fy]

# Total taxamount per consumer within the latest financial year
demand_sum = (
    latest_demand.groupby('consumercode')['taxamount']
    .sum()
    .reset_index()
    .rename(columns={'taxamount': 'latest_fy_taxamount'})
)

result = pd.merge(result, demand_sum, how='left', on='consumercode')

print(result)

        consumercode earliest_fy latest_fy  latest_fy_taxamount
0      PT-601-010237     2016-17   2024-25               884.00
1      PT-601-018942     2014-15   2024-25               223.00
2      PT-601-019040     2014-15   2024-25               236.00
3      PT-601-019057     2014-15   2024-25              3521.03
4      PT-601-019216     2014-15   2025-26               656.00
...              ...         ...       ...                  ...
46328  PT-601-999774     2017-18   2024-25             29666.00
46329  PT-601-999830     2014-15   2025-26               320.00
46330  PT-601-999883     2014-15   2024-25               219.00
46331  PT-601-999888     2016-17   2024-25               398.00
46332  PT-601-999942     2020-21   2024-25                 0.00

[46333 rows x 4 columns]


In [24]:
# Current financial year's demand: sum of taxamount per consumercode
target_fy = "2025-26"
current_fy_demand = joined_demand[joined_demand['fy'] == target_fy]

df_fy_sum = (
    current_fy_demand.groupby('consumercode')['taxamount']
    .sum()
    .reset_index()
    .rename(columns={'taxamount': 'current_fy_taxamount'})
)

# `result` was built by grouping joined_demand on consumercode, so it already
# has one row per consumer. A single left merge followed by fillna(0) therefore
# gives 0 to consumers with no demand in target_fy; no intermediate
# "all consumercodes" frame is needed.
result = result.merge(df_fy_sum, on='consumercode', how='left')
result['current_fy_taxamount'] = result['current_fy_taxamount'].fillna(0)

print(result)

        consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-601-010237     2016-17   2024-25               884.00   
1      PT-601-018942     2014-15   2024-25               223.00   
2      PT-601-019040     2014-15   2024-25               236.00   
3      PT-601-019057     2014-15   2024-25              3521.03   
4      PT-601-019216     2014-15   2025-26               656.00   
...              ...         ...       ...                  ...   
46328  PT-601-999774     2017-18   2024-25             29666.00   
46329  PT-601-999830     2014-15   2025-26               320.00   
46330  PT-601-999883     2014-15   2024-25               219.00   
46331  PT-601-999888     2016-17   2024-25               398.00   
46332  PT-601-999942     2020-21   2024-25                 0.00   

       current_fy_taxamount  
0                       0.0  
1                       0.0  
2                       0.0  
3                       0.0  
4                     656.0  
...            

In [25]:
# Rows for financial years before the current one.
# 'YYYY-YY' labels order chronologically as strings, so a plain `<` is enough.
# NOTE(review): this literal duplicates target_fy from the current-year cell; keep them in sync.
is_arrear_year = joined_demand['fy'] < "2025-26"
arrear_demand = joined_demand.loc[is_arrear_year]

# Per consumer: total billed and total collected across those years
agg = (
    arrear_demand.groupby('consumercode')[['taxamount', 'collectionamount']]
    .sum()
    .rename(columns={
        'taxamount': 'arrear_taxamount_sum',
        'collectionamount': 'arrear_collectionamount_sum',
    })
    .reset_index()
)

# Reported arrear figure = billed minus collected
agg['arrear_years_demand_generated'] = agg['arrear_taxamount_sum'].sub(
    agg['arrear_collectionamount_sum']
)

arrear_by_consumer = agg[['consumercode', 'arrear_years_demand_generated']]
result = pd.merge(result, arrear_by_consumer, how='left', on='consumercode')
result['arrear_years_demand_generated'] = result['arrear_years_demand_generated'].fillna(0)

print(result)

        consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-601-010237     2016-17   2024-25               884.00   
1      PT-601-018942     2014-15   2024-25               223.00   
2      PT-601-019040     2014-15   2024-25               236.00   
3      PT-601-019057     2014-15   2024-25              3521.03   
4      PT-601-019216     2014-15   2025-26               656.00   
...              ...         ...       ...                  ...   
46328  PT-601-999774     2017-18   2024-25             29666.00   
46329  PT-601-999830     2014-15   2025-26               320.00   
46330  PT-601-999883     2014-15   2024-25               219.00   
46331  PT-601-999888     2016-17   2024-25               398.00   
46332  PT-601-999942     2020-21   2024-25                 0.00   

       current_fy_taxamount  arrear_years_demand_generated  
0                       0.0                        6133.00  
1                       0.0                        1195.00  
2           

In [26]:
# Total penalty and interest billed per consumercode (across all financial years)
relevant_codes = ['PT_TIME_PENALTY', 'PT_TIME_INTEREST']
filtered = joined_demand[joined_demand['taxheadcode'].isin(relevant_codes)]

grouped = (
    filtered.groupby(['consumercode', 'taxheadcode'])['taxamount']
    .sum()
    .unstack(fill_value=0)  # Puts taxheadcodes as columns, fills missing with 0
    # Guarantee both columns exist, in a fixed order, even when one tax head
    # never occurs in the data (plain column selection would raise KeyError).
    .reindex(columns=relevant_codes, fill_value=0)
    .reset_index()
)

result = result.merge(grouped, on='consumercode', how='left')
# Consumers with neither tax head get 0 rather than NaN
result[relevant_codes] = result[relevant_codes].fillna(0)

print(result)

        consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-601-010237     2016-17   2024-25               884.00   
1      PT-601-018942     2014-15   2024-25               223.00   
2      PT-601-019040     2014-15   2024-25               236.00   
3      PT-601-019057     2014-15   2024-25              3521.03   
4      PT-601-019216     2014-15   2025-26               656.00   
...              ...         ...       ...                  ...   
46328  PT-601-999774     2017-18   2024-25             29666.00   
46329  PT-601-999830     2014-15   2025-26               320.00   
46330  PT-601-999883     2014-15   2024-25               219.00   
46331  PT-601-999888     2016-17   2024-25               398.00   
46332  PT-601-999942     2020-21   2024-25                 0.00   

       current_fy_taxamount  arrear_years_demand_generated  PT_TIME_PENALTY  \
0                       0.0                        6133.00           998.01   
1                       0.0          

In [27]:
# Load the unit table again, this time with every column
unit_csv_path = '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/eg_pt_unit.csv'
unit_all_columns_df = pd.read_csv(unit_csv_path)
print(unit_all_columns_df)

                                          id   tenantid  \
0       fa2ff484-b15a-4d5d-b111-341b25d10ce8  pb.abohar   
1       381873d5-812a-4ce6-91e1-d1dae17bc9a0  pb.abohar   
2       0abf8511-dd72-4e69-a1ce-248efb5cd20a  pb.abohar   
3       d04c574c-3cf1-4ba1-9358-141bca64dbf6  pb.abohar   
4       3d599f49-ad0f-4b22-982e-539038e3b3c7  pb.abohar   
...                                      ...        ...   
103314  30bbdae9-161b-45be-960b-08f664dc6221  pb.abohar   
103315  5dbb9797-1472-4864-82a9-618912ccaee0  pb.abohar   
103316  9c4eba81-b233-4be5-bde4-cc4df85ada1d  pb.abohar   
103317  6ab06925-65d8-40a1-a710-12fbbca9ff87  pb.abohar   
103318  5fa2fb11-35fa-4381-b756-dbfee222d349  pb.abohar   

                                  propertyid  floorno         unittype  \
0       39951e13-3369-4e7b-9828-18f45878f49c        0  OTHERCOMMERCIAL   
1       39951e13-3369-4e7b-9828-18f45878f49c        0            false   
2       089637a9-98bd-4bf3-883b-8d47835e8311        0              Na

In [28]:

# Re-read the property table (the earlier property_df was deleted after the
# first join), this time including propertytype, and keep ACTIVE rows only
property_columns = ['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory', 'propertytype']
property_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/eg_pt_property.csv',
    usecols=property_columns,
)
is_active_property = property_df['status'].eq('ACTIVE')
property_df = property_df.loc[is_active_property].copy()

# Inner join: each unit row paired with its property (property.id == unit.propertyid)
merged = pd.merge(
    property_df,
    unit_all_columns_df,
    left_on='id',
    right_on='propertyid',
    suffixes=('_property', '_unit'),
)

# def classify_ownership(occupancies):
#     unique_types = set(occupancies)
#     if 'RENTED' in unique_types:
#         if len(unique_types) > 1:
#             return 'Mixed'
#         else:
#             return 'Tenant'
#     if 'SELFOCCUPIED' in unique_types:
#         # If only SELFOCCUPIED or SELFOCCUPIED + UNOCCUPIED
#         return 'Owner'
#     if 'UNOCCUPIED' in unique_types:
#         return 'Owner'
#     # fallback
#     return None

def classify_ownership(occupancies):
    """Classify a property from the occupancy types of its units.

    Parameters
    ----------
    occupancies : iterable
        Occupancy type of each unit of one property; may contain NaN/None.

    Returns
    -------
    str or None
        "Tenant" if every known occupancy type is tenant-like (RENTED, PG),
        "Mixed"  if tenant-like types occur together with any other type,
        "Owner"  if there is no tenant-like type but SELFOCCUPIED or
                 UNOCCUPIED is present,
        None     otherwise (no recognised occupancy type).
    """
    # Missing values carry no occupancy information. Dropping them keeps a
    # property whose units are e.g. {RENTED, NaN} from being labelled "Mixed".
    unique_types = {occ for occ in occupancies if pd.notna(occ)}

    tenant_types = {"RENTED", "PG"}
    owner_types = {"SELFOCCUPIED", "UNOCCUPIED"}

    if unique_types & tenant_types:
        # Only tenant-like types present -> Tenant; anything else alongside -> Mixed
        return "Tenant" if unique_types <= tenant_types else "Mixed"

    if unique_types & owner_types:
        return "Owner"

    # fallback: nothing recognisable
    return None


# Classify every property from the occupancy types of all of its units
occupancy_by_property = merged.groupby('propertyid_property')['occupancytype']
ownership = (
    occupancy_by_property
    .apply(classify_ownership)
    .reset_index(name='Owned_Rented')
)

# Properties without any unit row end up with a missing Owned_Rented (left join)
property_df = pd.merge(
    property_df,
    ownership,
    how='left',
    left_on='propertyid',
    right_on='propertyid_property',
)

print(property_df)


                                         id      propertyid   tenantid  \
0      7eafe3da-ad69-441a-a982-899a80561e9b  PT-601-2154555  pb.abohar   
1      d4572a6e-e993-4245-a7cd-7ff83d324e51  PT-601-2154556  pb.abohar   
2      2fb0fe75-4e5b-43d2-b681-7b7b9e6251be  PT-601-2110140  pb.abohar   
3      86fa4b2d-3a27-4b7c-b5bb-c822cb1a6a80  PT-601-2154557  pb.abohar   
4      813b3aa9-c87b-4d55-994f-7e8d951f03c3  PT-601-2154559  pb.abohar   
...                                     ...             ...        ...   
43153  7860e170-a4e3-4fe6-8cb8-0acc67082996   PT-601-098718  pb.abohar   
43154  1b54e1a5-8cfb-409a-8f2e-0dd235677136  PT-601-1030985  pb.abohar   
43155  9da1008b-1f39-4479-9a96-169fc4cea80c   PT-601-774270  pb.abohar   
43156  cc5b2b79-4741-4734-bc0e-39ab322423a8   PT-601-845206  pb.abohar   
43157  bf65027f-5a50-48b0-a313-bc6826a18b20  PT-601-1154939  pb.abohar   

       status                 propertytype          ownershipcategory  \
0      ACTIVE  BUILTUP.INDEPENDENTPROP

In [29]:
def clean_numeric(series):
    """Coerce `series` to numbers, mapping 'NULL' markers, NaNs and unparseable values to 0."""
    without_null_markers = series.replace('NULL', 0)
    # errors='coerce' turns anything non-numeric into NaN, which is then zero-filled
    as_numbers = pd.to_numeric(without_null_markers, errors='coerce')
    return as_numbers.fillna(0)

# Normalise the two area columns to numbers ('NULL'/missing/unparseable -> 0)
merged['builtuparea'] = clean_numeric(merged['builtuparea'])
merged['plintharea'] = clean_numeric(merged['plintharea'])

# Sum of unit areas per property
area_summary = (
    merged.groupby('propertyid_property', as_index=False)
    .agg(
        total_builtup_area=('builtuparea', 'sum'),
        total_plinth_area=('plintharea', 'sum')
    )
)

area_columns = ['total_builtup_area', 'total_plinth_area']
property_df = (
    property_df
    # Make the cell safe to re-run: without this, a second run would suffix the
    # existing totals (_x/_y) and the fillna below would raise KeyError.
    .drop(columns=area_columns, errors='ignore')
    # Join on a same-named key so no extra key column is added to property_df
    .merge(
        area_summary.rename(columns={'propertyid_property': 'propertyid'}),
        on='propertyid',
        how='left',
    )
)
# Properties without any unit row get 0 area
property_df[area_columns] = property_df[area_columns].fillna(0)

print(property_df)

                                         id      propertyid   tenantid  \
0      7eafe3da-ad69-441a-a982-899a80561e9b  PT-601-2154555  pb.abohar   
1      d4572a6e-e993-4245-a7cd-7ff83d324e51  PT-601-2154556  pb.abohar   
2      2fb0fe75-4e5b-43d2-b681-7b7b9e6251be  PT-601-2110140  pb.abohar   
3      86fa4b2d-3a27-4b7c-b5bb-c822cb1a6a80  PT-601-2154557  pb.abohar   
4      813b3aa9-c87b-4d55-994f-7e8d951f03c3  PT-601-2154559  pb.abohar   
...                                     ...             ...        ...   
43153  7860e170-a4e3-4fe6-8cb8-0acc67082996   PT-601-098718  pb.abohar   
43154  1b54e1a5-8cfb-409a-8f2e-0dd235677136  PT-601-1030985  pb.abohar   
43155  9da1008b-1f39-4479-9a96-169fc4cea80c   PT-601-774270  pb.abohar   
43156  cc5b2b79-4741-4734-bc0e-39ab322423a8   PT-601-845206  pb.abohar   
43157  bf65027f-5a50-48b0-a313-bc6826a18b20  PT-601-1154939  pb.abohar   

       status                 propertytype          ownershipcategory  \
0      ACTIVE  BUILTUP.INDEPENDENTPROP

In [30]:
# Attach the per-consumer demand summary to each property
# (property.propertyid is matched against demand.consumercode; left join keeps every property)
property_result_merged = pd.merge(
    property_df,
    result,
    how='left',
    left_on='propertyid',
    right_on='consumercode',
)

print(property_result_merged)

                                         id      propertyid   tenantid  \
0      7eafe3da-ad69-441a-a982-899a80561e9b  PT-601-2154555  pb.abohar   
1      d4572a6e-e993-4245-a7cd-7ff83d324e51  PT-601-2154556  pb.abohar   
2      2fb0fe75-4e5b-43d2-b681-7b7b9e6251be  PT-601-2110140  pb.abohar   
3      86fa4b2d-3a27-4b7c-b5bb-c822cb1a6a80  PT-601-2154557  pb.abohar   
4      813b3aa9-c87b-4d55-994f-7e8d951f03c3  PT-601-2154559  pb.abohar   
...                                     ...             ...        ...   
43153  7860e170-a4e3-4fe6-8cb8-0acc67082996   PT-601-098718  pb.abohar   
43154  1b54e1a5-8cfb-409a-8f2e-0dd235677136  PT-601-1030985  pb.abohar   
43155  9da1008b-1f39-4479-9a96-169fc4cea80c   PT-601-774270  pb.abohar   
43156  cc5b2b79-4741-4734-bc0e-39ab322423a8   PT-601-845206  pb.abohar   
43157  bf65027f-5a50-48b0-a313-bc6826a18b20  PT-601-1154939  pb.abohar   

       status                 propertytype          ownershipcategory  \
0      ACTIVE  BUILTUP.INDEPENDENTPROP

In [31]:
# Step 1: Load owner data (ACTIVE owners only)
owner_columns = ['propertyid', 'ownertype', 'status']
owner_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/abohar/eg_pt_owner.csv',
    usecols=owner_columns,
)
is_active_owner = owner_df['status'].eq('ACTIVE')
owner_df = owner_df.loc[is_active_owner].copy()

# Step 2: Determine exemption
# A property counts as exempted when at least one of its active owners has an exempt owner type
exempt_owner_types = ['WIDOW', 'FREEDOMFIGHTER']
owner_df['is_exempted'] = owner_df['ownertype'].isin(exempt_owner_types)

exempted_status = owner_df.groupby('propertyid')['is_exempted'].any().reset_index()
exempted_status['Is Property Exempted [Yes/ No]'] = exempted_status['is_exempted'].map(
    {True: 'Yes', False: 'No'}
)
exempted_status = exempted_status.drop(columns=['is_exempted'])



In [32]:
# ✅ Step 1: Add exemption column to the merged result
exemption_col = 'Is Property Exempted [Yes/ No]'

# eg_pt_owner.propertyid refers to property_df.id, while property_result_merged
# already has its own 'propertyid' column. Renaming the owner-side key before
# merging avoids the propertyid_x / propertyid_y suffix collision, so no
# conditional column fix-ups are needed afterwards.
owner_exemption = exempted_status[['propertyid', exemption_col]].rename(
    columns={'propertyid': 'owner_property_ref'}
)

property_result_merged = (
    property_result_merged
    # Safe to re-run: drop a previously merged exemption column first
    .drop(columns=[exemption_col], errors='ignore')
    .merge(
        owner_exemption,
        left_on='id',  # property_df.id == eg_pt_owner.propertyid
        right_on='owner_property_ref',
        how='left'
    )
    # The owner-side key was only needed for the join
    .drop(columns=['owner_property_ref'])
)

# Properties with no active owner record are reported as not exempted
property_result_merged[exemption_col] = property_result_merged[exemption_col].fillna('No')

# ✅ Step 2: Rename columns for the final report
# Internal column name -> heading used in the delivered report
report_column_labels = {
    'tenantid': 'ULB',
    'propertyid': 'Property ID',
    'usagecategory': 'Usage',
    'createdtime': 'Date of Creation of the Property in the System',
    'additionaldetails': 'Date of Construction of the Property',
    'ownershipcategory': 'Ownership Type',
    'Is Property Exempted [Yes/ No]': 'Is Property Exempted [Yes/ No]',
    'Owned_Rented': 'Owned_Rented (Owner/ Rented/ Mixed)',
    'earliest_fy': 'Earliest Financial Year for which Demand was Generated',
    'latest_fy': 'Latest Financial Year for which Demand was Generated',
    'latest_fy_taxamount': 'Latest Demand Generated [in Rs.]',
    'current_fy_taxamount': 'Current Years Demand Generated [in Rs.]',
    'PT_TIME_PENALTY': 'Penalty',
    'PT_TIME_INTEREST': 'Interest',
    'arrear_years_demand_generated': 'Arrear Years Demand Generated [in Rs.]',
    'propertytype': 'Property Type[Building/ Vacant]',
    'total_builtup_area': 'Total Builtup Area [Sum of all units/ floors]',
    'total_plinth_area': 'Total Plinth Area [Sum of all units/ floors]',
}
report = property_result_merged.rename(columns=report_column_labels).copy()

# ✅ Step 3: Format ULB and date fields
# Fixed +05:30 offset for IST, so the formatted date does not depend on the
# timezone of the machine running the notebook (the tax periods above are
# likewise converted to Asia/Kolkata explicitly).
_IST = timezone(timedelta(hours=5, minutes=30))


def epoch_to_custom_date(epoch_ms):
    """Format epoch milliseconds as an IST calendar date, e.g. '05-Jun-2025'.

    Returns None when `epoch_ms` is missing (None/NaN).
    """
    if pd.isna(epoch_ms):
        return None
    return datetime.fromtimestamp(epoch_ms / 1000, tz=_IST).strftime('%d-%b-%Y')

def get_year_construction(val):
    """Extract 'yearConstruction' from a JSON-encoded additionaldetails value.

    Returns None when `val` is missing, is not valid JSON, does not decode to
    an object with a `.get` method, or has no 'yearConstruction' key.
    """
    if pd.isna(val):
        return None
    try:
        return json.loads(val).get('yearConstruction')
    # ValueError covers json.JSONDecodeError; TypeError a non-string input;
    # AttributeError JSON that is not an object (list, number, ...).
    # Deliberately not a bare `except`, so KeyboardInterrupt still propagates.
    except (TypeError, ValueError, AttributeError):
        return None

creation_col = 'Date of Creation of the Property in the System'
construction_col = 'Date of Construction of the Property'

# Tenant id -> ULB name: take the part after the first '.' and capitalise it
tenant_parts = report['ULB'].str.split('.')
report['ULB'] = tenant_parts.str[1].str.capitalize()

# Epoch milliseconds -> formatted date; JSON details -> yearConstruction value
report[creation_col] = report[creation_col].apply(epoch_to_custom_date)
report[construction_col] = report[construction_col].apply(get_year_construction)

# ✅ Step 4: Select final columns in required order
final_columns = [
    'ULB',
    'Property ID',
    'Usage',
    'Date of Creation of the Property in the System',
    'Date of Construction of the Property',
    'Ownership Type',
    'Is Property Exempted [Yes/ No]',
    'Owned_Rented (Owner/ Rented/ Mixed)',
    'Earliest Financial Year for which Demand was Generated',
    'Latest Financial Year for which Demand was Generated',
    'Latest Demand Generated [in Rs.]',
    'Current Years Demand Generated [in Rs.]',
    'Penalty',
    'Interest',
    'Arrear Years Demand Generated [in Rs.]',
    'Property Type[Building/ Vacant]',
    'Total Builtup Area [Sum of all units/ floors]',
    'Total Plinth Area [Sum of all units/ floors]',
]
final_report = report.loc[:, final_columns].copy()

# ✅ Step 5: Save the CSV (written to the current working directory, without the index)
print("✅ Writing CSV")
final_report.to_csv('Punjab_Data_Analysis_abohar_final.csv', index=False)
print(f"🎉 Done! CSV generated with {len(final_report)} properties")


✅ Writing CSV
🎉 Done! CSV generated with 43158 properties
