In [1]:
import gc
import json
import os
from datetime import datetime, timedelta, timezone

import pandas as pd

# Folder containing the demand-detail CSV files.
folder_path_demanddetails = '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/'

# Properties: read only the columns used later, then keep rows whose status is ACTIVE.
property_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/eg_pt_property.csv',
    usecols=['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory']
)
property_df = property_df[property_df['status'] == 'ACTIVE'].copy()

# Units: only the owning property's id and the occupancy type; no status filter is applied here.
unit_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/eg_pt_unit.csv',
    usecols=['propertyid', 'occupancytype']
)



# Demands: consumercode is forced to str; keep rows whose status is ACTIVE.
demand_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/egbs_demand_v1.csv',
    dtype={"consumercode": str},
    low_memory=False,
    usecols=['id', 'taxperiodfrom', 'taxperiodto', 'consumercode', 'status']
)
demand_df = demand_df[demand_df['status'] == 'ACTIVE'].copy()


# Demand details: every *.csv in the folder is read in full (restricted to
# `needed_cols` to limit memory) and the per-file frames are concatenated.
# NOTE(review): files are not streamed with `chunksize`; the whole table ends
# up in memory. os.listdir() returns names in arbitrary order, so the row
# order of the concatenated frame is not stable between runs/machines.
all_chunks = []
needed_cols = ['demandid', 'taxamount', 'collectionamount', 'taxheadcode']
for filename in os.listdir(folder_path_demanddetails):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_demanddetails, filename)
        print(f'Loading: {file_path}')
        chunk = pd.read_csv(file_path, usecols=needed_cols)
        all_chunks.append(chunk)
demand_details_df = pd.concat(all_chunks, ignore_index=True)
# Drop the list of per-file frames now that they have been concatenated.
del all_chunks; gc.collect()

print("✅ Loaded data")

Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_263.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_4.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_753.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_479.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_555.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_672.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_496.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_718.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_details/output_545.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/output_demand_detai

In [2]:
print(len(property_df))         # number of ACTIVE property rows
print(len(unit_df))             # number of unit rows (not filtered by status)
print(len(demand_df))   # number of ACTIVE demand rows
print(len(demand_details_df))   # number of demand-detail rows across all loaded files

184070
464313
1199777
39269965


In [3]:
# Left-join ACTIVE properties to their units (property.id == unit.propertyid).
# Both frames have a 'propertyid' column, so they come out as
# 'propertyid_property' and 'propertyid_unit'.
joined_pt_unit = property_df.merge(unit_df, left_on='id', right_on='propertyid', how='left', suffixes=('_property', '_unit'))
# Free the inputs. property_df is read from disk again later in the notebook.
del property_df, unit_df; gc.collect()
# NOTE(review): this expression is not the last statement of the cell, so
# nothing is displayed for it.
joined_pt_unit.head()
# Sanity check: number of distinct property ids present after the join.
print(joined_pt_unit['id'].nunique())

184070


In [4]:
# Preview the property-unit join; a property with several units appears once per unit.
joined_pt_unit.head()

Unnamed: 0,id,propertyid_property,tenantid,status,ownershipcategory,usagecategory,createdtime,additionaldetails,propertyid_unit,occupancytype
0,602a42b8-1f2a-40d0-8cd3-86f4b29fad56,PT-1013-460425,pb.jalandhar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1564172078614,"{""inflammable"": false, ""heightAbove36Feet"": fa...",602a42b8-1f2a-40d0-8cd3-86f4b29fad56,SELFOCCUPIED
1,6358083c-6576-4fa7-ad1e-f8418d1b62dc,PT-1013-2352280,pb.jalandhar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1753570331175,"{""yearConstruction"": null}",6358083c-6576-4fa7-ad1e-f8418d1b62dc,SELFOCCUPIED
2,6358083c-6576-4fa7-ad1e-f8418d1b62dc,PT-1013-2352280,pb.jalandhar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1753570331175,"{""yearConstruction"": null}",6358083c-6576-4fa7-ad1e-f8418d1b62dc,SELFOCCUPIED
3,6358083c-6576-4fa7-ad1e-f8418d1b62dc,PT-1013-2352280,pb.jalandhar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1753570331175,"{""yearConstruction"": null}",6358083c-6576-4fa7-ad1e-f8418d1b62dc,SELFOCCUPIED
4,14fabe93-2049-4689-9ca4-c0bc5a6d2f55,PT-1013-1151516,pb.jalandhar,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1724819675838,"{""previousPropertyUuid"": ""50e42310-423d-450e-b...",14fabe93-2049-4689-9ca4-c0bc5a6d2f55,SELFOCCUPIED


In [5]:
# Left-join ACTIVE demands to their detail rows (demand.id == detail.demandid),
# giving one row per (demand, detail) pair.
joined_demand = demand_df.merge(demand_details_df, left_on='id', right_on='demandid', how='left', suffixes=('_demand', '_detail'))
# Sanity check: number of distinct demand ids present after the join.
print(joined_demand['id'].nunique())
# The inputs are no longer needed; release them before the heavier steps below.
del demand_details_df, demand_df; gc.collect()
joined_demand.head()

1199777


Unnamed: 0,id,consumercode,taxperiodfrom,taxperiodto,status,demandid,taxheadcode,taxamount,collectionamount
0,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT-1013-467682,1364774400000,1396310399000,ACTIVE,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT_FIRE_CESS,0.0,0.0
1,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT-1013-467682,1364774400000,1396310399000,ACTIVE,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT_ROUNDOFF,0.16,0.16
2,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT-1013-467682,1364774400000,1396310399000,ACTIVE,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT_OWNER_EXEMPTION,0.0,0.0
3,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT-1013-467682,1364774400000,1396310399000,ACTIVE,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT_TIME_PENALTY,0.0,0.0
4,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT-1013-467682,1364774400000,1396310399000,ACTIVE,fcb1e7b4-0588-4acb-9840-ca3d77612b8c,PT_TIME_INTEREST,0.0,0.0


In [6]:
import pytz

# Tax-period bounds are stored as epoch milliseconds: parse them as UTC
# instants and express them in IST (Asia/Kolkata) in a single step per column.
ist = pytz.timezone('Asia/Kolkata')
joined_demand['taxperiodfrom'] = (
    pd.to_datetime(joined_demand['taxperiodfrom'], unit='ms', utc=True)
    .dt.tz_convert(ist)
)
joined_demand['taxperiodto'] = (
    pd.to_datetime(joined_demand['taxperiodto'], unit='ms', utc=True)
    .dt.tz_convert(ist)
)

# Financial year calculation
def get_fy(date):
    """Return the April-to-March financial-year label for ``date``.

    The label has the form ``'YYYY-YY'``: the year in which the financial
    year starts, a dash, and the last two digits of the following year.
    Months April..December belong to the FY starting in the same calendar
    year; January..March belong to the FY that started the year before.

    Parameters
    ----------
    date : object exposing ``.year`` and ``.month`` (e.g. datetime, Timestamp)

    Returns
    -------
    str
    """
    start_year = date.year if date.month >= 4 else date.year - 1
    end_year = start_year + 1
    return f"{start_year}-{str(end_year)[-2:]}"

# Label every joined demand row with the financial year of its period start.
joined_demand['fy'] = joined_demand['taxperiodfrom'].apply(get_fy)

# Earliest and latest financial year seen for each consumercode.
# The 'YYYY-YY' labels order chronologically as plain strings.
result = (
    joined_demand.groupby('consumercode')['fy']
    .agg(earliest_fy='min', latest_fy='max')
    .reset_index()
)

print(result)

           consumercode earliest_fy latest_fy
0       PT-1013-1000009     2013-14   2024-25
1       PT-1013-1000020     2018-19   2024-25
2       PT-1013-1000024     2020-21   2024-25
3       PT-1013-1000029     2020-21   2024-25
4       PT-1013-1000037     2020-21   2024-25
...                 ...         ...       ...
187894   PT-1013-999879     2013-14   2024-25
187895   PT-1013-999958     2020-21   2024-25
187896   PT-1013-999960     2020-21   2024-25
187897   PT-1013-999961     2020-21   2024-25
187898   PT-1013-999975     2014-15   2024-25

[187899 rows x 3 columns]


In [7]:
# Attach each consumercode's latest_fy to every joined demand row.
# NOTE(review): `joined` is a second frame with as many rows as joined_demand
# and is not deleted in this cell.
joined = joined_demand.merge(
    result[['consumercode', 'latest_fy']],
    on='consumercode',
    how='left'
)

# Keep only the rows that belong to that consumercode's latest financial year.
latest_demand = joined[joined['fy'] == joined['latest_fy']]

# Total tax amount (summed over all tax heads) raised in the latest financial year.
demand_sum = latest_demand.groupby('consumercode')['taxamount'].sum().reset_index()
demand_sum.rename(columns={'taxamount':'latest_fy_taxamount'}, inplace=True)

# Add the total as a new column of the per-consumercode summary.
result = result.merge(demand_sum, on='consumercode', how='left')

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount
0       PT-1013-1000009     2013-14   2024-25                  0.0
1       PT-1013-1000020     2018-19   2024-25               5014.0
2       PT-1013-1000024     2020-21   2024-25               6318.0
3       PT-1013-1000029     2020-21   2024-25               8077.0
4       PT-1013-1000037     2020-21   2024-25                845.0
...                 ...         ...       ...                  ...
187894   PT-1013-999879     2013-14   2024-25               3502.0
187895   PT-1013-999958     2020-21   2024-25                828.0
187896   PT-1013-999960     2020-21   2024-25               2236.0
187897   PT-1013-999961     2020-21   2024-25                702.0
187898   PT-1013-999975     2014-15   2024-25                744.0

[187899 rows x 4 columns]


In [8]:
# Tax amount (demand) raised in the current financial year, per consumercode.
target_fy = "2025-26"
current_fy_demand = joined_demand[joined_demand['fy'] == target_fy]

df_fy_sum = (
    current_fy_demand.groupby('consumercode')['taxamount']
    .sum()
    .reset_index()
    .rename(columns={'taxamount': 'current_fy_taxamount'})
)

# Every consumercode seen in the demand data; those without a demand in
# the target year get 0 instead of a missing value.
all_consumercodes = pd.DataFrame({'consumercode': joined_demand['consumercode'].unique()})

final = (
    all_consumercodes.merge(df_fy_sum, on='consumercode', how='left')
    .fillna({'current_fy_taxamount': 0})
)

result = (
    result.merge(final, on='consumercode', how='left')
    .fillna({'current_fy_taxamount': 0})
)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0       PT-1013-1000009     2013-14   2024-25                  0.0   
1       PT-1013-1000020     2018-19   2024-25               5014.0   
2       PT-1013-1000024     2020-21   2024-25               6318.0   
3       PT-1013-1000029     2020-21   2024-25               8077.0   
4       PT-1013-1000037     2020-21   2024-25                845.0   
...                 ...         ...       ...                  ...   
187894   PT-1013-999879     2013-14   2024-25               3502.0   
187895   PT-1013-999958     2020-21   2024-25                828.0   
187896   PT-1013-999960     2020-21   2024-25               2236.0   
187897   PT-1013-999961     2020-21   2024-25                702.0   
187898   PT-1013-999975     2014-15   2024-25                744.0   

        current_fy_taxamount  
0                        0.0  
1                        0.0  
2                        0.0  
3                        0.0  
4   

In [9]:
# Arrears: for every financial year before the current one, the amount still
# outstanding = tax amount raised minus amount collected.
#
# `target_fy` is the current-FY label defined in the current-year demand cell.
# Reusing it (instead of repeating the literal) keeps both cells in step when
# the year is rolled over.
# 'YYYY-YY' labels order chronologically as strings, so `<` selects earlier FYs.
arrear_demand = joined_demand[joined_demand['fy'] < target_fy]

agg = arrear_demand.groupby('consumercode').agg(
    arrear_taxamount_sum=('taxamount', 'sum'),
    arrear_collectionamount_sum=('collectionamount', 'sum')
).reset_index()

agg['arrear_years_demand_generated'] = (
    agg['arrear_taxamount_sum'] - agg['arrear_collectionamount_sum']
)

# Consumercodes with no demand before the current FY get 0 rather than NaN.
result = result.merge(
    agg[['consumercode', 'arrear_years_demand_generated']],
    on='consumercode', how='left'
)
result['arrear_years_demand_generated'] = result['arrear_years_demand_generated'].fillna(0)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0       PT-1013-1000009     2013-14   2024-25                  0.0   
1       PT-1013-1000020     2018-19   2024-25               5014.0   
2       PT-1013-1000024     2020-21   2024-25               6318.0   
3       PT-1013-1000029     2020-21   2024-25               8077.0   
4       PT-1013-1000037     2020-21   2024-25                845.0   
...                 ...         ...       ...                  ...   
187894   PT-1013-999879     2013-14   2024-25               3502.0   
187895   PT-1013-999958     2020-21   2024-25                828.0   
187896   PT-1013-999960     2020-21   2024-25               2236.0   
187897   PT-1013-999961     2020-21   2024-25                702.0   
187898   PT-1013-999975     2014-15   2024-25                744.0   

        current_fy_taxamount  arrear_years_demand_generated  
0                        0.0                            0.0  
1                        0.0       

In [10]:
# Total penalty and interest per consumercode, summed over all financial years.
relevant_codes = ['PT_TIME_PENALTY', 'PT_TIME_INTEREST']
filtered = joined_demand[joined_demand['taxheadcode'].isin(relevant_codes)]

grouped = (
    filtered.groupby(['consumercode', 'taxheadcode'])['taxamount']
    .sum()
    .unstack(fill_value=0)  # one column per tax head, 0 where a consumer has none
    # Guarantee both columns exist (in this order) even when one of the tax
    # heads never occurs in the data; a plain column selection would raise
    # KeyError in that case.
    .reindex(columns=relevant_codes, fill_value=0)
    .reset_index()
)

# Consumercodes without any penalty/interest rows get 0 rather than NaN.
result = result.merge(grouped, on='consumercode', how='left')
result[relevant_codes] = result[relevant_codes].fillna(0)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0       PT-1013-1000009     2013-14   2024-25                  0.0   
1       PT-1013-1000020     2018-19   2024-25               5014.0   
2       PT-1013-1000024     2020-21   2024-25               6318.0   
3       PT-1013-1000029     2020-21   2024-25               8077.0   
4       PT-1013-1000037     2020-21   2024-25                845.0   
...                 ...         ...       ...                  ...   
187894   PT-1013-999879     2013-14   2024-25               3502.0   
187895   PT-1013-999958     2020-21   2024-25                828.0   
187896   PT-1013-999960     2020-21   2024-25               2236.0   
187897   PT-1013-999961     2020-21   2024-25                702.0   
187898   PT-1013-999975     2014-15   2024-25                744.0   

        current_fy_taxamount  arrear_years_demand_generated  PT_TIME_PENALTY  \
0                        0.0                            0.0             0.00   

In [11]:
# Re-read the unit table with every column (no `usecols`); later cells use
# its occupancytype, builtuparea and plintharea columns.
unit_all_columns_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/eg_pt_unit.csv'
)
# NOTE(review): printing the whole frame produces a long, truncated text dump.
print(unit_all_columns_df)

                                          id      tenantid  \
0       76524dc3-acbf-49b1-a354-347b03612ec3  pb.jalandhar   
1       c1deb0ac-c74f-469a-9c97-ddc884cb3826  pb.jalandhar   
2       2df8ccc9-3775-43ac-8088-8b721c2ea923  pb.jalandhar   
3       b9c9687b-23a2-4d1c-b0c4-f0d3e17754e3  pb.jalandhar   
4       01cf0ae9-ce6a-45fd-94a0-defa9e946659  pb.jalandhar   
...                                      ...           ...   
464308  1ca6bf11-37ab-4a3b-b0ca-840a41a5d27a  pb.jalandhar   
464309  966f4f67-5e6c-47d7-a69d-49c2b2312646  pb.jalandhar   
464310  cad9e561-7c86-41d6-97df-16778a8a5178  pb.jalandhar   
464311  bebcc775-2055-4145-9690-4ea85a66de7a  pb.jalandhar   
464312  623f03f5-3961-4f7f-8de8-b05c2b103e5e  pb.jalandhar   

                                  propertyid  floorno unittype  \
0       6358083c-6576-4fa7-ad1e-f8418d1b62dc        2      NaN   
1       6358083c-6576-4fa7-ad1e-f8418d1b62dc        1      NaN   
2       6358083c-6576-4fa7-ad1e-f8418d1b62dc        0    

In [12]:

# Read the property table again (the earlier property_df was deleted after the
# first join), this time also loading 'propertytype'; keep ACTIVE rows only.
property_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/eg_pt_property.csv',
    usecols=['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory', 'propertytype']
)
property_df = property_df[property_df['status'] == 'ACTIVE'].copy()
# Join properties to units (property.id == unit.propertyid). No `how` is given,
# so pandas' default inner join applies. Column names present in both frames
# get the '_property' / '_unit' suffixes, so 'propertyid_property' is the
# property table's own propertyid column.
merged = property_df.merge(unit_all_columns_df, left_on='id', right_on='propertyid', suffixes=('_property', '_unit'))

# def classify_ownership(occupancies):
#     unique_types = set(occupancies)
#     if 'RENTED' in unique_types:
#         if len(unique_types) > 1:
#             return 'Mixed'
#         else:
#             return 'Tenant'
#     if 'SELFOCCUPIED' in unique_types:
#         # If only SELFOCCUPIED or SELFOCCUPIED + UNOCCUPIED
#         return 'Owner'
#     if 'UNOCCUPIED' in unique_types:
#         return 'Owner'
#     # fallback
#     return None


def classify_ownership(occupancies):
    """Classify a property from the occupancy types of its units.

    Parameters
    ----------
    occupancies : iterable
        Occupancy-type values of all units of one property. Missing values
        (NaN / None) are ignored: a missing value is not an occupancy type
        and must not turn a purely rented property into "Mixed".

    Returns
    -------
    str or None
        "Tenant" - every known unit is tenant-occupied (RENTED or PG).
        "Mixed"  - at least one tenant-occupied unit plus any other type.
        "Owner"  - no tenant-occupied unit and at least one SELFOCCUPIED or
                   UNOCCUPIED unit.
        None     - nothing recognisable (empty, all missing, or only
                   unknown types).
    """
    tenant_types = {"RENTED", "PG"}
    owner_types = {"SELFOCCUPIED", "UNOCCUPIED"}

    # Distinct occupancy types, with missing entries dropped.
    unique_types = {occ for occ in occupancies if pd.notna(occ)}

    if unique_types & tenant_types:
        # Anything beyond the tenant types makes the property mixed-use.
        return "Tenant" if unique_types <= tenant_types else "Mixed"

    if unique_types & owner_types:
        return "Owner"

    # fallback
    return None

# Classify each property from the occupancy types of its units. The group key
# 'propertyid_property' is the property table's propertyid column in `merged`.
ownership = (
    merged.groupby('propertyid_property')['occupancytype']
    .apply(classify_ownership)
    .reset_index()
    .rename(columns={'occupancytype': 'Owned_Rented'})
)

# Attach the classification to the property table. Properties that have no
# unit rows in `merged` keep a missing Owned_Rented value (left join).
# The helper key 'propertyid_property' stays in property_df after this merge.
property_df = property_df.merge(ownership, left_on='propertyid', right_on = 'propertyid_property', how='left')

print(property_df)


                                          id       propertyid      tenantid  \
0       602a42b8-1f2a-40d0-8cd3-86f4b29fad56   PT-1013-460425  pb.jalandhar   
1       6358083c-6576-4fa7-ad1e-f8418d1b62dc  PT-1013-2352280  pb.jalandhar   
2       14fabe93-2049-4689-9ca4-c0bc5a6d2f55  PT-1013-1151516  pb.jalandhar   
3       db8bcfb8-fa0a-4f42-aec4-77e4bf1d6619  PT-1013-1446443  pb.jalandhar   
4       f44e91ab-60c4-40fd-86c2-16fcced74c26   PT-1013-914284  pb.jalandhar   
...                                      ...              ...           ...   
184065  5ca379a7-e05b-4899-9276-d29a0c647338   PT-1013-540818  pb.jalandhar   
184066  3cddb513-9c7c-4ba8-bca5-f0cdaf2a942e   PT-1013-540806  pb.jalandhar   
184067  442e74c5-a5b2-443e-86a1-547722470459   PT-1013-707131  pb.jalandhar   
184068  d897a493-83a3-4db3-b322-0785ad2d3df2   PT-1013-987939  pb.jalandhar   
184069  8e520ed2-8bb1-4dc2-bfc4-a0d5b84b8ba8  PT-1013-1774030  pb.jalandhar   

        status                 propertytype        

In [13]:
def clean_numeric(series):
    """Return ``series`` as numbers, with unusable entries turned into 0.

    The literal string 'NULL' is replaced by 0 first; whatever still cannot
    be parsed as a number (including missing values) is coerced to NaN and
    then filled with 0.
    """
    without_null_tokens = series.replace('NULL', 0)
    as_numbers = pd.to_numeric(without_null_tokens, errors='coerce')
    return as_numbers.fillna(0)

# Make the two area columns numeric ('NULL', blanks and junk become 0) so they can be summed.
merged['builtuparea'] = clean_numeric(merged['builtuparea'])
merged['plintharea'] = clean_numeric(merged['plintharea'])

# Sum both areas over all unit rows of each property.
area_summary = (
    merged.groupby('propertyid_property', as_index=False)
    .agg(
        total_builtup_area=('builtuparea', 'sum'),
        total_plinth_area=('plintharea', 'sum')
    )
)
# NOTE(review): the disabled lines below would drop existing total columns
# before merging; without them this cell cannot simply be run twice, because
# a second merge would suffix the total columns and the fillna lines would
# then fail to find them.
# for col in ['total_builtup_area', 'total_plinth_area']:
#     if col in property_df.columns:
#         property_df = property_df.drop(col, axis=1)

# NOTE(review): property_df already carries a 'propertyid_property' column from
# the ownership merge, so pandas suffixes the clashing key columns (_x / _y).
property_df = property_df.merge(area_summary, left_on='propertyid' ,right_on='propertyid_property', how='left')
# Properties without unit rows get an area of 0 rather than NaN.
property_df['total_builtup_area'] = property_df['total_builtup_area'].fillna(0)
property_df['total_plinth_area'] = property_df['total_plinth_area'].fillna(0)

print(property_df)

                                          id       propertyid      tenantid  \
0       602a42b8-1f2a-40d0-8cd3-86f4b29fad56   PT-1013-460425  pb.jalandhar   
1       6358083c-6576-4fa7-ad1e-f8418d1b62dc  PT-1013-2352280  pb.jalandhar   
2       14fabe93-2049-4689-9ca4-c0bc5a6d2f55  PT-1013-1151516  pb.jalandhar   
3       db8bcfb8-fa0a-4f42-aec4-77e4bf1d6619  PT-1013-1446443  pb.jalandhar   
4       f44e91ab-60c4-40fd-86c2-16fcced74c26   PT-1013-914284  pb.jalandhar   
...                                      ...              ...           ...   
184065  5ca379a7-e05b-4899-9276-d29a0c647338   PT-1013-540818  pb.jalandhar   
184066  3cddb513-9c7c-4ba8-bca5-f0cdaf2a942e   PT-1013-540806  pb.jalandhar   
184067  442e74c5-a5b2-443e-86a1-547722470459   PT-1013-707131  pb.jalandhar   
184068  d897a493-83a3-4db3-b322-0785ad2d3df2   PT-1013-987939  pb.jalandhar   
184069  8e520ed2-8bb1-4dc2-bfc4-a0d5b84b8ba8  PT-1013-1774030  pb.jalandhar   

        status                 propertytype        

In [14]:
# Attach the per-consumercode demand summary to each property
# (property.propertyid == demand consumercode). Left join: properties with no
# demand keep missing values in the demand columns.
property_result_merged = property_df.merge(
    result,
    left_on='propertyid',
    right_on='consumercode',
    how='left'
)

print(property_result_merged)

                                          id       propertyid      tenantid  \
0       602a42b8-1f2a-40d0-8cd3-86f4b29fad56   PT-1013-460425  pb.jalandhar   
1       6358083c-6576-4fa7-ad1e-f8418d1b62dc  PT-1013-2352280  pb.jalandhar   
2       14fabe93-2049-4689-9ca4-c0bc5a6d2f55  PT-1013-1151516  pb.jalandhar   
3       db8bcfb8-fa0a-4f42-aec4-77e4bf1d6619  PT-1013-1446443  pb.jalandhar   
4       f44e91ab-60c4-40fd-86c2-16fcced74c26   PT-1013-914284  pb.jalandhar   
...                                      ...              ...           ...   
184065  5ca379a7-e05b-4899-9276-d29a0c647338   PT-1013-540818  pb.jalandhar   
184066  3cddb513-9c7c-4ba8-bca5-f0cdaf2a942e   PT-1013-540806  pb.jalandhar   
184067  442e74c5-a5b2-443e-86a1-547722470459   PT-1013-707131  pb.jalandhar   
184068  d897a493-83a3-4db3-b322-0785ad2d3df2   PT-1013-987939  pb.jalandhar   
184069  8e520ed2-8bb1-4dc2-bfc4-a0d5b84b8ba8  PT-1013-1774030  pb.jalandhar   

        status                 propertytype        

In [15]:
# Step 1: Load owner rows and keep the ACTIVE ones
owner_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/jalandhar/eg_pt_owner.csv',
    usecols=['propertyid', 'ownertype', 'status']
)
owner_df = owner_df.loc[owner_df['status'] == 'ACTIVE'].copy()

# Step 2: A property counts as exempted when at least one of its active
# owners has an exempt owner type.
owner_df['is_exempted'] = owner_df['ownertype'].isin(['WIDOW', 'FREEDOMFIGHTER'])
exempted_status = (
    owner_df.groupby('propertyid')['is_exempted']
    .any()
    .map({True: 'Yes', False: 'No'})
    .rename('Is Property Exempted [Yes/ No]')
    .reset_index()
)



In [16]:
# ✅ Step 1: Add exemption column to the merged result
# Both sides have a column named 'propertyid' (the left one is a non-key
# column, the right one is the join key), so after this merge they appear as
# 'propertyid_x' (from property_result_merged) and 'propertyid_y' (owner table).
property_result_merged = property_result_merged.merge(
    exempted_status[['propertyid', 'Is Property Exempted [Yes/ No]']],
    left_on='id',  # property_df.id == eg_pt_owner.propertyid
    right_on='propertyid',
    how='left'
)

# Properties without any active owner row are reported as not exempted.
property_result_merged['Is Property Exempted [Yes/ No]'] = property_result_merged['Is Property Exempted [Yes/ No]'].fillna('No')

# Drop duplicate merge key
# NOTE(review): because of the suffixing described above there is no plain
# 'propertyid' column at this point, so this branch looks like it is skipped.
if 'propertyid' in property_result_merged.columns:
    property_result_merged.drop(columns=['propertyid'], inplace=True)


# If 'propertyid_x' exists, use it as the correct property ID
# (this restores the plain 'propertyid' column that the rename below expects).
if 'propertyid_x' in property_result_merged.columns:
    property_result_merged['propertyid'] = property_result_merged['propertyid_x']

# ✅ Step 2: Rename columns for the final report
# 'additionaldetails' still holds the raw JSON text here; it is reduced to the
# construction year in a later step.
report = property_result_merged.rename(columns={
    'tenantid': 'ULB',
    'propertyid': 'Property ID',
    'usagecategory': 'Usage',
    'createdtime': 'Date of Creation of the Property in the System',
    'additionaldetails': 'Date of Construction of the Property',
    'ownershipcategory': 'Ownership Type',
    'Is Property Exempted [Yes/ No]': 'Is Property Exempted [Yes/ No]',
    'Owned_Rented': 'Owned_Rented (Owner/ Rented/ Mixed)',
    'earliest_fy': 'Earliest Financial Year for which Demand was Generated',
    'latest_fy': 'Latest Financial Year for which Demand was Generated',
    'latest_fy_taxamount': 'Latest Demand Generated [in Rs.]',
    'current_fy_taxamount': 'Current Years Demand Generated [in Rs.]',
    'PT_TIME_PENALTY': 'Penalty',
    'PT_TIME_INTEREST': 'Interest',
    'arrear_years_demand_generated': 'Arrear Years Demand Generated [in Rs.]',
    'propertytype': 'Property Type[Building/ Vacant]',
    'total_builtup_area': 'Total Builtup Area [Sum of all units/ floors]',
    'total_plinth_area': 'Total Plinth Area [Sum of all units/ floors]'
}).copy()

# ✅ Step 3: Format ULB and date fields
def epoch_to_custom_date(epoch_ms, tz=timezone(timedelta(hours=5, minutes=30))):
    """Format an epoch-milliseconds timestamp as ``'DD-Mon-YYYY'``.

    Parameters
    ----------
    epoch_ms : int or float
        Milliseconds since the Unix epoch. Missing values (None / NaN) are
        passed through as ``None``.
    tz : datetime.tzinfo, optional
        Time zone in which the calendar date is taken. Defaults to IST
        (UTC+05:30) so the date does not depend on the time zone of the
        machine running the notebook, in line with the IST handling of the
        tax-period columns.

    Returns
    -------
    str or None
        e.g. ``'27-Jul-2019'``, or ``None`` for a missing input.
    """
    if pd.isna(epoch_ms):
        return None
    return datetime.fromtimestamp(epoch_ms / 1000, tz=tz).strftime('%d-%b-%Y')

def get_year_construction(val):
    """Extract ``yearConstruction`` from a JSON-encoded details string.

    Parameters
    ----------
    val : str or missing
        JSON text expected to hold an object; None / NaN means "no details".

    Returns
    -------
    The value stored under ``'yearConstruction'``, or ``None`` when the input
    is missing, is not valid JSON, is not a JSON object, or lacks the key.
    """
    if pd.isna(val):
        return None
    try:
        return json.loads(val).get('yearConstruction')
    except (TypeError, ValueError, AttributeError):
        # TypeError: val is not str/bytes; ValueError: malformed JSON
        # (JSONDecodeError is a ValueError); AttributeError: the JSON value
        # is not an object. Anything else (e.g. KeyboardInterrupt) must
        # propagate instead of being silently swallowed.
        return None

# tenantid looks like 'pb.jalandhar': keep the part after the dot and capitalise it.
report['ULB'] = report['ULB'].str.split('.').str[1].str.capitalize()
# createdtime (epoch milliseconds) -> 'DD-Mon-YYYY' text.
report['Date of Creation of the Property in the System'] = report['Date of Creation of the Property in the System'].apply(epoch_to_custom_date)
# additionaldetails (JSON text) -> its 'yearConstruction' value, if any.
report['Date of Construction of the Property'] = report['Date of Construction of the Property'].apply(get_year_construction)

# ✅ Step 4: Select final columns in required order
# Only these columns are written; helper/merge-key columns are left behind.
final_report = report[
    [
        'ULB',
        'Property ID',
        'Usage',
        'Date of Creation of the Property in the System',
        'Date of Construction of the Property',
        'Ownership Type',
        'Is Property Exempted [Yes/ No]',
        'Owned_Rented (Owner/ Rented/ Mixed)',
        'Earliest Financial Year for which Demand was Generated',
        'Latest Financial Year for which Demand was Generated',
        'Latest Demand Generated [in Rs.]',
        'Current Years Demand Generated [in Rs.]',
        'Penalty',
        'Interest',
        'Arrear Years Demand Generated [in Rs.]',
        'Property Type[Building/ Vacant]',
        'Total Builtup Area [Sum of all units/ floors]',
        'Total Plinth Area [Sum of all units/ floors]'
    ]
].copy()

# ✅ Step 5: Save the CSV
# Written to the notebook's current working directory, without the index column.
print("✅ Writing CSV")
final_report.to_csv('Punjab_Data_Analysis_jalandhar_final.csv', index=False)
print(f"🎉 Done! CSV generated with {len(final_report)} properties")


✅ Writing CSV
🎉 Done! CSV generated with 184070 properties
