In [None]:
import os
import pandas as pd
from datetime import datetime
import json
import gc


folder_path_demand = '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/'
folder_path_demanddetails = '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand_details/'

# read active properties & needed columns
property_df = pd.read_csv(
    '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/eg_pt_property_zirakpur.csv',
    usecols=['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory']
)
property_df = property_df[property_df['status'] == 'ACTIVE'].copy()

# read units
unit_df = pd.read_csv(
    '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/eg_pt_unit_zirakpur.csv',
    usecols=['propertyid', 'occupancytype']
)



# read demand
# demand_df = pd.read_csv(
#     '/home/admin1/Downloads/punjab-data-prod-analysis/adampur/egbs_demand_v1.csv',
#     dtype={"consumercode": str},
#     low_memory=False,
#     usecols=['id', 'taxperiodfrom', 'taxperiodto', 'consumercode', 'status']
# )
# demand_df = demand_df[demand_df['status'] == 'ACTIVE'].copy()

# read demand (memory‑efficient, in chunks)
all_chunks = []
needed_cols = ['id', 'taxperiodfrom', 'taxperiodto', 'consumercode', 'status', 'businessservice']
for filename in os.listdir(folder_path_demand):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_demand, filename)
        print(f'Loading: {file_path}')
        chunk = pd.read_csv(file_path, usecols=needed_cols)
        all_chunks.append(chunk)
demand_df = pd.concat(all_chunks, ignore_index=True)
demand_df = demand_df[demand_df['status'] == 'ACTIVE'].copy()
demand_df = demand_df[demand_df['businessservice'] == 'PT'].copy()
del all_chunks; gc.collect()

# read demand details (memory‑efficient, in chunks)
all_chunks = []
needed_cols = ['demandid', 'taxamount', 'collectionamount', 'taxheadcode']
for filename in os.listdir(folder_path_demanddetails):
    if filename.endswith('.csv'):
        file_path = os.path.join(folder_path_demanddetails, filename)
        print(f'Loading: {file_path}')
        chunk = pd.read_csv(file_path, usecols=needed_cols)
        all_chunks.append(chunk)
demand_details_df = pd.concat(all_chunks, ignore_index=True)
del all_chunks; gc.collect()

print("✅ Loaded data")

Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_12.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_5.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_1.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_10.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_7.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_14.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_4.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_6.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_13.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/output_demand/output_9.csv
Loading: /home/admin1/Downloads/punjab-data-prod-analysi

In [6]:
print(len(property_df))         # number of rows in properties
print(len(unit_df))             # number of rows in units
print(len(demand_df))   # number of rows in demand details
print(len(demand_details_df))   # number of rows in demand details

51136
235349
433244
11651880


In [7]:
# join pt and unit
joined_pt_unit = property_df.merge(unit_df, left_on='id', right_on='propertyid', how='left', suffixes=('_property', '_unit'))
del property_df, unit_df; gc.collect()
joined_pt_unit.head()
print(joined_pt_unit['id'].nunique())

51136


In [8]:
joined_pt_unit.head()

Unnamed: 0,id,propertyid_property,tenantid,status,ownershipcategory,usagecategory,createdtime,additionaldetails,propertyid_unit,occupancytype
0,98421215-340b-4327-8e1a-88cb09ade91f,PT-1507-2007885,pb.zirakpur,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1724819673390,"{""vasikaNo"": ""3042"", ""vasikaDate"": ""2010-06-03...",98421215-340b-4327-8e1a-88cb09ade91f,SELFOCCUPIED
1,e57cf274-4d5a-4064-9bdc-668e20afa4f8,PT-1507-1614804,pb.zirakpur,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1724819936757,"{""vasikaNo"": ""2494"", ""vasikaDate"": ""2015-03-04...",e57cf274-4d5a-4064-9bdc-668e20afa4f8,SELFOCCUPIED
2,e57cf274-4d5a-4064-9bdc-668e20afa4f8,PT-1507-1614804,pb.zirakpur,ACTIVE,INDIVIDUAL.SINGLEOWNER,RESIDENTIAL,1724819936757,"{""vasikaNo"": ""2494"", ""vasikaDate"": ""2015-03-04...",e57cf274-4d5a-4064-9bdc-668e20afa4f8,SELFOCCUPIED
3,c1243464-d8e8-49bc-ac8a-ebd7b31de584,PT-1507-2110139,pb.zirakpur,ACTIVE,INDIVIDUAL.SINGLEOWNER,MIXED,1748845263483,"{""vasikaNo"": ""2020-21/13/1/11858"", ""vasikaDate...",c1243464-d8e8-49bc-ac8a-ebd7b31de584,SELFOCCUPIED
4,c1243464-d8e8-49bc-ac8a-ebd7b31de584,PT-1507-2110139,pb.zirakpur,ACTIVE,INDIVIDUAL.SINGLEOWNER,MIXED,1748845263483,"{""vasikaNo"": ""2020-21/13/1/11858"", ""vasikaDate...",c1243464-d8e8-49bc-ac8a-ebd7b31de584,SELFOCCUPIED


In [9]:
# join demand and demand details
joined_demand = demand_df.merge(demand_details_df, left_on='id', right_on='demandid', how='left', suffixes=('_demand', '_detail'))
print(joined_demand['id'].nunique())
del demand_details_df, demand_df; gc.collect()
joined_demand.head()

433244


Unnamed: 0,id,consumercode,businessservice,taxperiodfrom,taxperiodto,status,demandid,taxheadcode,taxamount,collectionamount
0,886dd46d-2795-4eaa-bf3d-506500563758,PT-1507-1524867,PT,1554076800000,1585699199000,ACTIVE,886dd46d-2795-4eaa-bf3d-506500563758,PT_TAX,390.0,390.0
1,886dd46d-2795-4eaa-bf3d-506500563758,PT-1507-1524867,PT,1554076800000,1585699199000,ACTIVE,886dd46d-2795-4eaa-bf3d-506500563758,PT_UNIT_USAGE_EXEMPTION,0.0,0.0
2,886dd46d-2795-4eaa-bf3d-506500563758,PT-1507-1524867,PT,1554076800000,1585699199000,ACTIVE,886dd46d-2795-4eaa-bf3d-506500563758,PT_OWNER_EXEMPTION,0.0,0.0
3,886dd46d-2795-4eaa-bf3d-506500563758,PT-1507-1524867,PT,1554076800000,1585699199000,ACTIVE,886dd46d-2795-4eaa-bf3d-506500563758,PT_FIRE_CESS,0.0,0.0
4,886dd46d-2795-4eaa-bf3d-506500563758,PT-1507-1524867,PT,1554076800000,1585699199000,ACTIVE,886dd46d-2795-4eaa-bf3d-506500563758,PT_CANCER_CESS,7.8,7.8


In [10]:
import pytz

# Correct: parse as datetime from milliseconds since epoch
joined_demand['taxperiodfrom'] = pd.to_datetime(joined_demand['taxperiodfrom'], unit='ms', utc=True)
joined_demand['taxperiodto'] = pd.to_datetime(joined_demand['taxperiodto'], unit='ms', utc=True)

# Convert to IST (Asia/Kolkata)
ist = pytz.timezone('Asia/Kolkata')
joined_demand['taxperiodfrom'] = joined_demand['taxperiodfrom'].dt.tz_convert(ist)
joined_demand['taxperiodto'] = joined_demand['taxperiodto'].dt.tz_convert(ist)

# Financial year calculation
def get_fy(date):
    if date.month >= 4:
        fy_start = date.year
        fy_end = date.year + 1
    else:
        fy_start = date.year - 1
        fy_end = date.year
    return f"{fy_start}-{str(fy_end)[-2:]}"

joined_demand['fy'] = joined_demand['taxperiodfrom'].apply(get_fy)

# Group by consumercode
result = joined_demand.groupby('consumercode')['fy'].agg(['min', 'max']).reset_index()
result.rename(columns={'min': 'earliest_fy', 'max': 'latest_fy'}, inplace=True)

print(result)

         consumercode earliest_fy latest_fy
0      PT-1507-000020     2019-20   2024-25
1      PT-1507-000021     2017-18   2024-25
2      PT-1507-000022     2019-20   2024-25
3      PT-1507-000023     2018-19   2024-25
4      PT-1507-000026     2014-15   2024-25
...               ...         ...       ...
51268  PT-1507-999965     2019-20   2025-26
51269  PT-1507-999968     2019-20   2024-25
51270  PT-1507-999969     2019-20   2024-25
51271  PT-1507-999971     2020-21   2024-25
51272  PT-1507-999972     2020-21   2024-25

[51273 rows x 3 columns]


In [11]:
# Merge latest_fy onto joined_demand by consumercode
joined = joined_demand.merge(
    result[['consumercode', 'latest_fy']],
    on='consumercode',
    how='left'
)

latest_demand = joined[joined['fy'] == joined['latest_fy']]

demand_sum = latest_demand.groupby('consumercode')['taxamount'].sum().reset_index()
demand_sum.rename(columns={'taxamount':'latest_fy_taxamount'}, inplace=True)

result = result.merge(demand_sum, on='consumercode', how='left')

print(result)

         consumercode earliest_fy latest_fy  latest_fy_taxamount
0      PT-1507-000020     2019-20   2024-25               341.00
1      PT-1507-000021     2017-18   2024-25               279.00
2      PT-1507-000022     2019-20   2024-25               279.00
3      PT-1507-000023     2018-19   2024-25               279.00
4      PT-1507-000026     2014-15   2024-25                 0.00
...               ...         ...       ...                  ...
51268  PT-1507-999965     2019-20   2025-26               393.00
51269  PT-1507-999968     2019-20   2024-25               332.16
51270  PT-1507-999969     2019-20   2024-25               401.00
51271  PT-1507-999971     2020-21   2024-25               493.00
51272  PT-1507-999972     2020-21   2024-25               493.00

[51273 rows x 4 columns]


In [12]:
#Calculating the tax amount(demand) of current year
target_fy = "2025-26"
current_fy_demand = joined_demand[joined_demand['fy'] == target_fy]

df_fy_sum = current_fy_demand.groupby('consumercode')['taxamount'].sum().reset_index()
df_fy_sum.rename(columns={'taxamount': 'current_fy_taxamount'}, inplace=True)

all_consumercodes = pd.DataFrame(joined_demand['consumercode'].unique(), columns=['consumercode'])

final = all_consumercodes.merge(df_fy_sum, on='consumercode', how='left')
final['current_fy_taxamount'] = final['current_fy_taxamount'].fillna(0)

result = result.merge(final, on='consumercode', how='left')
result['current_fy_taxamount'] = result['current_fy_taxamount'].fillna(0)

print(result)

         consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-1507-000020     2019-20   2024-25               341.00   
1      PT-1507-000021     2017-18   2024-25               279.00   
2      PT-1507-000022     2019-20   2024-25               279.00   
3      PT-1507-000023     2018-19   2024-25               279.00   
4      PT-1507-000026     2014-15   2024-25                 0.00   
...               ...         ...       ...                  ...   
51268  PT-1507-999965     2019-20   2025-26               393.00   
51269  PT-1507-999968     2019-20   2024-25               332.16   
51270  PT-1507-999969     2019-20   2024-25               401.00   
51271  PT-1507-999971     2020-21   2024-25               493.00   
51272  PT-1507-999972     2020-21   2024-25               493.00   

       current_fy_taxamount  
0                       0.0  
1                       0.0  
2                       0.0  
3                       0.0  
4                       0.0  
...

In [13]:
# Fiscal years before the current FY
arrear_demand = joined_demand[joined_demand['fy'] < "2025-26"]

agg = arrear_demand.groupby('consumercode').agg(
    arrear_taxamount_sum=('taxamount', 'sum'),
    arrear_collectionamount_sum=('collectionamount', 'sum')
).reset_index()

agg['arrear_years_demand_generated'] = (
    agg['arrear_taxamount_sum'] - agg['arrear_collectionamount_sum']
)

result = result.merge(
    agg[['consumercode', 'arrear_years_demand_generated']],
    on='consumercode', how='left'
)
result['arrear_years_demand_generated'] = result['arrear_years_demand_generated'].fillna(0)

print(result)

         consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-1507-000020     2019-20   2024-25               341.00   
1      PT-1507-000021     2017-18   2024-25               279.00   
2      PT-1507-000022     2019-20   2024-25               279.00   
3      PT-1507-000023     2018-19   2024-25               279.00   
4      PT-1507-000026     2014-15   2024-25                 0.00   
...               ...         ...       ...                  ...   
51268  PT-1507-999965     2019-20   2025-26               393.00   
51269  PT-1507-999968     2019-20   2024-25               332.16   
51270  PT-1507-999969     2019-20   2024-25               401.00   
51271  PT-1507-999971     2020-21   2024-25               493.00   
51272  PT-1507-999972     2020-21   2024-25               493.00   

       current_fy_taxamount  arrear_years_demand_generated  
0                       0.0                        1059.02  
1                       0.0                         279.00  


In [14]:
relevant_codes = ['PT_TIME_PENALTY', 'PT_TIME_INTEREST']
filtered = joined_demand[joined_demand['taxheadcode'].isin(relevant_codes)]

grouped = (
    filtered.groupby(['consumercode', 'taxheadcode'])['taxamount']
    .sum()
    .unstack(fill_value=0)  # Puts taxheadcodes as columns, fills missing with 0
    .reset_index()
)

grouped = grouped[['consumercode', 'PT_TIME_PENALTY', 'PT_TIME_INTEREST']]
grouped = grouped.fillna(0)

result = result.merge(grouped, on='consumercode', how='left')
result[['PT_TIME_PENALTY', 'PT_TIME_INTEREST']] = result[['PT_TIME_PENALTY', 'PT_TIME_INTEREST']].fillna(0)

print(result)

         consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0      PT-1507-000020     2019-20   2024-25               341.00   
1      PT-1507-000021     2017-18   2024-25               279.00   
2      PT-1507-000022     2019-20   2024-25               279.00   
3      PT-1507-000023     2018-19   2024-25               279.00   
4      PT-1507-000026     2014-15   2024-25                 0.00   
...               ...         ...       ...                  ...   
51268  PT-1507-999965     2019-20   2025-26               393.00   
51269  PT-1507-999968     2019-20   2024-25               332.16   
51270  PT-1507-999969     2019-20   2024-25               401.00   
51271  PT-1507-999971     2020-21   2024-25               493.00   
51272  PT-1507-999972     2020-21   2024-25               493.00   

       current_fy_taxamount  arrear_years_demand_generated  PT_TIME_PENALTY  \
0                       0.0                        1059.02           212.75   
1                       0

In [15]:
unit_all_columns_df = pd.read_csv(
    '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/eg_pt_unit_zirakpur.csv'
)
print(unit_all_columns_df)

                                          id     tenantid  \
0       d5b22136-0042-4e2e-a06c-c2686519d38e  pb.zirakpur   
1       d5d5c95c-f0f8-4c92-afcd-73e50c98fb01  pb.zirakpur   
2       ff7375a9-7497-4744-ba4c-290b1c4e502f  pb.zirakpur   
3       50a28b78-5e42-4f60-a93b-c29c9d04c7f3  pb.zirakpur   
4       4737b804-6664-42c1-b1d0-b14c70275750  pb.zirakpur   
...                                      ...          ...   
235344  f67fad0b-29b6-474b-ac35-d43dff880af3  pb.zirakpur   
235345  00eb284e-c98f-4771-b977-24c657a61529  pb.zirakpur   
235346  15f8c330-3cda-4bda-970f-58ac0d4c7db8  pb.zirakpur   
235347  73ae4f8c-0d4f-479d-b4e1-db92b30cc39b  pb.zirakpur   
235348  3034abb2-dcf2-4479-8909-b7d72baa2a2a  pb.zirakpur   

                                  propertyid  floorno         unittype  \
0       2228f557-bc4e-494a-a623-85357ccc0a47        9              NaN   
1       79fcff38-3a0d-4c62-9554-404db8a57c74        0              NaN   
2       e999c36a-66fb-457f-ab4e-fb1aa5b9ba31 

In [16]:

# read active properties & needed columns
property_df = pd.read_csv(
    '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/eg_pt_property_zirakpur.csv',
    usecols=['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory', 'propertytype']
)
property_df = property_df[property_df['status'] == 'ACTIVE'].copy()
# Merge properties and units by property id
merged = property_df.merge(unit_all_columns_df, left_on='id', right_on='propertyid', suffixes=('_property', '_unit'))

def classify_ownership(occupancies):
    unique_types = set(occupancies)
    if 'RENTED' in unique_types:
        if len(unique_types) > 1:
            return 'Mixed'
        else:
            return 'Tenant'
    if 'SELFOCCUPIED' in unique_types:
        # If only SELFOCCUPIED or SELFOCCUPIED + UNOCCUPIED
        return 'Owner'
    if 'UNOCCUPIED' in unique_types:
        return 'Owner'
    # fallback
    return None

# Find occupancytypes per property id
ownership = (
    merged.groupby('propertyid_property')['occupancytype']
    .apply(classify_ownership)
    .reset_index()
    .rename(columns={'occupancytype': 'Owned_Rented'})
)

property_df = property_df.merge(ownership, left_on='propertyid', right_on = 'propertyid_property', how='left')

print(property_df)


                                         id       propertyid     tenantid  \
0      98421215-340b-4327-8e1a-88cb09ade91f  PT-1507-2007885  pb.zirakpur   
1      e57cf274-4d5a-4064-9bdc-668e20afa4f8  PT-1507-1614804  pb.zirakpur   
2      c1243464-d8e8-49bc-ac8a-ebd7b31de584  PT-1507-2110139  pb.zirakpur   
3      f26beb0b-7d40-46ce-afe8-8e14f52ae116   PT-1507-911582  pb.zirakpur   
4      7752330b-3279-41ba-a0c8-1c6ae8dcaab2   PT-1507-000708  pb.zirakpur   
...                                     ...              ...          ...   
51131  c9eb0d64-6a75-48de-895a-40824d0cb65f   PT-1507-031079  pb.zirakpur   
51132  4fe7fa5a-b335-4f61-928e-8020fc822b79  PT-1507-1994062  pb.zirakpur   
51133  516fbf96-edfc-42d1-90e6-9b1772e507e9  PT-1507-1994064  pb.zirakpur   
51134  b30cc38a-167a-446a-8498-f694e2b341cc  PT-1507-1994057  pb.zirakpur   
51135  c7901731-b473-4994-b62d-8b434143937a  PT-1507-1032586  pb.zirakpur   

       status                 propertytype          ownershipcategory  \
0 

In [17]:
def clean_numeric(series):
    # Replace 'NULL' strings and NaNs with 0, then convert to float
    return pd.to_numeric(series.replace('NULL', 0), errors='coerce').fillna(0)

merged['builtuparea'] = clean_numeric(merged['builtuparea'])
merged['plintharea'] = clean_numeric(merged['plintharea'])

area_summary = (
    merged.groupby('propertyid_property', as_index=False)
    .agg(
        total_builtup_area=('builtuparea', 'sum'),
        total_plinth_area=('plintharea', 'sum')
    )
)
# for col in ['total_builtup_area', 'total_plinth_area']:
#     if col in property_df.columns:
#         property_df = property_df.drop(col, axis=1)

property_df = property_df.merge(area_summary, left_on='propertyid' ,right_on='propertyid_property', how='left')
property_df['total_builtup_area'] = property_df['total_builtup_area'].fillna(0)
property_df['total_plinth_area'] = property_df['total_plinth_area'].fillna(0)

print(property_df)

                                         id       propertyid     tenantid  \
0      98421215-340b-4327-8e1a-88cb09ade91f  PT-1507-2007885  pb.zirakpur   
1      e57cf274-4d5a-4064-9bdc-668e20afa4f8  PT-1507-1614804  pb.zirakpur   
2      c1243464-d8e8-49bc-ac8a-ebd7b31de584  PT-1507-2110139  pb.zirakpur   
3      f26beb0b-7d40-46ce-afe8-8e14f52ae116   PT-1507-911582  pb.zirakpur   
4      7752330b-3279-41ba-a0c8-1c6ae8dcaab2   PT-1507-000708  pb.zirakpur   
...                                     ...              ...          ...   
51131  c9eb0d64-6a75-48de-895a-40824d0cb65f   PT-1507-031079  pb.zirakpur   
51132  4fe7fa5a-b335-4f61-928e-8020fc822b79  PT-1507-1994062  pb.zirakpur   
51133  516fbf96-edfc-42d1-90e6-9b1772e507e9  PT-1507-1994064  pb.zirakpur   
51134  b30cc38a-167a-446a-8498-f694e2b341cc  PT-1507-1994057  pb.zirakpur   
51135  c7901731-b473-4994-b62d-8b434143937a  PT-1507-1032586  pb.zirakpur   

       status                 propertytype          ownershipcategory  \
0 

In [18]:
property_result_merged = property_df.merge(
    result,
    left_on='propertyid',
    right_on='consumercode',
    how='left'
)

print(property_result_merged)

                                         id       propertyid     tenantid  \
0      98421215-340b-4327-8e1a-88cb09ade91f  PT-1507-2007885  pb.zirakpur   
1      e57cf274-4d5a-4064-9bdc-668e20afa4f8  PT-1507-1614804  pb.zirakpur   
2      c1243464-d8e8-49bc-ac8a-ebd7b31de584  PT-1507-2110139  pb.zirakpur   
3      f26beb0b-7d40-46ce-afe8-8e14f52ae116   PT-1507-911582  pb.zirakpur   
4      7752330b-3279-41ba-a0c8-1c6ae8dcaab2   PT-1507-000708  pb.zirakpur   
...                                     ...              ...          ...   
51131  c9eb0d64-6a75-48de-895a-40824d0cb65f   PT-1507-031079  pb.zirakpur   
51132  4fe7fa5a-b335-4f61-928e-8020fc822b79  PT-1507-1994062  pb.zirakpur   
51133  516fbf96-edfc-42d1-90e6-9b1772e507e9  PT-1507-1994064  pb.zirakpur   
51134  b30cc38a-167a-446a-8498-f694e2b341cc  PT-1507-1994057  pb.zirakpur   
51135  c7901731-b473-4994-b62d-8b434143937a  PT-1507-1032586  pb.zirakpur   

       status                 propertytype          ownershipcategory  \
0 

In [19]:
# Step 1: Load owner data
owner_df = pd.read_csv(
    '/home/admin1/Downloads/punjab-data-prod-analysis/zirakpur/eg_pt_owner_zirakpur.csv',
    usecols=['propertyid', 'ownertype', 'status']
)

owner_df = owner_df[owner_df['status'] == 'ACTIVE'].copy()

# Step 2: Determine exemption
owner_df['is_exempted'] = owner_df['ownertype'].isin(['WIDOW', 'FREEDOMFIGHTER'])
exempted_status = owner_df.groupby('propertyid')['is_exempted'].any().reset_index()
exempted_status['Is Property Exempted [Yes/ No]'] = exempted_status['is_exempted'].apply(lambda x: 'Yes' if x else 'No')
exempted_status = exempted_status.drop(columns=['is_exempted'])



In [20]:
# ✅ Step 1: Add exemption column to the merged result
property_result_merged = property_result_merged.merge(
    exempted_status[['propertyid', 'Is Property Exempted [Yes/ No]']],
    left_on='id',  # property_df.id == eg_pt_owner.propertyid
    right_on='propertyid',
    how='left'
)

property_result_merged['Is Property Exempted [Yes/ No]'] = property_result_merged['Is Property Exempted [Yes/ No]'].fillna('No')

# Drop duplicate merge key
if 'propertyid' in property_result_merged.columns:
    property_result_merged.drop(columns=['propertyid'], inplace=True)


# If 'propertyid_x' exists, use it as the correct property ID
if 'propertyid_x' in property_result_merged.columns:
    property_result_merged['propertyid'] = property_result_merged['propertyid_x']

# ✅ Step 2: Rename columns for the final report
report = property_result_merged.rename(columns={
    'tenantid': 'ULB',
    'propertyid': 'Property ID',
    'usagecategory': 'Usage',
    'createdtime': 'Date of Creation of the Property in the System',
    'additionaldetails': 'Date of Construction of the Property',
    'ownershipcategory': 'Ownership Type',
    'Is Property Exempted [Yes/ No]': 'Is Property Exempted [Yes/ No]',
    'Owned_Rented': 'Owned_Rented (Owner/ Rented/ Mixed)',
    'earliest_fy': 'Earliest Financial Year for which Demand was Generated',
    'latest_fy': 'Latest Financial Year for which Demand was Generated',
    'latest_fy_taxamount': 'Latest Demand Generated [in Rs.]',
    'current_fy_taxamount': 'Current Years Demand Generated [in Rs.]',
    'PT_TIME_PENALTY': 'Penalty',
    'PT_TIME_INTEREST': 'Interest',
    'arrear_years_demand_generated': 'Arrear Years Demand Generated [in Rs.]',
    'propertytype': 'Property Type[Building/ Vacant]',
    'total_builtup_area': 'Total Builtup Area [Sum of all units/ floors]',
    'total_plinth_area': 'Total Plinth Area [Sum of all units/ floors]'
}).copy()

# ✅ Step 3: Format ULB and date fields
def epoch_to_custom_date(epoch_ms):
    return datetime.fromtimestamp(epoch_ms / 1000).strftime('%d-%b-%Y') if pd.notna(epoch_ms) else None

def get_year_construction(val):
    if pd.isna(val): return None
    try: return json.loads(val).get('yearConstruction')
    except: return None

report['ULB'] = report['ULB'].str.split('.').str[1].str.capitalize()
report['Date of Creation of the Property in the System'] = report['Date of Creation of the Property in the System'].apply(epoch_to_custom_date)
report['Date of Construction of the Property'] = report['Date of Construction of the Property'].apply(get_year_construction)

# ✅ Step 4: Select final columns in required order
final_report = report[
    [
        'ULB',
        'Property ID',
        'Usage',
        'Date of Creation of the Property in the System',
        'Date of Construction of the Property',
        'Ownership Type',
        'Is Property Exempted [Yes/ No]',
        'Owned_Rented (Owner/ Rented/ Mixed)',
        'Earliest Financial Year for which Demand was Generated',
        'Latest Financial Year for which Demand was Generated',
        'Latest Demand Generated [in Rs.]',
        'Current Years Demand Generated [in Rs.]',
        'Penalty',
        'Interest',
        'Arrear Years Demand Generated [in Rs.]',
        'Property Type[Building/ Vacant]',
        'Total Builtup Area [Sum of all units/ floors]',
        'Total Plinth Area [Sum of all units/ floors]'
    ]
].copy()

# ✅ Step 5: Save the CSV
print("✅ Writing CSV")
final_report.to_csv('Punjab_Data_Analysis_zirakpur_final.csv', index=False)
print(f"🎉 Done! CSV generated with {len(final_report)} properties")


✅ Writing CSV
🎉 Done! CSV generated with 51136 properties
