In [1]:
import os
import pandas as pd
from datetime import datetime
import json
import gc

# Root folder of the Patiala extract; every file read in this cell lives below it.
DATA_DIR = '/home/prerna/Punjab/punjab-data-prod-analysis/patiala'
# Trailing '' keeps the trailing slash, so the value is unchanged for any later use.
folder_path_demanddetails = os.path.join(DATA_DIR, 'output_demand_details', '')

# read active properties & needed columns
property_df = pd.read_csv(
    os.path.join(DATA_DIR, 'eg_pt_property.csv'),
    usecols=['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory']
)
property_df = property_df[property_df['status'] == 'ACTIVE'].copy()

# read units
unit_df = pd.read_csv(
    os.path.join(DATA_DIR, 'eg_pt_unit.csv'),
    usecols=['propertyid', 'occupancytype']
)

# read demand (consumercode is forced to str so it is never parsed as a number)
demand_df = pd.read_csv(
    os.path.join(DATA_DIR, 'egbs_demand_v1.csv'),
    dtype={"consumercode": str},
    low_memory=False,
    usecols=['id', 'taxperiodfrom', 'taxperiodto', 'consumercode', 'status']
)
demand_df = demand_df[demand_df['status'] == 'ACTIVE'].copy()

# read demand details: the table is split over many CSV files in one folder.
# Each file is read whole (only the needed columns) and all parts are
# concatenated once at the end.
needed_cols = ['demandid', 'taxamount', 'collectionamount', 'taxheadcode']
all_chunks = []
# os.listdir returns names in arbitrary order; sorting makes the row order of
# the concatenated frame reproducible from run to run.
for filename in sorted(os.listdir(folder_path_demanddetails)):
    if not filename.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path_demanddetails, filename)
    print(f'Loading: {file_path}')
    all_chunks.append(pd.read_csv(file_path, usecols=needed_cols))

if not all_chunks:
    # pd.concat([]) would raise a ValueError anyway; say which folder was empty.
    raise ValueError(f'No demand-detail CSV files found in {folder_path_demanddetails}')

demand_details_df = pd.concat(all_chunks, ignore_index=True)
# The per-file frames are no longer needed once concatenated.
del all_chunks; gc.collect()

print("✅ Loaded data")

Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_263.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_4.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_34.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_193.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_79.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_38.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_292.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_6.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_201.csv
Loading: /home/prerna/Punjab/punjab-data-prod-analysis/patiala/output_demand_details/output_161.csv
Loading

In [2]:
# Row counts of the loaded tables, one per line, in this order:
# properties, units, demand, demand details.
for loaded_frame in (property_df, unit_df, demand_df, demand_details_df):
    print(len(loaded_frame))

136121
415135
916345
14643753


In [3]:
# join pt and unit: one row per (property, unit); properties without any unit
# are kept (left join) with empty unit columns.
joined_pt_unit = property_df.merge(unit_df, left_on='id', right_on='propertyid', how='left', suffixes=('_property', '_unit'))
# The two source frames are not needed after the join; release their memory.
del property_df, unit_df; gc.collect()
# Number of distinct properties present in the joined frame.
print(joined_pt_unit['id'].nunique())

136121


In [4]:
# Preview the first rows of the property-unit join.
joined_pt_unit.head()

Unnamed: 0,id,propertyid_property,tenantid,status,ownershipcategory,usagecategory,createdtime,additionaldetails,propertyid_unit,occupancytype
0,7b7593ba-d9fd-4ade-9b62-4c84e59a7885,PT-1910-1380786,pb.patiala,ACTIVE,INDIVIDUAL.SINGLEOWNER,NONRESIDENTIAL.COMMERCIAL,1658226556748,"{""surveyInfo"": {""floor"": ""Measurement taken by...",7b7593ba-d9fd-4ade-9b62-4c84e59a7885,SELFOCCUPIED
1,7b7593ba-d9fd-4ade-9b62-4c84e59a7885,PT-1910-1380786,pb.patiala,ACTIVE,INDIVIDUAL.SINGLEOWNER,NONRESIDENTIAL.COMMERCIAL,1658226556748,"{""surveyInfo"": {""floor"": ""Measurement taken by...",7b7593ba-d9fd-4ade-9b62-4c84e59a7885,SELFOCCUPIED
2,38e60ced-0142-44ef-9162-839e3844c79a,PT-1910-1171983,pb.patiala,ACTIVE,INDIVIDUAL.SINGLEOWNER,MIXED,1634437004030,"{""legacyInfo"": {""usage"": ""Built Up"", ""colony"":...",38e60ced-0142-44ef-9162-839e3844c79a,RENTED
3,38e60ced-0142-44ef-9162-839e3844c79a,PT-1910-1171983,pb.patiala,ACTIVE,INDIVIDUAL.SINGLEOWNER,MIXED,1634437004030,"{""legacyInfo"": {""usage"": ""Built Up"", ""colony"":...",38e60ced-0142-44ef-9162-839e3844c79a,SELFOCCUPIED
4,38e60ced-0142-44ef-9162-839e3844c79a,PT-1910-1171983,pb.patiala,ACTIVE,INDIVIDUAL.SINGLEOWNER,MIXED,1634437004030,"{""legacyInfo"": {""usage"": ""Built Up"", ""colony"":...",38e60ced-0142-44ef-9162-839e3844c79a,SELFOCCUPIED


In [5]:
# join demand and demand details: every demand row is repeated once per
# tax-head detail row; demands without details are kept (left join).
joined_demand = pd.merge(
    demand_df,
    demand_details_df,
    how='left',
    left_on='id',
    right_on='demandid',
    suffixes=('_demand', '_detail'),
)
# Number of distinct demands present after the join.
print(joined_demand['id'].nunique())
# Release the two source frames now that they are combined.
del demand_details_df, demand_df
gc.collect()
joined_demand.head()

916345


Unnamed: 0,id,consumercode,taxperiodfrom,taxperiodto,status,demandid,taxheadcode,taxamount,collectionamount
0,29608,PT-1909-016884,1522540800000,1554076799000,ACTIVE,29608,PT_TAX,1196.67,1196.67
1,29608,PT-1909-016884,1522540800000,1554076799000,ACTIVE,29608,PT_UNIT_USAGE_EXEMPTION,0.0,0.0
2,29608,PT-1909-016884,1522540800000,1554076799000,ACTIVE,29608,PT_OWNER_EXEMPTION,0.0,0.0
3,29608,PT-1909-016884,1522540800000,1554076799000,ACTIVE,29608,PT_FIRE_CESS,0.0,0.0
4,29608,PT-1909-016884,1522540800000,1554076799000,ACTIVE,29608,PT_CANCER_CESS,23.94,23.94


In [6]:
# Tax periods are stored as milliseconds since the epoch. Parse them as UTC
# instants, then express them in IST (Asia/Kolkata) so the month/year read
# from them afterwards is the Indian calendar date.
# The zone is passed by name, which pandas resolves itself, so no extra
# timezone import is needed in the middle of the notebook.
ist = 'Asia/Kolkata'
for period_col in ('taxperiodfrom', 'taxperiodto'):
    joined_demand[period_col] = (
        pd.to_datetime(joined_demand[period_col], unit='ms', utc=True)
        .dt.tz_convert(ist)
    )
# Financial year calculation
def get_fy(date):
    """Return the April-to-March financial-year label for a date.

    Parameters
    ----------
    date : object with ``.year`` and ``.month`` (e.g. ``datetime``, ``pd.Timestamp``)
        The date to classify. Missing values (``None``, ``NaN``, ``NaT``) are accepted.

    Returns
    -------
    str or None
        ``'YYYY-YY'`` (e.g. ``'2025-26'`` for any date from April 2025 through
        March 2026), or ``None`` when ``date`` is missing.
    """
    # A missing date used to produce the bogus label 'nan-an', which sorts after
    # every real label and would therefore win any max() aggregation.
    if pd.isna(date):
        return None
    # January-March belong to the financial year that started the previous April.
    fy_start = date.year if date.month >= 4 else date.year - 1
    return f"{fy_start}-{str(fy_start + 1)[-2:]}"

# Label every demand-detail row with the financial year of its period start.
joined_demand['fy'] = joined_demand['taxperiodfrom'].apply(get_fy)

# Group by consumercode: first and last financial year with a demand.
# min/max compare the labels as strings.
result = (
    joined_demand.groupby('consumercode')['fy']
    .agg(earliest_fy='min', latest_fy='max')
    .reset_index()
)

print(result)

           consumercode earliest_fy latest_fy
0        PT-1909-015023     2018-19   2024-25
1        PT-1909-016884     2018-19   2024-25
2        PT-1909-031318     2017-18   2020-21
3        PT-1909-035743     2018-19   2018-19
4        PT-1909-046124     2018-19   2024-25
...                 ...         ...       ...
135860  PT-1910-2466161     2013-14   2025-26
135861  PT-1910-2466215     2022-23   2025-26
135862  PT-1910-2466253     2021-22   2025-26
135863  PT-1910-2466389     2018-19   2025-26
135864  PT-1910-2466445     2022-23   2025-26

[135865 rows x 3 columns]


In [7]:
# Attach each consumer's latest financial year to every one of its demand rows.
latest_fy_lookup = result[['consumercode', 'latest_fy']]
joined = joined_demand.merge(latest_fy_lookup, how='left', on='consumercode')

# Keep only the rows that belong to that latest financial year.
is_latest_fy = joined['fy'] == joined['latest_fy']
latest_demand = joined.loc[is_latest_fy]

# Total tax amount (all tax heads) of the latest financial year per consumer.
demand_sum = (
    latest_demand.groupby('consumercode')['taxamount']
    .sum()
    .rename('latest_fy_taxamount')
    .reset_index()
)

result = result.merge(demand_sum, how='left', on='consumercode')

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount
0        PT-1909-015023     2018-19   2024-25                  0.0
1        PT-1909-016884     2018-19   2024-25               1805.0
2        PT-1909-031318     2017-18   2020-21                  0.0
3        PT-1909-035743     2018-19   2018-19                398.0
4        PT-1909-046124     2018-19   2024-25               1343.0
...                 ...         ...       ...                  ...
135860  PT-1910-2466161     2013-14   2025-26               1386.0
135861  PT-1910-2466215     2022-23   2025-26               3209.0
135862  PT-1910-2466253     2021-22   2025-26                769.0
135863  PT-1910-2466389     2018-19   2025-26                868.0
135864  PT-1910-2466445     2022-23   2025-26               3476.0

[135865 rows x 4 columns]


In [8]:
# Tax amount (demand) of the current financial year, per consumer.
target_fy = "2025-26"
current_fy_demand = joined_demand.loc[joined_demand['fy'] == target_fy]

df_fy_sum = (
    current_fy_demand.groupby('consumercode')['taxamount']
    .sum()
    .rename('current_fy_taxamount')
    .reset_index()
)

# Every consumer seen in the demand data, so that consumers without a demand
# in the target year still get a row (filled with 0 below).
all_consumercodes = pd.DataFrame({'consumercode': joined_demand['consumercode'].unique()})

final = all_consumercodes.merge(df_fy_sum, how='left', on='consumercode')
final = final.assign(current_fy_taxamount=final['current_fy_taxamount'].fillna(0))

result = result.merge(final, how='left', on='consumercode')
result['current_fy_taxamount'] = result['current_fy_taxamount'].fillna(0)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0        PT-1909-015023     2018-19   2024-25                  0.0   
1        PT-1909-016884     2018-19   2024-25               1805.0   
2        PT-1909-031318     2017-18   2020-21                  0.0   
3        PT-1909-035743     2018-19   2018-19                398.0   
4        PT-1909-046124     2018-19   2024-25               1343.0   
...                 ...         ...       ...                  ...   
135860  PT-1910-2466161     2013-14   2025-26               1386.0   
135861  PT-1910-2466215     2022-23   2025-26               3209.0   
135862  PT-1910-2466253     2021-22   2025-26                769.0   
135863  PT-1910-2466389     2018-19   2025-26                868.0   
135864  PT-1910-2466445     2022-23   2025-26               3476.0   

        current_fy_taxamount  
0                        0.0  
1                        0.0  
2                        0.0  
3                        0.0  
4   

In [9]:
# Fiscal years before the current FY.
# `target_fy` is the current-FY label set in the current-year demand cell;
# reusing it keeps the arrear cut-off and the current-year filter in sync
# instead of repeating the literal. Labels are 'YYYY-YY' strings, so '<' is a
# string comparison, which orders four-digit years chronologically.
arrear_demand = joined_demand[joined_demand['fy'] < target_fy]

# Per consumer: total tax raised and total collected over those earlier years.
agg = arrear_demand.groupby('consumercode').agg(
    arrear_taxamount_sum=('taxamount', 'sum'),
    arrear_collectionamount_sum=('collectionamount', 'sum')
).reset_index()

# Reported arrear figure = tax amount minus collection amount.
agg['arrear_years_demand_generated'] = (
    agg['arrear_taxamount_sum'] - agg['arrear_collectionamount_sum']
)

result = result.merge(
    agg[['consumercode', 'arrear_years_demand_generated']],
    on='consumercode', how='left'
)
# Consumers with no demand before the current FY get 0.
result['arrear_years_demand_generated'] = result['arrear_years_demand_generated'].fillna(0)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0        PT-1909-015023     2018-19   2024-25                  0.0   
1        PT-1909-016884     2018-19   2024-25               1805.0   
2        PT-1909-031318     2017-18   2020-21                  0.0   
3        PT-1909-035743     2018-19   2018-19                398.0   
4        PT-1909-046124     2018-19   2024-25               1343.0   
...                 ...         ...       ...                  ...   
135860  PT-1910-2466161     2013-14   2025-26               1386.0   
135861  PT-1910-2466215     2022-23   2025-26               3209.0   
135862  PT-1910-2466253     2021-22   2025-26                769.0   
135863  PT-1910-2466389     2018-19   2025-26                868.0   
135864  PT-1910-2466445     2022-23   2025-26               3476.0   

        current_fy_taxamount  arrear_years_demand_generated  
0                        0.0                           0.00  
1                        0.0       

In [10]:
# Total penalty and interest per consumer, one column per tax head.
relevant_codes = ['PT_TIME_PENALTY', 'PT_TIME_INTEREST']
filtered = joined_demand[joined_demand['taxheadcode'].isin(relevant_codes)]

grouped = (
    filtered.groupby(['consumercode', 'taxheadcode'])['taxamount']
    .sum()
    .unstack(fill_value=0)  # Puts taxheadcodes as columns, fills missing with 0
    # Guarantee both columns exist, in a fixed order, even when one tax head
    # never occurs in the data; plain column selection would raise KeyError.
    .reindex(columns=relevant_codes, fill_value=0)
    .reset_index()
)

result = result.merge(grouped, on='consumercode', how='left')
# Consumers with neither tax head have no row in `grouped`; report 0 for them.
result[relevant_codes] = result[relevant_codes].fillna(0)

print(result)

           consumercode earliest_fy latest_fy  latest_fy_taxamount  \
0        PT-1909-015023     2018-19   2024-25                  0.0   
1        PT-1909-016884     2018-19   2024-25               1805.0   
2        PT-1909-031318     2017-18   2020-21                  0.0   
3        PT-1909-035743     2018-19   2018-19                398.0   
4        PT-1909-046124     2018-19   2024-25               1343.0   
...                 ...         ...       ...                  ...   
135860  PT-1910-2466161     2013-14   2025-26               1386.0   
135861  PT-1910-2466215     2022-23   2025-26               3209.0   
135862  PT-1910-2466253     2021-22   2025-26                769.0   
135863  PT-1910-2466389     2018-19   2025-26                868.0   
135864  PT-1910-2466445     2022-23   2025-26               3476.0   

        current_fy_taxamount  arrear_years_demand_generated  PT_TIME_PENALTY  \
0                        0.0                           0.00             0.00   

In [11]:
# Read the unit table again, this time with every column.
unit_csv_path = '/home/prerna/Punjab/punjab-data-prod-analysis/patiala/eg_pt_unit.csv'
unit_all_columns_df = pd.read_csv(unit_csv_path)
print(unit_all_columns_df)

                                          id    tenantid  \
0       63589044-ff90-471f-a11a-e22048b5c841  pb.patiala   
1       32715f09-416c-4986-96e3-5dd3907382dd  pb.patiala   
2       5c129ed9-32e4-4215-a4f4-51c9b545da4a  pb.patiala   
3       49f4a786-061c-4cf7-a0bc-163688ec4695  pb.patiala   
4       db357902-c481-43f9-9cd6-477107bea8b0  pb.patiala   
...                                      ...         ...   
415130  e9691517-b815-4f76-8a99-05b122503c79  pb.patiala   
415131  3e56ca50-3740-4cf2-b812-2b189d77a743  pb.patiala   
415132  a2846c3b-79bc-420b-b52e-17af92994828  pb.patiala   
415133  f53321fc-9910-4f17-a620-33b79297e339  pb.patiala   
415134  7d75887c-f7fa-4ce5-bb86-17a91d4c4a4b  pb.patiala   

                                  propertyid  floorno         unittype  \
0       d8faffdd-b0ef-4298-a542-e0a5ae6268e8        0              NaN   
1       7851f6c0-543f-437a-b4fb-6f58d29a5e90        0              NaN   
2       7851f6c0-543f-437a-b4fb-6f58d29a5e90        1    

In [12]:

# Read the property table again (this time including `propertytype`) and keep
# only the ACTIVE records.
property_columns = ['id', 'propertyid', 'tenantid', 'createdtime', 'additionaldetails', 'ownershipcategory', 'status', 'usagecategory', 'propertytype']
property_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/patiala/eg_pt_property.csv',
    usecols=property_columns
)
active_mask = property_df['status'] == 'ACTIVE'
property_df = property_df.loc[active_mask].copy()

# Pair every property with its units: the property's `id` is matched against
# the unit's `propertyid`. Column names present in both tables receive the
# `_property` / `_unit` suffix. Default (inner) join: only properties that
# have at least one unit appear in `merged`.
merged = pd.merge(
    property_df,
    unit_all_columns_df,
    left_on='id',
    right_on='propertyid',
    suffixes=('_property', '_unit'),
)

# def classify_ownership(occupancies):
#     unique_types = set(occupancies)
#     if 'RENTED' in unique_types:
#         if len(unique_types) > 1:
#             return 'Mixed'
#         else:
#             return 'Tenant'
#     if 'SELFOCCUPIED' in unique_types:
#         # If only SELFOCCUPIED or SELFOCCUPIED + UNOCCUPIED
#         return 'Owner'
#     if 'UNOCCUPIED' in unique_types:
#         return 'Owner'
#     # fallback
#     return None

def classify_ownership(occupancies):
    """Classify a property from the occupancy types of its units.

    Parameters
    ----------
    occupancies : iterable
        Occupancy type of each unit of one property. Missing values
        (``None`` / ``NaN``) are ignored.

    Returns
    -------
    str or None
        ``"Tenant"`` when every known occupancy is tenant-like (``RENTED`` / ``PG``),
        ``"Mixed"`` when tenant-like occupancies occur together with any other type,
        ``"Owner"`` when there is no tenant-like occupancy and ``SELFOCCUPIED``
        or ``UNOCCUPIED`` is present, ``None`` otherwise.
    """
    # Ignore missing occupancy values: a NaN used to count as a "non-tenant"
    # type and turned a fully rented property into "Mixed".
    unique_types = {occ for occ in occupancies if pd.notna(occ)}

    # tenant-like categories
    tenant_types = {"RENTED", "PG"}

    # if any tenant type present
    if unique_types & tenant_types:
        return "Tenant" if unique_types <= tenant_types else "Mixed"

    # owner-like categories
    if unique_types & {"SELFOCCUPIED", "UNOCCUPIED"}:
        return "Owner"

    # fallback: no known occupancy type
    return None


# Find occupancytypes per property id and reduce them to one class per property.
# The grouping key is renamed to `propertyid` so the merge below joins on a
# single shared column and does not leave a duplicate key column behind in
# `property_df` (which would collide on later merges).
ownership = (
    merged.groupby('propertyid_property')['occupancytype']
    .apply(classify_ownership)
    .reset_index()
    .rename(columns={
        'propertyid_property': 'propertyid',
        'occupancytype': 'Owned_Rented'
    })
)

# Left join: properties that have no units keep a missing Owned_Rented value.
property_df = property_df.merge(ownership, on='propertyid', how='left')

print(property_df)


                                          id       propertyid    tenantid  \
0       7b7593ba-d9fd-4ade-9b62-4c84e59a7885  PT-1910-1380786  pb.patiala   
1       38e60ced-0142-44ef-9162-839e3844c79a  PT-1910-1171983  pb.patiala   
2       edc59d90-4578-4fe5-8988-c8eb90ea9601  PT-1910-1170452  pb.patiala   
3       fa06ba77-2491-4433-bd10-67c24bd0a768  PT-1910-1178924  pb.patiala   
4       325d1f7d-21ca-494a-bc8e-40b4480b58ee  PT-1910-1196167  pb.patiala   
...                                      ...              ...         ...   
136116  1e2da04b-d2d4-484d-991c-4591fe07a4c8  PT-1910-1395438  pb.patiala   
136117  f33cb80b-ed0a-4744-b937-5eb8edfad678  PT-1910-1343905  pb.patiala   
136118  9e634662-656a-4a77-9811-5ef799b4964c  PT-1910-1294308  pb.patiala   
136119  2e75c8c4-6649-495c-9d54-87d7beaba651  PT-1910-1240383  pb.patiala   
136120  7e4899f4-a465-4c70-a8b6-ced63c8f9108  PT-1910-1289747  pb.patiala   

        status                 propertytype          ownershipcategory  \
0

In [13]:
def clean_numeric(series):
    """Convert a column to float, mapping anything non-numeric to 0.

    Parameters
    ----------
    series : pd.Series
        Raw values; may contain numbers, numeric strings, the literal string
        ``'NULL'``, other text, or missing values.

    Returns
    -------
    pd.Series
        float64 Series of the same length and index; ``'NULL'``, unparsable
        text and missing values become ``0.0``.
    """
    # errors='coerce' already turns 'NULL' (and any other non-numeric text)
    # into NaN, so a separate replace step is unnecessary; the cast keeps the
    # result float even when every value happens to be an integer.
    return pd.to_numeric(series, errors='coerce').fillna(0).astype(float)

area_columns = ['total_builtup_area', 'total_plinth_area']

# Make the unit-level areas numeric before summing them.
merged['builtuparea'] = clean_numeric(merged['builtuparea'])
merged['plintharea'] = clean_numeric(merged['plintharea'])

# Sum the areas of all units of each property. The grouping key is renamed to
# `propertyid` so the merge below joins on one shared column instead of adding
# a second key column to `property_df`.
area_summary = (
    merged.groupby('propertyid_property', as_index=False)
    .agg(
        total_builtup_area=('builtuparea', 'sum'),
        total_plinth_area=('plintharea', 'sum')
    )
    .rename(columns={'propertyid_property': 'propertyid'})
)

property_df = (
    property_df
    # Dropping area columns left over from an earlier run keeps this cell
    # re-runnable (otherwise the merge would create _x/_y duplicates).
    .drop(columns=area_columns, errors='ignore')
    .merge(area_summary, on='propertyid', how='left')
)
# Properties without any unit have no row in area_summary; report 0 for them.
property_df[area_columns] = property_df[area_columns].fillna(0)

print(property_df)

                                          id       propertyid    tenantid  \
0       7b7593ba-d9fd-4ade-9b62-4c84e59a7885  PT-1910-1380786  pb.patiala   
1       38e60ced-0142-44ef-9162-839e3844c79a  PT-1910-1171983  pb.patiala   
2       edc59d90-4578-4fe5-8988-c8eb90ea9601  PT-1910-1170452  pb.patiala   
3       fa06ba77-2491-4433-bd10-67c24bd0a768  PT-1910-1178924  pb.patiala   
4       325d1f7d-21ca-494a-bc8e-40b4480b58ee  PT-1910-1196167  pb.patiala   
...                                      ...              ...         ...   
136116  1e2da04b-d2d4-484d-991c-4591fe07a4c8  PT-1910-1395438  pb.patiala   
136117  f33cb80b-ed0a-4744-b937-5eb8edfad678  PT-1910-1343905  pb.patiala   
136118  9e634662-656a-4a77-9811-5ef799b4964c  PT-1910-1294308  pb.patiala   
136119  2e75c8c4-6649-495c-9d54-87d7beaba651  PT-1910-1240383  pb.patiala   
136120  7e4899f4-a465-4c70-a8b6-ced63c8f9108  PT-1910-1289747  pb.patiala   

        status                 propertytype          ownershipcategory  \
0

In [14]:
# Attach the per-consumer demand figures to the property table: a property's
# `propertyid` is matched against the demand `consumercode`. Left join, so
# properties without any demand stay in the table with empty demand columns.
property_result_merged = pd.merge(
    property_df,
    result,
    how='left',
    left_on='propertyid',
    right_on='consumercode',
)

print(property_result_merged)

                                          id       propertyid    tenantid  \
0       7b7593ba-d9fd-4ade-9b62-4c84e59a7885  PT-1910-1380786  pb.patiala   
1       38e60ced-0142-44ef-9162-839e3844c79a  PT-1910-1171983  pb.patiala   
2       edc59d90-4578-4fe5-8988-c8eb90ea9601  PT-1910-1170452  pb.patiala   
3       fa06ba77-2491-4433-bd10-67c24bd0a768  PT-1910-1178924  pb.patiala   
4       325d1f7d-21ca-494a-bc8e-40b4480b58ee  PT-1910-1196167  pb.patiala   
...                                      ...              ...         ...   
136116  1e2da04b-d2d4-484d-991c-4591fe07a4c8  PT-1910-1395438  pb.patiala   
136117  f33cb80b-ed0a-4744-b937-5eb8edfad678  PT-1910-1343905  pb.patiala   
136118  9e634662-656a-4a77-9811-5ef799b4964c  PT-1910-1294308  pb.patiala   
136119  2e75c8c4-6649-495c-9d54-87d7beaba651  PT-1910-1240383  pb.patiala   
136120  7e4899f4-a465-4c70-a8b6-ced63c8f9108  PT-1910-1289747  pb.patiala   

        status                 propertytype          ownershipcategory  \
0

In [15]:
# Step 1: Load owner data and keep ACTIVE owner records only
owner_df = pd.read_csv(
    '/home/prerna/Punjab/punjab-data-prod-analysis/patiala/eg_pt_owner.csv',
    usecols=['propertyid', 'ownertype', 'status']
)
owner_df = owner_df.loc[owner_df['status'] == 'ACTIVE'].copy()

# Step 2: Determine exemption
# A property counts as exempted when at least one of its active owners has one
# of these owner types.
exempt_owner_types = ['WIDOW', 'FREEDOMFIGHTER']
owner_df['is_exempted'] = owner_df['ownertype'].isin(exempt_owner_types)

exempted_status = owner_df.groupby('propertyid')['is_exempted'].any().reset_index()
exempted_status['Is Property Exempted [Yes/ No]'] = exempted_status['is_exempted'].map({True: 'Yes', False: 'No'})
exempted_status = exempted_status.drop(columns=['is_exempted'])



In [16]:
# ✅ Step 1: Add exemption column to the merged result
exempt_col = 'Is Property Exempted [Yes/ No]'

# The owner table's `propertyid` holds the property's `id`, not the PT-...
# number that `propertyid` means in the property table. Renaming it before the
# merge avoids a name clash (propertyid_x / propertyid_y) and the fix-up code
# that clash used to require.
exemption_lookup = exempted_status[['propertyid', exempt_col]].rename(
    columns={'propertyid': 'owner_propertyid'}
)

property_result_merged = (
    property_result_merged
    # Dropping a flag left over from an earlier run keeps this cell re-runnable.
    .drop(columns=[exempt_col], errors='ignore')
    .merge(
        exemption_lookup,
        left_on='id',  # property_df.id == eg_pt_owner.propertyid
        right_on='owner_propertyid',
        how='left'
    )
    # The right-hand merge key duplicates `id`; it is not needed afterwards.
    .drop(columns=['owner_propertyid'])
)

# Properties without any active owner record are reported as not exempted.
property_result_merged[exempt_col] = property_result_merged[exempt_col].fillna('No')

# ✅ Step 2: Rename columns for the final report
# (rename returns a new frame, so `property_result_merged` is left unchanged)
report = property_result_merged.rename(columns={
    'tenantid': 'ULB',
    'propertyid': 'Property ID',
    'usagecategory': 'Usage',
    'createdtime': 'Date of Creation of the Property in the System',
    'additionaldetails': 'Date of Construction of the Property',
    'ownershipcategory': 'Ownership Type',
    'Owned_Rented': 'Owned_Rented (Owner/ Rented/ Mixed)',
    'earliest_fy': 'Earliest Financial Year for which Demand was Generated',
    'latest_fy': 'Latest Financial Year for which Demand was Generated',
    'latest_fy_taxamount': 'Latest Demand Generated [in Rs.]',
    'current_fy_taxamount': 'Current Years Demand Generated [in Rs.]',
    'PT_TIME_PENALTY': 'Penalty',
    'PT_TIME_INTEREST': 'Interest',
    'arrear_years_demand_generated': 'Arrear Years Demand Generated [in Rs.]',
    'propertytype': 'Property Type[Building/ Vacant]',
    'total_builtup_area': 'Total Builtup Area [Sum of all units/ floors]',
    'total_plinth_area': 'Total Plinth Area [Sum of all units/ floors]'
})

# ✅ Step 3: Format ULB and date fields
def epoch_to_custom_date(epoch_ms):
    """Format an epoch timestamp in milliseconds as an IST calendar date.

    Parameters
    ----------
    epoch_ms : int or float
        Milliseconds since the Unix epoch; may be missing (``None`` / ``NaN``).

    Returns
    -------
    str or None
        The date in Asia/Kolkata as ``'DD-Mon-YYYY'`` (e.g. ``'19-Jul-2022'``),
        or ``None`` when ``epoch_ms`` is missing.
    """
    if pd.isna(epoch_ms):
        return None
    # Convert in an explicit timezone rather than the machine's local one, so
    # the report does not depend on where the notebook runs and matches the
    # IST handling used for the tax periods.
    return pd.Timestamp(epoch_ms, unit='ms', tz='Asia/Kolkata').strftime('%d-%b-%Y')

def get_year_construction(val):
    """Extract ``yearConstruction`` from a JSON-encoded details string.

    Parameters
    ----------
    val : str
        JSON text expected to encode an object; may be missing (``None`` / ``NaN``).

    Returns
    -------
    object or None
        The value stored under ``'yearConstruction'``; ``None`` when ``val`` is
        missing, is not valid JSON, does not encode an object, or has no such key.
    """
    if pd.isna(val):
        return None
    try:
        return json.loads(val).get('yearConstruction')
    # Only the expected failures are swallowed: invalid JSON (ValueError),
    # non-string input (TypeError), JSON that is not an object (AttributeError).
    # A bare `except:` would also hide KeyboardInterrupt and real bugs.
    except (TypeError, ValueError, AttributeError):
        return None

# ULB name: the part of the tenant id after the first '.', capitalised.
tenant_suffix = report['ULB'].str.split('.').str[1]
report['ULB'] = tenant_suffix.str.capitalize()

# Convert the raw creation timestamp and the raw details value into their
# report representations.
created_col = 'Date of Creation of the Property in the System'
constructed_col = 'Date of Construction of the Property'
report[created_col] = report[created_col].apply(epoch_to_custom_date)
report[constructed_col] = report[constructed_col].apply(get_year_construction)

# ✅ Step 4: Select final columns in required order
final_columns = [
    'ULB',
    'Property ID',
    'Usage',
    'Date of Creation of the Property in the System',
    'Date of Construction of the Property',
    'Ownership Type',
    'Is Property Exempted [Yes/ No]',
    'Owned_Rented (Owner/ Rented/ Mixed)',
    'Earliest Financial Year for which Demand was Generated',
    'Latest Financial Year for which Demand was Generated',
    'Latest Demand Generated [in Rs.]',
    'Current Years Demand Generated [in Rs.]',
    'Penalty',
    'Interest',
    'Arrear Years Demand Generated [in Rs.]',
    'Property Type[Building/ Vacant]',
    'Total Builtup Area [Sum of all units/ floors]',
    'Total Plinth Area [Sum of all units/ floors]',
]
final_report = report[final_columns].copy()

# ✅ Step 5: Save the CSV (written to the current working directory)
print("✅ Writing CSV")
final_report.to_csv('Punjab_Data_Analysis_patiala_final.csv', index=False)
print(f"🎉 Done! CSV generated with {len(final_report)} properties")


✅ Writing CSV
🎉 Done! CSV generated with 136121 properties
