In [30]:
import pandas as pd

# Load the Phagwara PDC property report.
phagwara_report = pd.read_csv('/home/prerna/Punjab/PUNJAB/Punjab_Data_Analysis_Phagwara_PDC_FINAL.csv')

# Creation-date parsing is currently disabled; re-enable it before using get_fy on this column.
# phagwara_report["Creation date"] = pd.to_datetime(
#     phagwara_report["Creation date"], format="%d-%m-%y", errors="coerce"
# )

# Financial-year columns are labelled "<start year>-<end suffix>", e.g. "2019-20".
# Require decimal digits on both sides of a single hyphen so that any other
# hyphenated column name (which would make the int() sort key below raise
# ValueError) is never mistaken for a financial-year column.
fy_cols = [
    col
    for col in phagwara_report.columns
    if col.count("-") == 1 and all(part.isdecimal() for part in col.split("-"))
]
# Chronological order, keyed on the starting year of each financial year.
fy_cols.sort(key=lambda label: int(label.split("-")[0]))

# Map a creation date onto its April-to-March financial-year label
def get_fy(date):
    """Return the financial-year label for ``date``, e.g. ``"2019-20"``.

    ``date`` must expose ``.year`` and ``.month``. Missing values (anything
    ``pd.isna`` treats as null) yield ``None``. Dates in April or later fall in
    the financial year starting that calendar year; January to March fall in
    the one that started the previous calendar year.
    """
    if pd.isna(date):
        return None
    # The financial year starts in April, so Jan-Mar roll back one calendar year.
    fy_start = date.year if date.month >= 4 else date.year - 1
    fy_end_suffix = str(fy_start + 1)[-2:]
    return f"{fy_start}-{fy_end_suffix}"

# Defaulter check
def check_defaulter(row, start_fy="2019-20", fy_columns=None):
    """Return "Yes" if ``row`` shows a default in any financial year from ``start_fy`` on.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must be indexable by every financial-year column that gets checked.
    start_fy : str, default "2019-20"
        First financial year to inspect, labelled "<start year>-<suffix>".
    fy_columns : sequence of str, optional
        Financial-year column labels to consider. Defaults to the module-level
        ``fy_cols`` list.

    Returns
    -------
    str
        "Yes" when any inspected cell, after ``str()`` and ``strip()``, equals
        "No" or "Demand not generated, hence not paid"; otherwise "No".
        Any other value (including missing cells, which stringify to "nan")
        does not count as a default.

    Notes
    -----
    Columns are selected by comparing their start year with the start year of
    ``start_fy`` rather than by looking up ``start_fy`` in the list, so a data
    set that does not contain the exact ``start_fy`` column no longer raises
    ``ValueError``; every column starting in or after that year is checked.
    """
    columns = fy_cols if fy_columns is None else fy_columns
    first_year = int(start_fy.split("-")[0])
    default_markers = {"No", "Demand not generated, hence not paid"}

    for col in columns:
        # Skip financial years that began before the requested starting year.
        if int(col.split("-")[0]) < first_year:
            continue
        if str(row[col]).strip() in default_markers:
            return "Yes"
    return "No"

# Flag every property row, preview the result, then persist the full report.
defaulter_flags = phagwara_report.apply(check_defaulter, axis=1)
phagwara_report["IsDefaulter"] = defaulter_flags

preview_columns = ["Property ID", "IsDefaulter"]
print(phagwara_report[preview_columns].head())

# NOTE(review): this writes relative to the current working directory, while a
# later cell reads the same file name from an absolute path - confirm both
# refer to the same location.
phagwara_report.to_csv('Phagwara_updated_defaulter_report_PDC_4.csv', index=False)


       Property ID IsDefaulter
0   PT-1014-052724         Yes
1  PT-1014-1000000          No
2  PT-1014-1000004          No
3  PT-1014-1000006          No
4  PT-1014-1000007          No


In [31]:
import pandas as pd

def _only_defaulters(report):
    """Return a copy of the rows of ``report`` whose IsDefaulter column equals 'Yes'."""
    flagged = report['IsDefaulter'] == 'Yes'
    return report[flagged].copy()


# Read both defaulter reports first, then narrow each one to its flagged rows.
egov_report = pd.read_csv("/home/prerna/Punjab/PUNJAB/Phagwara_updated_defaulter_report_egov_2.csv")
pdc_report = pd.read_csv("/home/prerna/Punjab/PUNJAB/Phagwara_updated_defaulter_report_PDC_4.csv")

egov_report = _only_defaulters(egov_report)
pdc_report = _only_defaulters(pdc_report)

In [32]:
# Unique property IDs flagged as defaulters in each report
egov_ids = set(egov_report["Property ID"])
pdc_ids = set(pdc_report["Property ID"])

# Split the IDs into: present in both reports, PDC-only, and EGOV-only
common_ids = egov_ids.intersection(pdc_ids)
pdc_not_in_egov = pdc_ids.difference(egov_ids)
egov_not_in_pdc = egov_ids.difference(pdc_ids)

print("Count in both:", len(common_ids))
print("Count in PDC only:", len(pdc_not_in_egov))
print("Count in EGOV only:", len(egov_not_in_pdc))


Count in both: 9350
Count in PDC only: 0
Count in EGOV only: 841


In [22]:
# Rows whose ID is flagged in both reports (taken from the EGOV frame)
df_common = egov_report.loc[egov_report["Property ID"].isin(common_ids)]

# Rows flagged by PDC but not by EGOV
df_pdc_only = pdc_report.loc[pdc_report["Property ID"].isin(pdc_not_in_egov)]

# Rows flagged by EGOV but not by PDC
df_egov_only = egov_report.loc[egov_report["Property ID"].isin(egov_not_in_pdc)]


In [23]:
# Show the rows that only the PDC report flags as defaulters.
print(str(df_pdc_only))

           Property ID                                           Owner Id  \
1187   PT-1014-1096381           ['38865bb0-34ec-4ed2-beb4-0b2521de750a']   
2359   PT-1014-1251362           ['9eb179ef-dda0-41f1-b84a-195329058086']   
15925   PT-1014-997131  ['38b6e47b-813d-441d-8a3c-98d46c122684', 'ca94...   

         Locality  Plot size          Propertyusagetype 2013-14 2014-15  \
1187   ['PLC103']     150.00                RESIDENTIAL     NaN     NaN   
2359    ['PLC94']      27.50  NONRESIDENTIAL.COMMERCIAL     NaN     NaN   
15925   ['PLC68']     144.45  NONRESIDENTIAL.COMMERCIAL     NaN     NaN   

      2015-16 2016-17 2017-18 2018-19 2019-20 2020-21 2021-22 2022-23 2023-24  \
1187      NaN     NaN     NaN     NaN     NaN     Yes     Yes     Yes      No   
2359      NaN     NaN     NaN     NaN     NaN     Yes     Yes     Yes     Yes   
15925     NaN     NaN     Yes     Yes     Yes     Yes     Yes     Yes     Yes   

      2024-25 IsDefaulter  
1187       No         Yes  
2359     

In [18]:
# Persist the PDC-only defaulters (written relative to the working directory).
pdc_only_output = "pdc_only_defaulters.csv"
df_pdc_only.to_csv(pdc_only_output, index=False)