<a href="https://colab.research.google.com/github/riddhipatel18/ABSI-ASSI-Aadhaar-Biometric-Stress-Intelligence-System/blob/main/data_thon.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import zipfile
import pandas as pd

# --- Configuration -----------------------------------------------------
# Source archive and the CSV this cell writes for Tableau.
ZIP_PATH = "/content/api_data_aadhar_biometric (1).zip"
OUTPUT_FILE = "ABSI_ASSI_Tableau_Data.csv"
# Rows read from each CSV member per pandas chunk.
CHUNK_SIZE = 200_000

# Cut-offs used further down when flagging rows.
ABSI_THRESHOLD = 0.75
ASSI_ADULT_HEAVY = 0.7

# Columns carried forward from every chunk.
KEPT_COLUMNS = ['date', 'state', 'district', 'pincode',
                'bio_age_5_17', 'bio_age_17_', 'total_bio']


def _prepare_chunk(chunk):
    """Parse the date column, add ``total_bio`` and return the kept columns.

    The chunk is modified in place (``date`` is overwritten, ``total_bio``
    is added) before the column subset is taken.
    """
    # Day-first parsing with per-value format inference; values that
    # cannot be parsed are coerced to NaT instead of raising.
    chunk['date'] = pd.to_datetime(
        chunk['date'], format='mixed', dayfirst=True, errors='coerce'
    )
    # Total biometric load = sum of the two age-band count columns.
    chunk['total_bio'] = chunk['bio_age_5_17'].add(chunk['bio_age_17_'])
    return chunk[KEPT_COLUMNS]


aggregated = []

with zipfile.ZipFile(ZIP_PATH, 'r') as z:
    # Only the CSV members of the archive are processed.
    csv_files = [name for name in z.namelist() if name.endswith('.csv')]

    for csv_file in csv_files:
        print(f"Processing {csv_file}")

        with z.open(csv_file) as f:
            # The generator is fully consumed by extend() while the
            # archive member is still open.
            aggregated.extend(
                _prepare_chunk(chunk)
                for chunk in pd.read_csv(f, chunksize=CHUNK_SIZE)
            )

# Stack all prepared chunks into a single frame with a fresh index.
df = pd.concat(aggregated, ignore_index=True)

# Daily aggregation: one row per (state, district, date), with each
# biometric count column summed within the group.
GROUP_KEYS = ['state', 'district', 'date']
SUMMED_COLUMNS = ['bio_age_5_17', 'bio_age_17_', 'total_bio']

daily_df = df.groupby(GROUP_KEYS, as_index=False)[SUMMED_COLUMNS].sum()

# -------------------
# ABSI (normalized)
# -------------------
# Each day's total load is divided by the largest daily total observed for
# the same district, so a district's peak day scores 1.0.
#
# The peak is computed per (state, district), matching the keys used for
# the daily aggregation. Grouping by the district name alone would make
# same-named districts in different states share one maximum and would
# understate ABSI for the smaller of them.
daily_df['max_bio_district'] = (
    daily_df.groupby(['state', 'district'])['total_bio'].transform('max')
)

# If a district's peak is 0 the division yields NaN, which the later
# threshold comparison treats as "no alert".
daily_df['ABSI'] = daily_df['total_bio'] / daily_df['max_bio_district']

# -------------------
# ASSI (age skew)
# -------------------
# Fraction of the day's combined count that comes from the
# 'bio_age_17_' column: bio_age_17_ / (bio_age_5_17 + bio_age_17_).
bio_5_17 = daily_df['bio_age_5_17']
bio_17_plus = daily_df['bio_age_17_']

daily_df['ASSI'] = bio_17_plus.div(bio_5_17.add(bio_17_plus))

# Alerts: True where an index meets or exceeds its cut-off.
daily_df['ABSI_alert'] = daily_df['ABSI'].ge(ABSI_THRESHOLD)
daily_df['ASSI_adult_heavy'] = daily_df['ASSI'].ge(ASSI_ADULT_HEAVY)

# State-wise Top 10 (by ABSI): dense rank within each state, highest
# ABSI first, so tied rows share a rank and no rank numbers are skipped.
absi_by_state = daily_df.groupby('state')['ABSI']
daily_df['state_rank'] = absi_by_state.rank(method='dense', ascending=False)

daily_df['is_top10_state'] = daily_df['state_rank'].le(10)

# Save the result without the positional index column.
daily_df.to_csv(OUTPUT_FILE, index=False)

print("✅ ABSI + ASSI dataset ready for Tableau")


Processing api_data_aadhar_biometric/api_data_aadhar_biometric_0_500000.csv
Processing api_data_aadhar_biometric/api_data_aadhar_biometric_1000000_1500000.csv
Processing api_data_aadhar_biometric/api_data_aadhar_biometric_1500000_1861108.csv
Processing api_data_aadhar_biometric/api_data_aadhar_biometric_500000_1000000.csv
✅ ABSI + ASSI dataset ready for Tableau


In [None]:
from google.colab import files

# Download the CSV written by the processing cell. The file name must
# match that cell's OUTPUT_FILE ("ABSI_ASSI_Tableau_Data.csv"); the
# previous path pointed at "ABSI_Tableau_Data.csv", which is never created.
# NOTE(review): assumes the processing cell ran with /content as the
# working directory (the Colab default) — confirm if the cwd is changed.
files.download('/content/ABSI_ASSI_Tableau_Data.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>