In [1]:
import sqlite3
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
import os

# Export the `consumer` table from SQLite to a series of GeoJSON files,
# one file per chunk of `chunk_size` source rows. Rows whose ZLATITUDE /
# ZLONGITUDE cannot be parsed as numbers are dropped before writing.

# File paths
db_path = r"E:\police\consumer.db"
output_dir = r"E:\police\geojson_chunks"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Number of source rows read from SQLite per chunk (one GeoJSON file each)
chunk_size = 500000  # Adjust as needed

# Connect to SQLite
conn = sqlite3.connect(db_path)
try:
    # Get total rows (reported for information only; the loop does not rely on it)
    total_rows = conn.execute("SELECT COUNT(*) FROM consumer").fetchone()[0]
    print(f"Total rows: {total_rows}")

    # Stream the table in ONE query instead of issuing LIMIT/OFFSET per chunk:
    #  - without ORDER BY, separate LIMIT/OFFSET queries have no guaranteed row
    #    order, so chunks could overlap or skip rows;
    #  - every OFFSET query has to walk past all skipped rows again, which makes
    #    the whole export quadratic in the table size.
    chunk_iter = pd.read_sql_query(
        "SELECT * FROM consumer", conn, chunksize=chunk_size
    )

    offset = 0
    for chunk_num, df in enumerate(chunk_iter, start=1):
        rows_read = len(df)
        print(f"Processing rows {offset} to {offset + rows_read}...")
        offset += rows_read

        # Clean coordinates: errors='coerce' maps blank, whitespace-only and
        # otherwise non-numeric values to NaN, so no separate blank-string
        # replacement is needed before dropping the unusable rows.
        df = df.assign(
            ZLATITUDE=pd.to_numeric(df['ZLATITUDE'], errors='coerce'),
            ZLONGITUDE=pd.to_numeric(df['ZLONGITUDE'], errors='coerce'),
        ).dropna(subset=['ZLATITUDE', 'ZLONGITUDE'])

        # Nothing left to write for this chunk; keep the chunk number reserved
        # so file names still map onto source row ranges.
        if df.empty:
            print(f"Skipped chunk {chunk_num}: no rows with valid coordinates")
            continue

        # Convert to GeoDataFrame (vectorised point construction, x=lon, y=lat)
        gdf = gpd.GeoDataFrame(
            df,
            geometry=gpd.points_from_xy(df['ZLONGITUDE'], df['ZLATITUDE']),
            crs="EPSG:4326",
        )

        # Save chunk to separate GeoJSON
        chunk_file = os.path.join(output_dir, f"consumer_chunk_{chunk_num:04d}.geojson")
        gdf.to_file(chunk_file, driver="GeoJSON")
        print(f"Saved: {chunk_file}")
finally:
    # Always release the database handle, even if a chunk fails mid-export
    conn.close()

print("✅ All chunks written to:", output_dir)


Total rows: 24021380
Processing rows 0 to 500000...
Saved: E:\police\geojson_chunks\consumer_chunk_0001.geojson
Processing rows 500000 to 1000000...
Saved: E:\police\geojson_chunks\consumer_chunk_0002.geojson
Processing rows 1000000 to 1500000...
Saved: E:\police\geojson_chunks\consumer_chunk_0003.geojson
Processing rows 1500000 to 2000000...
Saved: E:\police\geojson_chunks\consumer_chunk_0004.geojson
Processing rows 2000000 to 2500000...
Saved: E:\police\geojson_chunks\consumer_chunk_0005.geojson
Processing rows 2500000 to 3000000...
Saved: E:\police\geojson_chunks\consumer_chunk_0006.geojson
Processing rows 3000000 to 3500000...
Saved: E:\police\geojson_chunks\consumer_chunk_0007.geojson
Processing rows 3500000 to 4000000...
Saved: E:\police\geojson_chunks\consumer_chunk_0008.geojson
Processing rows 4000000 to 4500000...
Saved: E:\police\geojson_chunks\consumer_chunk_0009.geojson
Processing rows 4500000 to 5000000...
Saved: E:\police\geojson_chunks\consumer_chunk_0010.geojson
Process

In [2]:
import geopandas as gpd
import os
import pandas as pd  # required by pd.concat below; do not rely on an earlier cell having imported it

# Merge every per-chunk GeoJSON file in `chunk_dir` into one GeoJSON file.
# NOTE(review): all chunks are held in memory at once before writing; with
# tens of millions of features this may not fit in RAM — confirm on the
# target machine or switch to an appending writer.

# Folder with individual chunk files
chunk_dir = r"E:\police\geojson_chunks"
merged_output_path = r"E:\police\consumer_merged.geojson"

# Collect all .geojson chunk file paths.
# os.listdir returns entries in arbitrary order, so sort to make the merged
# row order deterministic (zero-padded chunk numbers sort correctly as text).
geojson_files = sorted(
    os.path.join(chunk_dir, f)
    for f in os.listdir(chunk_dir)
    if f.endswith(".geojson")
)

print(f"Found {len(geojson_files)} GeoJSON files to merge...")

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not geojson_files:
    raise FileNotFoundError(f"No .geojson files found in {chunk_dir}")

# Read all files
gdf_list = []
for file in geojson_files:
    print(f"Reading: {file}")
    gdf_list.append(gpd.read_file(file))

# Concatenate into one GeoDataFrame
merged_gdf = gpd.GeoDataFrame(pd.concat(gdf_list, ignore_index=True), crs="EPSG:4326")

# Write merged GeoJSON
merged_gdf.to_file(merged_output_path, driver="GeoJSON")

print(f"✅ Merged GeoJSON saved to: {merged_output_path}")


Found 49 GeoJSON files to merge...
Reading: E:\police\geojson_chunks\consumer_chunk_0001.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0002.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0003.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0004.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0005.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0006.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0007.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0008.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0009.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0010.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0011.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0012.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0013.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0014.geojson
Reading: E:\police\geojson_chunks\consumer_chunk_0015.geojson
Reading: E:\police\geojson_chunks\c