Reset id column

Standardize location_coordinates column

In [None]:
-- Repair known typos in location_coordinates and store blank values as NULL.
--
-- The fixes are applied as a chain rather than as sibling CASE branches:
-- CASE returns at its first matching branch, so a value carrying two
-- different typos (for example a stray "1 " prefix and "379°") would
-- otherwise keep the second one.
UPDATE properties_2025_merged
SET location_coordinates = NULLIF(
    REPLACE(
        REPLACE(
            -- Stray "1 " in front of the latitude degrees. A value cannot
            -- start with both prefixes, so a CASE is sufficient for this step.
            CASE
                WHEN location_coordinates LIKE '1 1917''%'
                    THEN REPLACE(location_coordinates, '1 1917''', '1°17''')
                WHEN location_coordinates LIKE '1 1°17''%'
                    THEN REPLACE(location_coordinates, '1 1°17''', '1°17''')
                ELSE location_coordinates
            END,
            -- Longitude typo: 379° should be 37°.
            '379°', '37°'
        ),
        -- "1921'42.1" should be 1°21'42.1.
        '1921''42.1', '1°21''42.1'
    ),
    -- An empty string becomes NULL.
    ''
)
-- NULL stays NULL through every step above, so those rows are skipped.
WHERE location_coordinates IS NOT NULL;

-- Turn the whitespace-delimited word "and" into a comma separator.
-- Flags: 'g' replaces every occurrence, 'i' ignores letter case.
UPDATE properties_2025_merged AS p
SET location_coordinates = REGEXP_REPLACE(
        p.location_coordinates,
        '\s+and\s+',
        ', ',
        'gi'
    )
WHERE p.location_coordinates IS NOT NULL;

-- Replace '&' with comma.
-- A regex is used instead of the literal ' & ' because whitespace is only
-- normalized by a later statement: at this point the ampersand may still be
-- surrounded by tabs, newlines or several spaces. Matching the whole
-- whitespace run also avoids leaving a space in front of the new comma.
-- An ampersand with no surrounding whitespace is left untouched, the same
-- way the 'and' rule requires whitespace on both sides.
UPDATE properties_2025_merged
SET location_coordinates = REGEXP_REPLACE(location_coordinates, '\s+&\s+', ', ', 'g')
WHERE location_coordinates IS NOT NULL;

-- Collapse every run of whitespace characters into one space ('g' = all runs).
UPDATE properties_2025_merged AS p
SET location_coordinates = REGEXP_REPLACE(
        p.location_coordinates,
        '\s+',
        ' ',
        'g'
    )
WHERE p.location_coordinates IS NOT NULL;

Standardize client names

In [None]:
-- Standardize client_name: map known bank spellings to one canonical name
-- and strip a leading honorific from individual names.
-- Branches are evaluated top to bottom and the first match wins.
-- There is no WHERE clause, so every row in the table is rewritten.
UPDATE properties_2025_merged
SET client_name = CASE
    -- Standardize KCB Bank variations
    WHEN LOWER(client_name) LIKE '%kcb bank%' THEN 'KCB Bank Kenya Limited'

    -- Standardize Co-operative Bank variations
    WHEN LOWER(client_name) LIKE '%co-operative bank%'
         OR LOWER(client_name) LIKE '%co-op%bank%' THEN 'Co-operative Bank of Kenya Limited'

    -- Standardize Co-op Trust
    WHEN LOWER(client_name) LIKE '%co-op trust%' THEN 'Co-op Trust Investment Services Limited'

    -- Remove a leading title (Mr., Mrs., Dr., Ms.) from individual names.
    -- The guard is case-insensitive (~*) so it agrees with the 'i' flag of
    -- the replacement; a case-sensitive LIKE guard would let "MR. X" or
    -- "dr. x" fall through to ELSE with the title still attached.
    WHEN client_name ~* '^(mr|mrs|dr|ms)\.'
        THEN TRIM(REGEXP_REPLACE(client_name, '^(mr|mrs|dr|ms)\.\s*', '', 'i'))

    -- Keep others as is (this also covers NULL names)
    ELSE client_name
END;

Renumber index column

In [None]:
-- Renumber the id column by dropping it and re-creating it as a
-- SERIAL PRIMARY KEY, which fills existing rows from a fresh sequence.
-- Both statements run in one transaction so that a failure of the second
-- statement rolls back the drop instead of leaving the table without an id.
-- NOTE(review): the statement does not specify which row receives which
-- number, and anything that stored the old id values will no longer line up
-- with them afterwards. Confirm nothing depends on the previous ids.
BEGIN;

ALTER TABLE valuations_2021_raw DROP COLUMN id;

ALTER TABLE valuations_2021_raw
ADD COLUMN id SERIAL PRIMARY KEY;

COMMIT;

Insert new data

In [None]:
-- Add one valuation record (report SOO/DOO/5386/1/25) to valuations_2025_clean_v2.
-- Columns are grouped by topic; the VALUES list follows the same order,
-- position by position.
INSERT INTO valuations_2025_clean_v2 (
    -- identifiers
    property_id, report_reference, land_reg_number,
    -- parties
    client_name, valuer_name,
    -- dates
    inspection_date, valuation_date,
    -- location
    location_county, location_description, location_coordinates,
    -- plot
    plot_area_hectares, plot_area_acres,
    land_use, plot_shape, soil_type, gradient,
    -- title
    tenure_type, registered_proprietor, encumbrance_category,
    -- value and extraction metadata
    market_value_amount, metadata
)
VALUES (
    -- identifiers
    'doc42282020251030133255', 'SOO/DOO/5386/1/25', 'MATUNGULU/KAMBUSU/870',
    -- parties
    'KCB Bank Kenya Limited', 'Simon Oruka Orwa',
    -- dates
    '2025/10/13', '2025/10/13',
    -- location
    'Machakos',
    'approximately 5 kilometres off Kangundo Road, 7 kilometres due Northeast of Tala shopping Center, 2 kilometres to the Northeast of Matungulu Boys’ High School and 360 meters to Southeast of Kambusu Secondary School within Kambusu area',
    '1°15''07.9"S 37°21''54.9"E',
    -- plot
    0.18, 0.44,
    'Agricultural', 'Irregular', 'Red Soil', 'Gentle Slope',
    -- title
    'Freehold', 'John Katunga Kavatha', 'Has Charges',
    -- value and extraction metadata (JSON text converted to jsonb)
    700000,
    CAST('{
        "source_file": "doc42282020251030133255.pdf",
        "file_size_kb": 7472.84,
        "processing_time_seconds": 35.58,
        "ocr_used": true,
        "pages_processed": 14,
        "model_name": "models/gemini-2.5-flash",
        "timestamp": "2025-12-04T09:38:43.414744"
    }' AS jsonb)
);


Turnaround time categories

In [None]:
-- Add the turnaround_category column and fill it for every row from the gap
-- between valuation_date and inspection_date.
-- IF NOT EXISTS keeps the cell re-runnable: a plain ADD COLUMN raises an
-- error once the column already exists, which would stop the cell before
-- the UPDATE below is reached.
ALTER TABLE valuations_2025_clean_v2
ADD COLUMN IF NOT EXISTS turnaround_category TEXT;

-- No WHERE clause: every row is (re)classified.
-- NOTE(review): the comparisons against plain numbers assume both columns
-- are DATE, so that the subtraction yields a whole number of days. Confirm.
-- ELSE covers a negative gap and also a NULL gap (either date missing),
-- because no comparison against NULL evaluates to true.
UPDATE valuations_2025_clean_v2
SET turnaround_category = CASE
    WHEN (valuation_date - inspection_date) BETWEEN 0 AND 10 THEN 'Fast Track'
    WHEN (valuation_date - inspection_date) BETWEEN 11 AND 30 THEN 'Standard'
    WHEN (valuation_date - inspection_date) > 30 THEN 'Delayed'
    ELSE 'Invalid'
END;

In [None]:
-- Recompute turnaround_category for the rows with id 118 and 119 only.
-- Ranges are written as explicit bounds; both ends are inclusive.
UPDATE valuations_2025_clean_v2 AS v
SET turnaround_category = CASE
        WHEN (v.valuation_date - v.inspection_date) >= 0
         AND (v.valuation_date - v.inspection_date) <= 10
            THEN 'Fast Track'
        WHEN (v.valuation_date - v.inspection_date) >= 11
         AND (v.valuation_date - v.inspection_date) <= 30
            THEN 'Standard'
        WHEN (v.valuation_date - v.inspection_date) > 30
            THEN 'Delayed'
        -- negative gap, or a gap that is NULL
        ELSE 'Invalid'
    END
WHERE v.id = 118
   OR v.id = 119;

Client Classification

In [None]:
-- Classify clients as 'Repeat Client' (more than one row with the same
-- client_name) or 'One-time Client', triggered by rows 118 and 119.
--
-- Every row that shares a client_name with row 118 or 119 is refreshed, not
-- only those two rows: when one of them is a client's second valuation, the
-- client's earlier row also changes from 'One-time Client' to
-- 'Repeat Client' and would otherwise keep the stale label.
WITH affected_clients AS (
    -- Clients whose row count may have changed.
    SELECT DISTINCT client_name
    FROM valuations_2025_clean_v2
    WHERE id IN (118, 119)
),
client_classification AS (
    SELECT
        v.id,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY v.client_name) > 1 THEN 'Repeat Client'
            ELSE 'One-time Client'
        END AS computed_client_type
    FROM valuations_2025_clean_v2 AS v
    -- This filter keeps whole client_name partitions, so the window counts
    -- equal those computed over the full table. IS NOT DISTINCT FROM treats
    -- NULL names as equal, matching how PARTITION BY groups them.
    WHERE EXISTS (
        SELECT 1
        FROM affected_clients AS a
        WHERE a.client_name IS NOT DISTINCT FROM v.client_name
    )
)
UPDATE valuations_2025_clean_v2 AS v
SET client_type = c.computed_client_type
FROM client_classification AS c
WHERE v.id = c.id
  -- Only write rows whose stored value actually changes (NULL-safe).
  AND v.client_type IS DISTINCT FROM c.computed_client_type;