In [1]:
import os
import pandas as pd
from supabase import create_client
from dotenv import load_dotenv

# Read settings from a local .env file (if present) into the process environment.
load_dotenv()

SUPABASE_URL = os.getenv("SUPABASE_URL")
SUPABASE_KEY = os.getenv("SUPABASE_KEY")

# os.getenv returns None for an unset variable; stop here with a clear message
# instead of handing None to the client constructor.
_missing_settings = [
    name
    for name, value in (("SUPABASE_URL", SUPABASE_URL), ("SUPABASE_KEY", SUPABASE_KEY))
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in the environment or in a .env file."
    )

# Connect to Supabase
supabase = create_client(SUPABASE_URL, SUPABASE_KEY)

# Fetch every column of the valuations table.
# NOTE(review): the API may cap the number of rows returned per request --
# confirm the full table comes back if it grows.
response = supabase.table("valuations_2025_clean_v2").select("*").execute()



In [2]:
# Rows returned by the query, taken from the response object
data = response.data

# Load those rows into a DataFrame for analysis
df = pd.DataFrame(data=data)

# Preview the first five rows as plain text
print(df.head(n=5))



                                         property_id   report_reference  \
0  TITLE NO MAVOKO MUNICIPALITY BLOCK 109-42 AHAD...  SOO/DOO/5340/1/25   
1  TITLE NO MAVOKO MUNICIPALITY BLOCK 46-175-192 ...  SOO/DOO/5341/1/25   
2  TITLE NO MOI'S BRIDGE-ZIWA BLOCK 9-18 SUGUTEK ...   SOO/AM/5327/1/25   
3  PORTION NUMBER 1362 MALINDI CASUARINA AREA MAL...      SOO/5324/1/25   
4  TITLE NO NJORO-NGATA BLOCK 2-5138 RVIST AREA O...   SOO/AM/5282/1/25   

                          client_name                             valuer_name  \
0  Co-operative Bank of Kenya Limited  Simon Oruka Orwa, Danish Onyango Orech   
1       Qona DT Sacco Society Limited  Simon Oruka Orwa, Danish Onyango Orech   
2  Co-operative Bank of Kenya Limited                        Simon Oruka Orwa   
3              KCB Bank Kenya Limited                        Simon Oruka Orwa   
4  Co-operative Bank of Kenya Limited                        Simon Oruka Orwa   

  inspection_date valuation_date location_county  \
0      202

In [3]:
# DataFrame.info() writes its report straight to stdout and returns None,
# so wrapping it in print() only appends a stray "None" line to the output.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113 entries, 0 to 112
Data columns (total 23 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   property_id            113 non-null    object 
 1   report_reference       113 non-null    object 
 2   client_name            113 non-null    object 
 3   valuer_name            113 non-null    object 
 4   inspection_date        113 non-null    object 
 5   valuation_date         113 non-null    object 
 6   location_county        113 non-null    object 
 7   location_description   113 non-null    object 
 8   location_coordinates   112 non-null    object 
 9   plot_area_hectares     113 non-null    float64
 10  plot_area_acres        113 non-null    float64
 11  land_use               113 non-null    object 
 12  plot_shape             111 non-null    object 
 13  soil_type              113 non-null    object 
 14  gradient               106 non-null    object 
 15  tenure

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
numeric_summary = df.describe()
print(numeric_summary)


       plot_area_hectares  plot_area_acres  market_value_amount         id
count          113.000000       113.000000         1.130000e+02  113.00000
mean             2.441746         6.108420         1.085947e+08   57.00000
std              8.511049        21.173485         1.708102e+08   32.76431
min              0.000000         0.000000         0.000000e+00    1.00000
25%              0.039000         0.118600         1.600000e+07   29.00000
50%              0.103100         0.333100         5.000000e+07   57.00000
75%              0.381600         0.943000         1.100000e+08   85.00000
max             50.900000       127.300000         1.175000e+09  113.00000


In [5]:
# Dimensions of the loaded table as (rows, columns)
df.shape

(113, 23)

In [6]:
# Number of missing values in each column
missing_counts = df.isna().sum()
missing_counts

property_id              0
report_reference         0
client_name              0
valuer_name              0
inspection_date          0
valuation_date           0
location_county          0
location_description     0
location_coordinates     1
plot_area_hectares       0
plot_area_acres          0
land_use                 0
plot_shape               2
soil_type                0
gradient                 7
tenure_type              0
registered_proprietor    0
market_value_amount      0
metadata                 0
created_at               0
land_reg_number          0
encumbrance_category     0
id                       0
dtype: int64

In [7]:
import seaborn as sns
import matplotlib.pyplot as plt
# Plot defaults for the rest of the notebook: seaborn "whitegrid" style first,
# then a default figure size of 12 x 6.
sns.set(style="whitegrid")
plt.rc("figure", figsize=(12, 6))