In [None]:
# Import necessary packages
import os
from supabase import create_client, Client
import requests
from dotenv import load_dotenv
from koboextractor import KoboExtractor

In [15]:
# Load credentials from the environment (load_dotenv also picks up a local .env file, if any)
load_dotenv()


def _require_env(name: str) -> str:
    """Return the value of environment variable `name`.

    Raises RuntimeError when the variable is unset or empty, so a missing credential is
    reported here instead of surfacing later as an obscure client/API error.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "define it in the environment or in the .env file."
        )
    return value


KOBO_API_KEY = _require_env("KOBO_API_KEY")
KOBO_FORM_ID = _require_env("KOBO_FORM_ID")
KOBO_BASE_URL = 'https://kf.kobotoolbox.org/api/v2'
SUPABASE_URL = _require_env("SUPABASE_URL")
SUPABASE_API_KEY = _require_env("SUPABASE_API_KEY")

In [3]:
# Initialize the Supabase client from the URL and API key read from the environment
supabase: Client = create_client(SUPABASE_URL, SUPABASE_API_KEY)

In [4]:
# Connect to the Kobo account using the API token (KOBO_API_KEY) and the v2 API base URL
kobo = KoboExtractor(KOBO_API_KEY, KOBO_BASE_URL)

In [None]:
"""
Let's fetch data from the form. 
Output format: One large dictionary (parsed from JSON) containing all of the responses in a list called `results`. 
               Each individual result is a dictionary within that list. 
"""

# Download the submissions of the form identified by KOBO_FORM_ID;
# the trailing bare `data` displays the response as the cell output
data = kobo.get_data(KOBO_FORM_ID)
data

{'count': 12,
 'next': None,
 'previous': None,
 'results': [{'_id': 539546599,
   'formhub/uuid': 'b83aa11749194b35b50090f6b5fb1f41',
   'start': '2025-08-15T09:48:34.089+07:00',
   'end': '2025-08-15T09:49:46.890+07:00',
   'Demografi/Usia_tahun': '44',
   'Demografi/Pendidikan_terakhir': 'sma',
   'Demografi/Jenis_kelamin': 'laki_laki',
   'Demografi/Kerja_utama': 'umkm',
   'Demografi/Kelompok_mana_diikuti': 'umkm',
   'UMKM/Jenis_produk_utama': 'terasi',
   'UMKM/Kuantitas_terjual_1bulan': '9000',
   'UMKM/Pendapatan_bulan_dalam_Rupiah_001': '18000000',
   '__version__': 'v5PEgRCv5H6eMgqqcPW2aT',
   'meta/instanceID': 'uuid:dcd966a8-8523-46b6-a526-0e2dc773daec',
   '_xform_id_string': 'aRQ28soDv3f4PxbHBNgRzn',
   '_uuid': 'dcd966a8-8523-46b6-a526-0e2dc773daec',
   'meta/rootUuid': 'uuid:dcd966a8-8523-46b6-a526-0e2dc773daec',
   '_attachments': [],
   '_status': 'submitted_via_web',
   '_geolocation': [None, None],
   '_submission_time': '2025-08-15T02:49:47',
   '_tags': [],
   '_

The following cell contains mappings for all of the fields in the survey as of September 2, 2025. In future revisions of the survey, the following cell might need to be modified so as to ensure correct imports.

`field_mappings` lists the fields that are mandatory in the survey, mapping each database column name to the Kobo field it is read from. These correspond to the mandatory questions in the `Demografi` section of the survey. Each survey response should have these filled in.

The rest of the fields are not mandatory, therefore they don't appear in all responses. They will later be stored together in a single JSONB entry when we upload the data to the database. These fields are divided into two main categories: `text_fields` are the fields with string values, and `numeric_fields` are those with numeric values. Normally Supabase can automatically transform data into the appropriate type, but since we are storing them all inside one JSONB entry, we have to separate text fields from numeric fields ourselves, so we can convert the numeric fields into floats later. A third list, `fields_with_zero_value`, holds numeric fields for which `0` is a meaningful answer: they are kept even when the response is `0`, whereas a `0` in any other field is treated as "no answer" and dropped.

In [None]:
# Define field mappings: database column name -> Kobo field name.
# These are the fields every response is expected to carry.
field_mappings = {
    'kobo_id': '_uuid',
    'time_of_response': '_submission_time',
    '__version_id': '__version__',
    'pendidikan_terakhir': 'Demografi/Pendidikan_terakhir',
    'jenis_kelamin': 'Demografi/Jenis_kelamin',
    'kerja_utama': 'Demografi/Kerja_utama',
    'kelompok_mana': 'Demografi/Kelompok_mana_diikuti',
    'nama': 'Demografi/Nama_lengkap',
    'usia': 'Demografi/Usia_tahun',
    'lama_tinggal': 'Demografi/Lama_tinggal',
}

# Optional fields with string values (stored as-is)
text_fields = [
    'Nelayan/hasil_tangkapan_utama',
    'UMKM/Jenis_modal',
    'UMKM/Tempat_menjual',
    'UMKM/Jenis_produk_utama',
    'UMKM/Apa_produk_Anda_halal',
    'Petambak/Jenis_budidaya_utama',
    'Petambak/Jenis_lain_panen_di_tambak',
    'Petani_Sawah/Sawah_dekat_mangrove',
    'Petani_Sawah/Pernah_intrusi',
    'Petani_Sawah/Kondisi_membaik_dalam_2_tahun',
    'Petani_Sawah/Menurut_Anda_apakah_tau_intrusi_air_laut',
    'Petani_Sawah/Sejak_adanya_rehabil_sil_panen_sawah_Anda',
    # The comma after the next entry matters: without it Python concatenates the two
    # adjacent string literals into a single (non-existent) field name.
    'KTH/Jenis_kegiatan',
    'Demografi/Apakah_Anda_memiliki_perbaikan_survei_ini',
]

# Optional fields with numeric values (converted to float by transform_kobo_record;
# a value of "0" is treated as "no answer" and dropped)
numeric_fields = [
    # Nelayan
    'Nelayan/Jumlah_hasil_ikan',
    'Nelayan/Harga_jual_ikan',
    'Nelayan/Jumlah_hasil_udang',
    'Nelayan/Harga_jual_udang',
    'Nelayan/Jumlah_hasil_kepiting',
    'Nelayan/Harga_jual_kepiting',
    'Nelayan/Jumlah_hasil_kerang',
    'Nelayan/Harga_jual_kerang',
    'Nelayan/Pendapatan_bulan_dalam_Rupiah',

    # UMKM
    'UMKM/Harga_jualan_produk',
    'UMKM/Pendapatan_bulan_dalam_Rupiah_001',

    # Petambak
    'Petambak/Jumlah_udang_vaname',
    'Petambak/Harga_udang_vaname',
    'Petambak/Jumlah_udang_windu',
    'Petambak/Harga_udang_windu',
    'Petambak/Berapa_minggu_satu_musin',

    # KTH
    'KTH/Berapa_kali_KTH_kegiatan',

    # Sadar Wisata
    'Sadar_Wisata/Wisata_per_musin',
]

# Optional numeric fields for which 0 is a meaningful answer: transform_kobo_record
# converts them to float and keeps them even when the value is "0"
fields_with_zero_value = [
    'UMKM/Kuantitas_modal',
    'UMKM/Kuantitas_terjual_1bulan',
    'UMKM/Berapa_total_biaya_p_silkan_dalam_rupiah',
]

In [None]:
def transform_kobo_record(record):
    """
    Transform one Kobo record into a format we can insert into the database.

    The fields listed in `field_mappings` become top-level keys (None when the record
    lacks the field). The optional fields listed in `text_fields`, `numeric_fields` and
    `fields_with_zero_value` are collected, under their original Kobo names, in a
    single `kobo_data` dictionary.

    INPUT: A single record / a single survey entry from Kobo (a dictionary).
    OUTPUT: A dictionary with one key per `field_mappings` entry plus `kobo_data`,
            which stores all of the flexible fields in a single JSONB entry.
    RAISES: ValueError if a numeric field holds a string that cannot be converted to float.
    """
    # Values that mean "no answer" for every kind of field
    no_answer = (None, "", "AUTOMATIC")

    # Sets give constant-time membership tests inside the loop below
    text_keys = set(text_fields)
    numeric_keys = set(numeric_fields)
    zero_ok_keys = set(fields_with_zero_value)

    # Extract mandatory fields, defaulting to None when absent
    result = {
        db_field: record.get(kobo_field)
        for db_field, kobo_field in field_mappings.items()
    }

    # Extract flexible fields
    kobo_data = {}
    for key, value in record.items():
        zero_is_valid = key in zero_ok_keys
        if not (zero_is_valid or key in numeric_keys or key in text_keys):
            continue  # not a field we import

        # Skip unanswered fields; this also protects float() below from None / ""
        if value in no_answer:
            continue

        # "0" only counts as a real answer for the fields in fields_with_zero_value
        if value == "0" and not zero_is_valid:
            continue

        if zero_is_valid or key in numeric_keys:
            try:
                kobo_data[key] = float(value)
            except ValueError as exc:
                # Name the offending field so a bad submission is easy to locate
                raise ValueError(
                    f"Field {key!r} has non-numeric value {value!r}"
                ) from exc
        else:
            kobo_data[key] = value

    result['kobo_data'] = kobo_data

    return result

The following cell calls the `transform_kobo_record` function. Recall that `data['results']` contains multiple survey entries. We need to iterate over the `data['results']` list to transform each entry. 

In [None]:
# Run every survey entry through the per-record transform, then display the resulting list
transformed_data = list(map(transform_kobo_record, data['results']))
transformed_data

[{'kobo_id': 'dcd966a8-8523-46b6-a526-0e2dc773daec',
  'time_of_response': '2025-08-15T02:49:47',
  '__version_id': 'v5PEgRCv5H6eMgqqcPW2aT',
  'pendidikan_terakhir': 'sma',
  'jenis_kelamin': 'laki_laki',
  'kerja_utama': 'umkm',
  'kelompok_mana': 'umkm',
  'nama': None,
  'usia': '44',
  'lama_tinggal': None,
  'kobo_data': {'UMKM/Jenis_produk_utama': 'terasi',
   'UMKM/Kuantitas_terjual_1bulan': 9000.0,
   'UMKM/Pendapatan_bulan_dalam_Rupiah_001': 18000000.0}},
 {'kobo_id': '7cf99d85-c340-461a-99fc-b5b995fe74e0',
  'time_of_response': '2025-08-15T02:51:29',
  '__version_id': 'v5PEgRCv5H6eMgqqcPW2aT',
  'pendidikan_terakhir': 'sd',
  'jenis_kelamin': 'laki_laki',
  'kerja_utama': 'Nelayan',
  'kelompok_mana': 'nelayan',
  'nama': None,
  'usia': '44',
  'lama_tinggal': '3',
  'kobo_data': {'Nelayan/hasil_tangkapan_utama': 'udang rebon',
   'Nelayan/Jumlah_hasil_udang': 300.0,
   'Nelayan/Pendapatan_bulan_dalam_Rupiah': 4500000.0}},
 {'kobo_id': 'e0a6edbd-52b3-4833-9750-9a5de228fdfe'

Finally, in the next cell we upload the data stored in the `transformed_data` list. We upsert on `kobo_id`: entries whose `kobo_id` is not yet in the database are inserted, while entries whose `kobo_id` already exists are updated in place rather than duplicated. `kobo_id` corresponds to a UUID (universally unique identifier) generated by KoboToolbox every time a new entry is made to the survey.

In [None]:
# Upsert all transformed rows into `sosec_data`, matching existing rows on `kobo_id`;
# the API response is the cell's displayed output
(
    supabase
    .table("sosec_data")
    .upsert(transformed_data, on_conflict="kobo_id")
    .execute()
)

APIResponse[~_ReturnT](data=[{'_uuid': 33, 'time_of_response': '2025-08-15T02:49:47+00:00', '__version_id': 'v5PEgRCv5H6eMgqqcPW2aT', 'usia': 44, 'pendidikan_terakhir': 'sma', 'kobo_data': {'UMKM/Jenis_produk_utama': 'terasi', 'UMKM/Kuantitas_terjual_1bulan': 9000.0, 'UMKM/Pendapatan_bulan_dalam_Rupiah_001': 18000000.0}, 'nama': None, 'jenis_kelamin': 'laki_laki', 'lama_tinggal': None, 'kerja_utama': 'umkm', 'kelompok_mana': 'umkm', 'kobo_id': 'dcd966a8-8523-46b6-a526-0e2dc773daec'}, {'_uuid': 34, 'time_of_response': '2025-08-15T02:51:29+00:00', '__version_id': 'v5PEgRCv5H6eMgqqcPW2aT', 'usia': 44, 'pendidikan_terakhir': 'sd', 'kobo_data': {'Nelayan/Jumlah_hasil_udang': 300.0, 'Nelayan/hasil_tangkapan_utama': 'udang rebon', 'Nelayan/Pendapatan_bulan_dalam_Rupiah': 4500000.0}, 'nama': None, 'jenis_kelamin': 'laki_laki', 'lama_tinggal': 3, 'kerja_utama': 'Nelayan', 'kelompok_mana': 'nelayan', 'kobo_id': '7cf99d85-c340-461a-99fc-b5b995fe74e0'}, {'_uuid': 35, 'time_of_response': '2025-08-1