## 🔹 Step 1: Inspect the NetCDF file

In [1]:
import xarray as xr

# Open the Argo NetCDF file and show xarray's text summary of it
# (dimensions, coordinates, data variables and global attributes).
file_path = "indian_ocean_data.nc"
ds = xr.open_dataset(filename_or_obj=file_path)
print(ds)


<xarray.Dataset>
Dimensions:          (N_POINTS: 55618)
Coordinates:
    LATITUDE         (N_POINTS) float64 ...
    LONGITUDE        (N_POINTS) float64 ...
    TIME             (N_POINTS) datetime64[ns] ...
  * N_POINTS         (N_POINTS) int64 0 1 2 3 4 ... 55614 55615 55616 55617
Data variables: (12/15)
    CYCLE_NUMBER     (N_POINTS) int64 ...
    DATA_MODE        (N_POINTS) object ...
    DIRECTION        (N_POINTS) object ...
    PLATFORM_NUMBER  (N_POINTS) int64 ...
    POSITION_QC      (N_POINTS) int64 ...
    PRES             (N_POINTS) float32 ...
    ...               ...
    PSAL_ERROR       (N_POINTS) float32 ...
    PSAL_QC          (N_POINTS) int64 ...
    TEMP             (N_POINTS) float32 ...
    TEMP_ERROR       (N_POINTS) float32 ...
    TEMP_QC          (N_POINTS) int64 ...
    TIME_QC          (N_POINTS) int64 ...
Attributes:
    DATA_ID:              ARGO
    DOI:                  http://doi.org/10.17882/42182
    Fetched_from:         erddap.ifremer.fr
    Fetch

In [3]:
!pip install netcdf4

Defaulting to user installation because normal site-packages is not writeable


In [7]:
import xarray as xr

# Reopen the file, this time forcing the netCDF4 backend.
file_path = "indian_ocean_data.nc"
ds = xr.open_dataset(file_path, engine="netcdf4")

# Summarise the dataset: variable names, dimension sizes, global attributes.
print(f"Variables in dataset: {list(ds.variables)}")
print(f"Dimensions: {ds.dims}")
print(f"Attributes: {ds.attrs}")


Variables in dataset: ['CYCLE_NUMBER', 'DATA_MODE', 'DIRECTION', 'PLATFORM_NUMBER', 'POSITION_QC', 'PRES', 'PRES_ERROR', 'PRES_QC', 'PSAL', 'PSAL_ERROR', 'PSAL_QC', 'TEMP', 'TEMP_ERROR', 'TEMP_QC', 'TIME_QC', 'LATITUDE', 'LONGITUDE', 'TIME', 'N_POINTS']
Dimensions: Frozen({'N_POINTS': 55618})
Attributes: {'DATA_ID': 'ARGO', 'DOI': 'http://doi.org/10.17882/42182', 'Fetched_from': 'erddap.ifremer.fr', 'Fetched_by': 'root', 'Fetched_date': '2025/09/04', 'Fetched_constraints': '[x=50.00/70.00; y=-10.00/10.00; z=0.0/2000.0; t=2020-03-01/2020-03-31]', 'Fetched_uri': 'https://erddap.ifremer.fr/erddap/tabledap/ArgoFloats.nc?config_mission_number,cycle_number,data_mode,direction,latitude,longitude,platform_number,position_qc,pres,pres_adjusted,pres_adjusted_error,pres_adjusted_qc,pres_qc,psal,psal_adjusted,psal_adjusted_error,psal_adjusted_qc,psal_qc,temp,temp_adjusted,temp_adjusted_error,temp_adjusted_qc,temp_qc,time,time_qc,vertical_sampling_scheme&longitude%3E=50&longitude%3C=70&latitude%3E=

In [10]:
import sqlite3
import xarray as xr
import pandas as pd

# Ingest the Argo point data into the SQLite database "argo.db".
#
# The dataset is a flat list of points (dimension N_POINTS); every point
# carries its own platform number, cycle number, direction, position, time and
# PRES/TEMP/PSAL reading.  The tables `floats`, `profiles` and `measurements`
# are expected to exist already -- this cell only inserts rows.
#
# Differences from a naive row-by-row insert:
#   * one `profiles` row is created per (float, cycle, direction) rather than
#     one per point, so all readings of a profile share a single profile_id;
#     NOTE(review): this assumes points with the same platform, cycle and
#     direction belong to one vertical profile -- confirm against the schema;
#   * float ids are cached, so `floats` is queried once per platform instead
#     of once per point;
#   * the connection is closed even if a row fails; closing without a commit
#     discards the partial transaction, so a failed run leaves no half-ingested
#     data behind.

# Load dataset with netCDF4
file_path = "indian_ocean_data.nc"
ds = xr.open_dataset(file_path, engine="netcdf4")

# Extract variables as in-memory arrays (one entry per point)
cycles = ds["CYCLE_NUMBER"].values
platforms = ds["PLATFORM_NUMBER"].values
directions = ds["DIRECTION"].values
lats = ds["LATITUDE"].values
lons = ds["LONGITUDE"].values
times = pd.to_datetime(ds["TIME"].values)  # Already proper datetime
pressures = ds["PRES"].values
temps = ds["TEMP"].values
sals = ds["PSAL"].values

# Lookup caches filled during ingestion
float_ids = {}    # platform number (str) -> floats.id
profile_ids = {}  # (floats.id, cycle number, direction) -> profiles.id

# Connect to DB
conn = sqlite3.connect("argo.db")
try:
    cursor = conn.cursor()

    # Ingest everything in one pass
    for i in range(len(platforms)):
        # Skip if any required values are missing
        if pd.isna(lats[i]) or pd.isna(lons[i]) or pd.isna(times[i]):
            continue

        # Insert or find float (database lookup only on first sight of a platform)
        platform = str(platforms[i])
        float_id = float_ids.get(platform)
        if float_id is None:
            cursor.execute("SELECT id FROM floats WHERE platform_number=? LIMIT 1", (platform,))
            row = cursor.fetchone()
            if row:
                float_id = row[0]
            else:
                cursor.execute("INSERT INTO floats (platform_number) VALUES (?)", (platform,))
                float_id = cursor.lastrowid
            float_ids[platform] = float_id

        # Insert the profile once per (float, cycle, direction); position and
        # time are taken from the first valid point seen for that profile.
        profile_key = (float_id, int(cycles[i]), str(directions[i]))
        profile_id = profile_ids.get(profile_key)
        if profile_id is None:
            cursor.execute("""
                INSERT INTO profiles (float_id, cycle_number, latitude, longitude, time)
                VALUES (?, ?, ?, ?, ?)
            """, (float_id, int(cycles[i]), float(lats[i]), float(lons[i]), str(times[i])))
            profile_id = cursor.lastrowid
            profile_ids[profile_key] = profile_id

        # Insert measurement (only when pressure, temperature and salinity are all present)
        if not (pd.isna(pressures[i]) or pd.isna(temps[i]) or pd.isna(sals[i])):
            cursor.execute("""
                INSERT INTO measurements (profile_id, pressure, temperature, salinity, time)
                VALUES (?, ?, ?, ?, ?)
            """, (profile_id, float(pressures[i]), float(temps[i]), float(sals[i]), str(times[i])))

    # Commit only after every row was processed successfully
    conn.commit()
finally:
    # Always release the database handle, including on failure
    conn.close()

print("✅ Profiles and measurements ingested together successfully!")


✅ Profiles and measurements ingested together successfully!
