# Torrevieja Data Integration

In [None]:
import os
import json
import pandas as pd

## Weather Station

### Input format

JSON file has the following structure:

```json
{
    "series": {
        "temperatura": {
            "sensor-id": [
                [ "unix-timestamp1", "value1" ],
                [ "unix-timestamp2", "value2" ]
            ]
        },
        "humedad": {
            "sensor-id": [
                [ "unix-timestamp", "value" ]
            ]
        },
        "presion": {
            "sensor-id": [
                [ "unix-timestamp", "value" ]
            ]
        }
    }
}
```

### Output format

The output will be a CSV file with the following columns:

- `timestamp`
- `temperature`
- `humidity`
- `pressure`

In [None]:
# Locate the weather-station export. The file on disk is named after the
# sensor id, with spaces replaced by underscores and a "_2024" suffix.
folder_path = "json_data/weather-station"
sensor_id = "5rTM-4sB-iVpGSRJcSJV_La Mata - Parque Natural_38.02519_-0.65845_0"
file_name = sensor_id.replace(" ", "_") + "_2024"
file_path = os.path.join(folder_path, file_name + ".json")

# Decode explicitly as UTF-8 rather than relying on the platform's locale
# default encoding, so the file is parsed the same way on every OS.
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Build one two-column frame per measured quantity for the selected sensor,
# keeping only the first reading whenever a timestamp is repeated.
station_series = data['series']

df_temperature = pd.DataFrame(
    station_series['temperatura'][sensor_id],
    columns=['timestamp', 'temperature'],
).drop_duplicates(subset='timestamp', keep='first')

df_humidity = pd.DataFrame(
    station_series['humedad'][sensor_id],
    columns=['timestamp', 'humidity'],
).drop_duplicates(subset='timestamp', keep='first')

df_pressure = pd.DataFrame(
    station_series['presion'][sensor_id],
    columns=['timestamp', 'pressure'],
).drop_duplicates(subset='timestamp', keep='first')

# NOTE: timestamps are deliberately left as the raw values from the JSON.
# To work with datetimes, convert each frame's 'timestamp' column with
# pd.to_datetime(..., unit='ms') before merging (presumably the values are
# epoch milliseconds -- confirm against the data).

# Inner joins: only timestamps present in all three series survive.
df = (
    df_temperature
    .merge(df_humidity, how='inner', on='timestamp')
    .merge(df_pressure, how='inner', on='timestamp')
    .set_index('timestamp')
)

In [None]:
# Write the merged weather-station table (indexed by timestamp) to a CSV
# named after the input file, relative to the current working directory.
df.to_csv(f"{file_name}.csv")

## Sonometer

### Input format

JSON file has the following structure:

```json
{
    "series": {
        "noise": {
            "sensor-id": [
                [ "unix-timestamp", "value" ]
            ]
        },
        "humidity": {
            "sensor-name": [
                [ "unix-timestamp", "value" ]
            ]
        },
        "temperature": {
            "sensor-name": [
                [ "unix-timestamp", "value" ]
            ]
        }
    }
}
```

### Output format

The output will be a CSV file with the following columns:

- `timestamp`
- `noise`

`humidity` and `temperature` will be ignored because they are already present in the weather station data.

In [None]:
# Locate the sonometer export.
folder_path = "json_data/sonometer"
file_name = "sonometros_dataset_l1_2024"
file_path = os.path.join(folder_path, file_name + ".json")

# Decode explicitly as UTF-8 rather than relying on the platform's locale
# default encoding, so the file is parsed the same way on every OS.
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# The noise series is keyed by sensor id; take the first key in the file.
noise_by_sensor = data['series']['noise']
sensor_id = list(noise_by_sensor)[0]

# One row per reading, first occurrence kept for repeated timestamps, indexed
# by timestamp.
# NOTE: timestamps are deliberately left as the raw values from the JSON.
# To work with datetimes, convert the 'timestamp' column with
# pd.to_datetime(..., unit='ms') before setting the index (presumably the
# values are epoch milliseconds -- confirm against the data).
df_noise = (
    pd.DataFrame(noise_by_sensor[sensor_id], columns=['timestamp', 'noise'])
    .drop_duplicates(subset='timestamp', keep='first')
    .set_index('timestamp')
)

In [None]:
# Write the noise table (indexed by timestamp) to a CSV named after the
# input file, relative to the current working directory.
df_noise.to_csv(f"{file_name}.csv")

## Buoy

### Input format

JSON file has the following structure:

```json
{
    "series": {
        "temperature_a": {
            "sensor-1": [
                [ "unix-timestamp", "value" ]
            ],
            "sensor-2": [
                [ "unix-timestamp", "value" ]
            ],
            "..."
        },
        "temperature_b": {
            "..."
        }
    }
}
```

Water temperature is read from `temperature_a`, using two of its sensors, located at depths of 0.2 and 0.7 meters respectively.

### Output format

The output will be a CSV file with the following columns:

- `timestamp`
- `temperature_02`
- `temperature_07`

In [None]:
# Locate the buoy export.
folder_path = "json_data"
file_name = "boya_dataset"
file_path = os.path.join(folder_path, file_name + ".json")

# Keys of the two `temperature_a` sensors to extract; the ids themselves
# carry the depth labels ("Profundidad: -0.2m" and "Profundidad: -0.7m").
id_sensor_02_meters = "UVztVI0BaNT-uedBhHl1_Profundidad: -0.2m_38.03635_-0.68998_-2.2"
id_sensor_07_meters = "UVztVI0BaNT-uedBhHl1_Profundidad: -0.7m_38.03635_-0.68998_-2.7"

# Decode explicitly as UTF-8 rather than relying on the platform's locale
# default encoding, so the file is parsed the same way on every OS.
with open(file_path, encoding="utf-8") as f:
    data = json.load(f)

In [None]:
# Build one frame per depth sensor from the `temperature_a` series, keeping
# only the first reading whenever a timestamp is repeated.
buoy_series = data['series']['temperature_a']

df_sensor_02 = pd.DataFrame(
    buoy_series[id_sensor_02_meters],
    columns=['timestamp', 'temperature_02'],
).drop_duplicates(subset='timestamp', keep='first')

df_sensor_07 = pd.DataFrame(
    buoy_series[id_sensor_07_meters],
    columns=['timestamp', 'temperature_07'],
).drop_duplicates(subset='timestamp', keep='first')

# Inner join: only timestamps reported by both sensors are kept.
df_water_temperature = (
    df_sensor_02
    .merge(df_sensor_07, how='inner', on='timestamp')
    .set_index('timestamp')
)

### Data cleaning

Some temperature measurements are invalid (below -10 degrees). Any row in which either sensor reports such a value is removed.

In [None]:
# Keep a row only when both depth readings are at or above -10.0.
valid_02 = df_water_temperature['temperature_02'] >= -10.0
valid_07 = df_water_temperature['temperature_07'] >= -10.0
df_water_temperature = df_water_temperature[valid_02 & valid_07]

In [None]:
# Write the cleaned water-temperature table (indexed by timestamp) to a CSV
# named after the input file, relative to the current working directory.
df_water_temperature.to_csv(f"{file_name}.csv")