# Simple data analysis demo using tidal measurements

## Import libraries

In [17]:
import datetime as dt
import requests
import xml.etree.ElementTree as ET
import pandas as pd

## Function to acquire data from Kartverket:

In [15]:
def getTideObservations(log_start: str, log_end: str, location=None, interval: int = 60) -> pd.DataFrame:
    """
    Retrieve historical sea level observations from the Kartverket tide API
    for one location between two points in time.

    Parameters:
    log_start - string containing start date and time in ISO format (yyyy-mm-ddThh:mm:ss+hh:mm)
    log_end - string containing end date and time in ISO format (yyyy-mm-ddThh:mm:ss+hh:mm)
    location - dictionary with 'lat' and 'lon' coordinates. Defaults to Ålesund
               when omitted or None
    interval - number of minutes between measurements. Defaults to 60

    Returns:
    df - tabular data with one row per <waterlevel> element in the response;
         the columns are that element's XML attributes, kept as strings

    Raises:
    requests.HTTPError - if the API answers with an HTTP error status
    xml.etree.ElementTree.ParseError - if the response body is not valid XML
    ValueError - if the response contains no <locationdata> element
    """
    # Build the default inside the call so no dict object is shared between calls.
    if location is None:
        location = {'lat': '62.469414', 'lon': '6.151946'}

    # Generate API query parameters
    tidelevel_api = 'http://api.sehavniva.no/tideapi.php'
    params = {
        'lat': location['lat'],
        'lon': location['lon'],
        'fromtime': log_start,
        'totime': log_end,
        'datatype': 'obs',
        'refcode': 'msl',
        'lang': 'en',
        'interval': str(interval),
        'dst': '0',
        'tide_request': 'locationdata',
    }

    # Get tide data from API. Without a timeout requests can block forever on
    # a stalled connection; raise_for_status() turns an HTTP error page into
    # an explicit exception instead of a confusing XML parsing failure.
    response_API = requests.get(tidelevel_api, params=params, timeout=60)
    response_API.raise_for_status()

    # Parse data
    xmldata_root = ET.fromstring(response_API.content)
    locationdata = xmldata_root.find('locationdata')
    if locationdata is None:
        # find() returns None when the element is absent; fail with a
        # message that names the problem rather than an AttributeError.
        raise ValueError("Tide API response contains no <locationdata> element")

    waterlevel_measurements = [x.attrib for x in locationdata.iter('waterlevel')]
    return pd.DataFrame(waterlevel_measurements)


## Acquire data for 2024

In [49]:
import datetime as dt

# Both ends of the query window carry the same fixed UTC+02:00 offset.
_query_tz = dt.timezone(dt.timedelta(hours=2))

# First hour of 1 January 2024 through the 23:00 hour of 31 December 2024,
# serialised as ISO 8601 strings for the API query.
from_time = dt.datetime(2024, 1, 1, tzinfo=_query_tz).isoformat()
to_time = dt.datetime(2024, 12, 31, 23, tzinfo=_query_tz).isoformat()

tide_df = getTideObservations(log_start=from_time, log_end=to_time)

## Display 

In [50]:
tide_df  # bare expression ending the cell: the notebook renders the DataFrame as a table

Unnamed: 0,value,time,flag
0,16.4,2024-01-01T00:00:00+01:00,obs
1,45.3,2024-01-01T01:00:00+01:00,obs
2,53.8,2024-01-01T02:00:00+01:00,obs
3,50.3,2024-01-01T03:00:00+01:00,obs
4,34.5,2024-01-01T04:00:00+01:00,obs
...,...,...,...
6037,37.6,2024-09-11T06:00:00+01:00,obs
6038,16.8,2024-09-11T07:00:00+01:00,obs
6039,3.1,2024-09-11T08:00:00+01:00,obs
6040,-5.0,2024-09-11T09:00:00+01:00,obs


## Make time information more readable

In [51]:
# Parse every ISO timestamp exactly once and derive all date parts from the
# parsed objects, instead of walking the frame with iterrows() once per column.
_parsed_times = [dt.datetime.fromisoformat(stamp) for stamp in tide_df["time"]]

tide_df["year"] = [t.year for t in _parsed_times]
tide_df["month"] = [t.month for t in _parsed_times]
tide_df["day"] = [t.day for t in _parsed_times]
# Overwrite "time" last: afterwards it only holds "HH:MM", so this cell cannot
# be re-run on the same frame without fetching the data again.
tide_df["time"] = [t.strftime("%H:%M") for t in _parsed_times]

### Display changes

In [52]:
tide_df  # bare expression ending the cell: the notebook renders the updated DataFrame

Unnamed: 0,value,time,flag,year,month,day
0,16.4,00:00,obs,2024,1,1
1,45.3,01:00,obs,2024,1,1
2,53.8,02:00,obs,2024,1,1
3,50.3,03:00,obs,2024,1,1
4,34.5,04:00,obs,2024,1,1
...,...,...,...,...,...,...
6037,37.6,06:00,obs,2024,9,11
6038,16.8,07:00,obs,2024,9,11
6039,3.1,08:00,obs,2024,9,11
6040,-5.0,09:00,obs,2024,9,11


## Ensure value column is numeric

In [58]:
# The readings were taken from XML attributes and are therefore strings;
# convert them to a numeric dtype so they compare and aggregate as numbers.
tide_df["value"] = pd.to_numeric(tide_df["value"])
tide_df  # bare expression ending the cell: the notebook renders the DataFrame

Unnamed: 0,value,time,flag,year,month,day
0,16.4,00:00,obs,2024,1,1
1,45.3,01:00,obs,2024,1,1
2,53.8,02:00,obs,2024,1,1
3,50.3,03:00,obs,2024,1,1
4,34.5,04:00,obs,2024,1,1
...,...,...,...,...,...,...
6037,37.6,06:00,obs,2024,9,11
6038,16.8,07:00,obs,2024,9,11
6039,3.1,08:00,obs,2024,9,11
6040,-5.0,09:00,obs,2024,9,11


## Get peak:

In [60]:
# Select the row(s) holding the highest reading. Series.max() is vectorised
# and skips NaN, whereas the builtin max() iterates element by element and
# gives an order-dependent answer when a NaN is present.
tide_df[tide_df["value"] == tide_df["value"].max()]

Unnamed: 0,value,time,flag,year,month,day
5576,141.8,01:00,obs,2024,8,23


## Get lowest tide measurement

In [61]:
# Select the row(s) holding the lowest reading. Series.min() is vectorised
# and skips NaN, whereas the builtin min() iterates element by element and
# gives an order-dependent answer when a NaN is present.
tide_df[tide_df["value"] == tide_df["value"].min()]

Unnamed: 0,value,time,flag,year,month,day
1698,-133.3,18:00,obs,2024,3,11
