In [90]:
import requests
import json
import geojson
import ipywidgets as widgets

import pandas as pd
import geopandas as gpd
import xarray as xr

from shapely.geometry import Point
import pydeck as pdk

# Download some data
First we need to get some data. In this example we are interested in the production of salmon and trout in Norway. The Norwegian Fisheries Directorate provides this data in the form of xlsx files. But when we bring that into our environment, it needs some work to clean it up.

In [91]:
url = "https://www.fiskeridir.no/English/Aquaculture/Statistics/Atlantic-salmon-and-rainbow-trout/grow-out-production/sta-laks-mat-11-beh-bevegelse.xlsx"

# Read all sheets into a dictionary of DataFrames
xlsx_data = pd.read_excel(url, sheet_name=None)

# Concatenate all DataFrames into one with a new 'Year' column
combined_dfs = []
for year, df in xlsx_data.items():
    df['Year'] = year  # Add a new column with the year
    combined_dfs.append(df)

# Concatenate all DataFrames into one
combined_df = pd.concat(combined_dfs, ignore_index=True)

# Print or further process the combined DataFrame
print(combined_df)

       Laks, regnbueørret og ørret - matfiskproduksjon  Unnamed: 1  \
0    Atlantic salmon, Rainbow trout and Trout - Gro...         NaN   
1             Offisiell statistikk/Official statistics         NaN   
2                                                  NaN         NaN   
3                             Oppdatert pr. 12.10.2023         NaN   
4                                                  NaN         NaN   
..                                                 ...         ...   
634                                   Sogn og Fjordane   14372.974   
635                                          Hordaland   25279.932   
636                                           Rogaland    9156.298   
637                                      Øvrige fylker    2460.413   
638                                       Totalt/Total  141245.649   

     Unnamed: 2 Unnamed: 3 Unnamed: 4  Unnamed: 5 Unnamed: 6 Unnamed: 7  \
0           NaN        NaN        NaN         NaN        NaN        NaN   
1        

# Transforming up the data
In the next few cells, we are going to be transforming the dataset into something that is easier to work with. By the end of this process, we will have a nice table with the production data by year for both trout and salmon.

In [92]:
# Assuming you have already executed the code to create the 'combined_df' DataFrame
display(combined_df)


Unnamed: 0,"Laks, regnbueørret og ørret - matfiskproduksjon",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Year
0,"Atlantic salmon, Rainbow trout and Trout - Gro...",,,,,,,,,,,2022
1,Offisiell statistikk/Official statistics,,,,,,,,,,,2022
2,,,,,,,,,,,,2022
3,Oppdatert pr. 12.10.2023,,,,,,,,,,,2022
4,,,,,,,,,,,,2022
...,...,...,...,...,...,...,...,...,...,...,...,...
634,Sogn og Fjordane,14372.974,11863.052,8901.631,2328.085,15003.255,3073.681,2156.025,2409.516,176.102,2644.088,1998
635,Hordaland,25279.932,22722.024,18525.3,3760.313,25748.703,4895.527,4506.002,4050.685,506.095,4844.749,1998
636,Rogaland,9156.298,7170.751,5978.182,1360.449,8988.418,1080.841,1104.43,679.665,190.86,1274.746,1998
637,Øvrige fylker,2460.413,1767.518,1333.115,392.157,2502.659,111.115,0,79.856,31.259,0,1998


In [93]:
# Filtering the data
# Specify the desired terms
desired_terms = [
    "Troms og Finnmark",
    "Nordland",
    "Trøndelag",
    "Møre og Romsdal",
    "Vestland",
    "Rogaland",
    "Øvrige fylker"
]

# Filter the DataFrame based on the first column
filtered_df = combined_df[combined_df.iloc[:, 0].isin(desired_terms)]

# Duplicate the rows
filtered_df = pd.concat([filtered_df] * 2, ignore_index=True)

# Add a new column labeling rows with "Rainbow Trout" or "Atlantic Salmon"
filtered_df['Label'] = ['Rainbow Trout'] * (len(filtered_df) // 2) + ['Atlantic Salmon'] * (len(filtered_df) // 2)

# Display the modified DataFrame
display(filtered_df)

Unnamed: 0,"Laks, regnbueørret og ørret - matfiskproduksjon",Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Year,Label
0,Troms og Finnmark,112869,101118,90020,15061,108906,0,0,0,0,0,2022,Rainbow Trout
1,Nordland,96826,91261,75696,9325,103067,95,0,87,8,0,2022,Rainbow Trout
2,Trøndelag,78295,62345,62638,7993,70009,32,31,28,21,13,2022,Rainbow Trout
3,Møre og Romsdal,23092,54769,20050,5924,51886,2786,3990,3647,689,2441,2022,Rainbow Trout
4,Vestland,77433,76231,68208,15421,70034,13894,20907,14693,2833,17275,2022,Rainbow Trout
...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,Øvrige fylker,2502.659,2093.251,1341.3,673.294,2581.316,0,0,0,0,0,1999,Atlantic Salmon
218,Nordland,26558.51,20284.329,15289.263,4189.405,27232.013,999.795,1208.622,877.227,169.059,1162.131,1998,Atlantic Salmon
219,Møre og Romsdal,18235.991,13839.507,11051.526,3421.081,17672.687,7493.562,4824.52,5396.506,447.219,6474.357,1998,Atlantic Salmon
220,Rogaland,9156.298,7170.751,5978.182,1360.449,8988.418,1080.841,1104.43,679.665,190.86,1274.746,1998,Atlantic Salmon


In [94]:
# Define the mapping for column renaming
column_mapping = {
    'Laks, regnbueørret og ørret - matfiskproduksjon': 'County',
    'Unnamed: 1': 'Live stock pr. 1.1',
    'Unnamed: 2': 'Input',
    'Unnamed: 3': 'Output',
    'Unnamed: 4': 'Losses',
    'Unnamed: 5': 'Live stock pr. 12.31.',
    'Label': 'Species'
}

# Rename columns in the DataFrame
filtered_df.rename(columns=column_mapping, inplace=True)

# Display the DataFrame with the new column names
filtered_df

Unnamed: 0,County,Live stock pr. 1.1,Input,Output,Losses,Live stock pr. 12.31.,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Year,Species
0,Troms og Finnmark,112869,101118,90020,15061,108906,0,0,0,0,0,2022,Rainbow Trout
1,Nordland,96826,91261,75696,9325,103067,95,0,87,8,0,2022,Rainbow Trout
2,Trøndelag,78295,62345,62638,7993,70009,32,31,28,21,13,2022,Rainbow Trout
3,Møre og Romsdal,23092,54769,20050,5924,51886,2786,3990,3647,689,2441,2022,Rainbow Trout
4,Vestland,77433,76231,68208,15421,70034,13894,20907,14693,2833,17275,2022,Rainbow Trout
...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,Øvrige fylker,2502.659,2093.251,1341.3,673.294,2581.316,0,0,0,0,0,1999,Atlantic Salmon
218,Nordland,26558.51,20284.329,15289.263,4189.405,27232.013,999.795,1208.622,877.227,169.059,1162.131,1998,Atlantic Salmon
219,Møre og Romsdal,18235.991,13839.507,11051.526,3421.081,17672.687,7493.562,4824.52,5396.506,447.219,6474.357,1998,Atlantic Salmon
220,Rogaland,9156.298,7170.751,5978.182,1360.449,8988.418,1080.841,1104.43,679.665,190.86,1274.746,1998,Atlantic Salmon


In [95]:
df = pd.DataFrame(filtered_df)

# Set hierarchical index with 'County', 'Year', and 'Species'
df.set_index(['County', 'Year', 'Species'], inplace=True)

# Sort the index for better readability
df.sort_index(axis=0, level=['County', 'Year', 'Species'], inplace=True)

# Reset the index to make 'County', 'Year', and 'Species' regular columns
df.reset_index(inplace=True)

# Display the modified DataFrame
display(df)

Unnamed: 0,County,Year,Species,Live stock pr. 1.1,Input,Output,Losses,Live stock pr. 12.31.,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,Møre og Romsdal,1998,Atlantic Salmon,18235.991,13839.507,11051.526,3421.081,17672.687,7493.562,4824.52,5396.506,447.219,6474.357
1,Møre og Romsdal,1998,Rainbow Trout,18235.991,13839.507,11051.526,3421.081,17672.687,7493.562,4824.52,5396.506,447.219,6474.357
2,Møre og Romsdal,1999,Atlantic Salmon,17521.421,14825.01,11225.926,2984.949,18135.556,6474.357,5326.005,4681.608,396.41,6722.344
3,Møre og Romsdal,1999,Rainbow Trout,17521.421,14825.01,11225.926,2984.949,18135.556,6474.357,5326.005,4681.608,396.41,6722.344
4,Møre og Romsdal,2000,Atlantic Salmon,18716.258,15777.445,12829.409,2217.981,19446.313,6722.344,6814.123,4683.785,465.103,8387.579
...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,Øvrige fylker,2020,Rainbow Trout,7315,921,2320,797,5120,305,126,161,16,254
218,Øvrige fylker,2021,Atlantic Salmon,5137,7320,4338,576,7543,198,141,159,9,171
219,Øvrige fylker,2021,Rainbow Trout,5137,7320,4338,576,7543,198,141,159,9,171
220,Øvrige fylker,2022,Atlantic Salmon,7539,873,3784,1192,3435,261,193,141,23,289


In [96]:
# Duplicate the rows with the first three columns
filtered_df = pd.concat([df.iloc[:, :3]] * 2, ignore_index=True)

# Add the last five columns' values under columns 4 to 8
filtered_df[df.columns[3:8]] = df.iloc[:, 3:].values.reshape(-1, 5)

# Display the modified DataFrame
display(filtered_df)

Unnamed: 0,County,Year,Species,Live stock pr. 1.1,Input,Output,Losses,Live stock pr. 12.31.
0,Møre og Romsdal,1998,Atlantic Salmon,18235.991,13839.507,11051.526,3421.081,17672.687
1,Møre og Romsdal,1998,Rainbow Trout,7493.562,4824.52,5396.506,447.219,6474.357
2,Møre og Romsdal,1999,Atlantic Salmon,18235.991,13839.507,11051.526,3421.081,17672.687
3,Møre og Romsdal,1999,Rainbow Trout,7493.562,4824.52,5396.506,447.219,6474.357
4,Møre og Romsdal,2000,Atlantic Salmon,17521.421,14825.01,11225.926,2984.949,18135.556
...,...,...,...,...,...,...,...,...
439,Øvrige fylker,2020,Rainbow Trout,198,141,159,9,171
440,Øvrige fylker,2021,Atlantic Salmon,7539,873,3784,1192,3435
441,Øvrige fylker,2021,Rainbow Trout,261,193,141,23,289
442,Øvrige fylker,2022,Atlantic Salmon,7539,873,3784,1192,3435


In [97]:
#Add a representative lat long geometry location for each County
county_lat_long = {
    'Troms og Finnmark': (70.594, 21.067),
    'Nordland': (67.419, 12.918),
    'Trøndelag': (64.492, 9.739),
    'Møre og Romsdal': (62.9849, 6.5318),
    'Vestland': (60.9533, 4.3397),
    'Rogaland': (58.7555, 5.2953),
    'Øvrige fylker': (61.406, 8.631)
}

filtered_df['Location'] = filtered_df['County'].apply(lambda x: Point(county_lat_long[x][0], county_lat_long[x][1]) if x in county_lat_long else None)


In [98]:
filtered_df.head()

Unnamed: 0,County,Year,Species,Live stock pr. 1.1,Input,Output,Losses,Live stock pr. 12.31.,Location
0,Møre og Romsdal,1998,Atlantic Salmon,18235.991,13839.507,11051.526,3421.081,17672.687,POINT (62.9849 6.5318)
1,Møre og Romsdal,1998,Rainbow Trout,7493.562,4824.52,5396.506,447.219,6474.357,POINT (62.9849 6.5318)
2,Møre og Romsdal,1999,Atlantic Salmon,18235.991,13839.507,11051.526,3421.081,17672.687,POINT (62.9849 6.5318)
3,Møre og Romsdal,1999,Rainbow Trout,7493.562,4824.52,5396.506,447.219,6474.357,POINT (62.9849 6.5318)
4,Møre og Romsdal,2000,Atlantic Salmon,17521.421,14825.01,11225.926,2984.949,18135.556,POINT (62.9849 6.5318)


In [99]:
# Set Year to be an integer (for the ingest)
filtered_df['Year'] = filtered_df['Year'].astype('int32')

# Exporting as csv
Now we have a nice looking table, we can export it as a csv file or take it one step further and upload it to our own data collection in ODP.

In [100]:
# Save pandas to csv
csv_filename = "atlantic-salmon-rainbow-trout-and-trout-grow-out-production.csv"

# Save the DataFrame to a CSV file
filtered_df.to_csv(csv_filename, index=False)

# Display a message indicating successful save
print(f"DataFrame has been saved to {csv_filename}")

DataFrame has been saved to atlantic-salmon-rainbow-trout-and-trout-grow-out-production.csv


# Uploading Data to ODP
In the next step, we are going to be doing a few things that will allow us to upload our data to ODP.
1. Creating a data collection
2. Creating a dataset inside collection
3. Definin a schema for our data
4. Uploading the data to ODP

We are going to be following the quickstart guide to help us: https://docs.hubocean.earth/guides/quickstart/
    
But first we need our token.

In [101]:
token = requests.post("http://localhost:8000/access_token").json()['token']

In [102]:
# print(token)

In [103]:
headers = {
    "Authorization": f"Bearer {token}",
    "Content-Type": "application/json",
    "X-ODP-CHUNKED-ENCODING": "false"
}
base_url = "https://api.hubocean.earth"



## Creating a Data Collection
First we need to create our data collection. In the first cell we name both the collection and the dataset. These names need to be computer friendly, but we can make nicer ones later on.

<font color='red'>It is necessary to change the collection and dataset name below</font>


In [104]:
dataset_collection_name = "norwegian-directorat-of-fisheries-grow-out-production_tr" # Use the existing name or make-your-own-computer-friendly-name
dataset_name = "overview-of-the-live-stock-1998-2022_tr" # Use the existing name or make-your-own-computer-friendly-name

In [105]:
# Create data collection

endpoint = f"/catalog"
url = base_url + endpoint

body = {
    "kind": "catalog.hubocean.io/dataCollection",
    "version": "v1alpha3",
    "metadata": {
        "name": f"{dataset_collection_name}",
        "display_name": "Norwegian Directorat of Fisheries- Grow Out Production",
        "description": "Overview of the livestock 1998-2022",
        "labels": {
            "hubocean.io/test": 'true'
        }
    },
    "spec": {
        "distribution": {
            "published_by": {
                "contact": "Tom Redd <mail@address.earth>",
                "organisation": "HUB Ocean"
            },
            "published_date": "2019-06-19T06:00:00",
            "website": "https://hubocean.earth",
            "license": {
                "name": "propriatary",
                "full_text": "This is a very strict legal text describing the data license for this data collection. The lawyer would be proud.",
                "href": "www.license.com"
            }
        },
        "tags": ["test", "hubocean"]
    }
}

response = requests.post(url, json=body, headers=headers)

if response.status_code == 200:
    json_response = response.json()
else:
       print(f"Request failed with status code {response.status_code} - {response.text}")

Request failed with status code 409 - {"error": "Resource already exist: qualified_name=catalog.hubocean.io/dataCollection/norwegian-directorat-of-fisheries-grow-out-production_tr", "request_id": "1023b31b-da5b-40d9-971a-3c7c6304a64e"}


In [106]:
## Check to see if the data collection exists

resource_group = "catalog.hubocean.io"
resource_type = "dataCollection"
endpoint = f"/catalog/{resource_group}/{resource_type}/{dataset_collection_name}"
url = base_url + endpoint

response = requests.get(url, headers=headers)

if response.status_code == 200:
    json_response = response.json()
else:
   print(f"Request failed with status code {response.status_code} - {response.text}")

In [107]:
json_response

{'kind': 'catalog.hubocean.io/dataCollection',
 'version': 'v1alpha3',
 'metadata': {'name': 'norwegian-directorat-of-fisheries-grow-out-production_tr',
  'display_name': 'Norwegian Directorat of Fisheries- Grow Out Production',
  'description': 'Overview of the livestock 1998-2022',
  'uuid': 'f3260f3c-0f76-4e95-a25a-b82ed6b73c60',
  'labels': {'hubocean.io/test': 'true'},
  'owner': '02784a85-e8b8-4cf6-9210-97bedfac37e5'},
 'status': {'num_updates': 0,
  'created_time': '2024-01-24T13:25:40.649884',
  'created_by': '02784a85-e8b8-4cf6-9210-97bedfac37e5',
  'updated_time': '2024-01-24T13:25:40.649884',
  'updated_by': '02784a85-e8b8-4cf6-9210-97bedfac37e5',
  'deleted_time': None,
  'deleted_by': None},
 'spec': {'tags': ['test', 'hubocean'],
  'distribution': {'license': {'href': 'www.license.com',
    'name': 'propriatary',
    'full_text': 'This is a very strict legal text describing the data license for this data collection. The lawyer would be proud.'},
   'website': 'https://hub

## Creating a Dataset
Now we have a data collection we can make one or multiple datasets inside it.

In [108]:
#dataset inside collection

endpoint = "/catalog"
url = base_url + endpoint

body = {
    "kind": "catalog.hubocean.io/dataset",
    "version": "v1alpha3",
    "metadata": {
        "name": f"{dataset_name}",
        "display_name": "Atlantic salmon, Rainbow trout and Trout - Grow out production",
        "description": "Official statistics",
        "labels": {
            "hubocean.io/test": "true"
        }
    },
    "spec": {
        "data_collection": f"catalog.hubocean.io/dataCollection/{dataset_collection_name}",
        "storage_class": "registry.hubocean.io/storageClass/tabular",
        "storage_controller": "registry.hubocean.io/storageController/storage-tabular",
        "maintainer": {
            "contact": "Redd, Tom <tom.redd@oceandata.earth>",
            "organisation": "HUB Ocean"
        }
    }
}

response = requests.post(url, json=body, headers=headers)

if response.status_code == 200:
    json_response = response.json()
    print(json_response)

else:
    print(f"Request failed with status code {response.status_code} - {response.text}")

Request failed with status code 409 - {"error": "Resource already exist: qualified_name=catalog.hubocean.io/dataset/overview-of-the-live-stock-1998-2022_tr", "request_id": "3f05bfbc-6890-4906-ad83-3b39ba363326"}


## Creating a Schema
Before we upload the data, we need to make a schema describing the data. If you look above you will see that the schema coresponds to the column headings.

In [109]:
# Create table schema

kind = "catalog.hubocean.io/dataset"

endpoint = f"/data/{kind}/{dataset_name}/schema"
body = {
    "table_schema": {
        "County": {
            "type": "string"
        },
        "Year": {
            "type": "int"
        },
        "Species": {
            "type": "string"
        },
        "Live stock pr. 1.1": {
            "type": "double"
        },
        "Input": {
            "type": "double"
        },
        "Output": {
            "type": "double"
        },
        "Losses": {
            "type": "double"
        },
        "Live stock pr. 12.31.": {
            "type": "double"
        },
        "Location": {
        "type": "geometry"
        },
    },
    "table_description": "Overview over the live stock by county. The number of units in 1000",
    "geospatial_partition_columns": [
        "Location"
    ],
    "geospatial_partition_hash_precision": 5,
    "table_metadata": {
        "geometry": {
            "primary_location": "Location"
        }
    }
}


url = base_url + endpoint
response = requests.post(url, json=body, headers=headers)

if response.status_code == 200:
    json_response = response.json()
    print(json_response)

else:
    print(f"Request failed with status code {response.status_code} - {response.text}")

Request failed with status code 409 - {"error": "Table overview-of-the-live-stock-1998-2022_tr already exists", "request_id": "f2a9406b-751d-461c-be46-30a90a493dbe"}


In [110]:
# Get Schema by dataset name

kind = "catalog.hubocean.io/dataset"

endpoint = f"/data/{kind}/{dataset_name}/schema"

url = base_url + endpoint
response = requests.get(url, headers=headers)

if response.status_code == 200:
    json_response = response.json()
    print(json_response)

else:
    print(f"Request failed with status code {response.status_code} - {response.text}")

{'name': 'overview-of-the-live-stock-1998-2022_tr', 'table_schema': {'Live stock pr. 12.31.': {'type': 'double', 'metadata': {}, 'nullable': True}, 'Output': {'type': 'double', 'metadata': {}, 'nullable': True}, 'Input': {'type': 'double', 'metadata': {}, 'nullable': True}, 'County': {'type': 'string', 'metadata': {}, 'nullable': True}, 'Year': {'type': 'int', 'metadata': {}, 'nullable': True}, 'Losses': {'type': 'double', 'metadata': {}, 'nullable': True}, 'Live stock pr. 1.1': {'type': 'double', 'metadata': {}, 'nullable': True}, 'Species': {'type': 'string', 'metadata': {}, 'nullable': True}, 'Location': {'type': 'geometry', 'metadata': {}, 'nullable': True}}, 'partitioning': None, 'metadata_path': 'cdf_open_table/table_metadata/overview-of-the-live-stock-1998-2022_tr/overview-of-the-live-stock-1998-2022_tr_metadata.json', 'commit_path': 'cdf_open_table/commit/active/overview-of-the-live-stock-1998-2022_tr/overview-of-the-live-stock-1998-2022_tr_commit.json', 'commit_lock_path': 'cd

## Uploading Data to ODP
Now we actually get to upload the data from the table we created. We are using a simple function to automate this process

In [111]:
gdf = gpd.GeoDataFrame(filtered_df, crs="EPSG:4326", geometry="Location")

In [112]:
json_data = json.loads(gdf.to_json())["features"]
json_data[0]

{'id': '0',
 'type': 'Feature',
 'properties': {'County': 'Møre og Romsdal',
  'Input': 13839.507,
  'Live stock pr. 1.1': 18235.991,
  'Live stock pr. 12.31.': 17672.687,
  'Losses': 3421.081,
  'Output': 11051.526,
  'Species': 'Atlantic Salmon',
  'Year': 1998},
 'geometry': {'type': 'Point', 'coordinates': [62.9849, 6.5318]}}

In [117]:
json_data_selected_fields = [
    {
        'County': obj['properties']['County'],
        'Input': obj['properties']['Input'],
        'Live stock pr. 1.1': obj['properties']['Live stock pr. 1.1'],
        'Live stock pr. 12.31.': obj['properties']['Live stock pr. 12.31.'],
        'Losses': obj['properties']['Losses'],
        'Output': obj['properties']['Output'],
        'Species': obj['properties']['Species'],
        'Year': obj['properties']['Year'],
        'Location': obj['geometry']

    }
    for obj in json_data
]
json_data_selected_fields[0]

{'County': 'Møre og Romsdal',
 'Input': 13839.507,
 'Live stock pr. 1.1': 18235.991,
 'Live stock pr. 12.31.': 17672.687,
 'Losses': 3421.081,
 'Output': 11051.526,
 'Species': 'Atlantic Salmon',
 'Year': 1998,
 'Location': {'type': 'Point', 'coordinates': [62.9849, 6.5318]}}

In [118]:
# Create datapoints for all rows in the DataFrame

datapoints = json_data_selected_fields
# Create the body for the request
body = {"data": datapoints}
print(datapoints[1])


{'County': 'Møre og Romsdal', 'Input': 4824.52, 'Live stock pr. 1.1': 7493.562, 'Live stock pr. 12.31.': 6474.357, 'Losses': 447.219, 'Output': 5396.506, 'Species': 'Rainbow Trout', 'Year': 1998, 'Location': {'type': 'Point', 'coordinates': [62.9849, 6.5318]}}


In [119]:
# POST datapoints
kind = "catalog.hubocean.io/dataset"

endpoint = f"/data/{kind}/{dataset_name}"

url = base_url + endpoint

response = requests.post(url, json=body, headers=headers)

if response.status_code == 200:
    json_response = response.json()
    print(json_response)
else:
    print(f"Request failed with status code {response.status_code} - {response.text}")

{'status': 'OK', 'message': 'Data points are added.'}


In [120]:
# Query for our dataset with the OQS syntax.

resource_group = "catalog.hubocean.io"
resource_type = "dataset"

endpoint = f"/data/{resource_group}/{resource_type}/{dataset_name}/list"

body = {
    "#EQUALS": [
        "$Species",
        "Atlantic Salmon"
     ]
 }

#alternative filter
body = {
    "#GREATER_THAN": [
        "$Losses",
        15000
     ]
 }



url = base_url + endpoint
response = requests.post(url, json=body, headers=headers)

if response.status_code == 200:
    json_response = response.json()

else:
    print(f"Request failed with status code {response.status_code} - {response.text}")


In [121]:
pd.json_normalize(json_response['data'], max_level=0)

Unnamed: 0,Live stock pr. 12.31.,Output,Input,Year,Losses,Live stock pr. 1.1,County,Species,Location
0,113164.0,63951.0,86146.0,2012,15512.0,106481.0,Nordland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [67.419, 12.9..."
1,113164.0,63951.0,86146.0,2013,15512.0,106481.0,Nordland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [67.419, 12.9..."
2,108906.0,90020.0,101118.0,2016,15061.0,112869.0,Nordland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [67.419, 12.9..."
3,108906.0,90020.0,101118.0,2017,15061.0,112869.0,Nordland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [67.419, 12.9..."
4,71930.0,64375.0,72157.0,2003,16801.0,80950.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
5,71930.0,64375.0,72157.0,2004,16801.0,80950.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
6,77407.0,61214.0,79938.0,2005,15016.0,73700.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
7,77407.0,61214.0,79938.0,2006,15016.0,73700.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
8,70034.0,68208.0,76231.0,2007,15421.0,77433.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
9,70034.0,68208.0,76231.0,2008,15421.0,77433.0,Rogaland,Atlantic Salmon,"{'type': 'Point', 'coordinates': [58.7555, 5.2..."
