# Create the Cognite Client

In [1]:
import sys
from pathlib import Path

utils = str(Path("../utils").resolve())
if utils not in sys.path:
    sys.path.append(utils)

from cognite_auth import interactive_client

client = interactive_client()

# 1 Create a data set

In [3]:
from cognite.client.data_classes import DataSet

my_data_set = DataSet(external_id = 'prefix', name = 'prefix')

created_data_set = client.data_sets.create(my_data_set)

data_set_id = created_data_set.id

print(data_set_id)

1029164115014335


# 2 Create Asset Hierarchy

In [4]:
from cognite.client.data_classes import Asset

# Create Root Asset
root_asset = client.assets.create(Asset(name='world', data_set_id=data_set_id))
root_asset

Unnamed: 0,value
name,world
data_set_id,1029164115014335
metadata,{}
id,8329621812797429
created_time,2024-09-17 07:46:13.496000
last_updated_time,2024-09-17 07:46:13.496000
root_id,8329621812797429


In [5]:
from cognite.client.data_classes import AssetUpdate

# Update the name of the root asset
my_update = AssetUpdate(id=root_asset.id).name.set("global")
client.assets.update(my_update)

Unnamed: 0,value
name,global
data_set_id,1029164115014335
metadata,{}
id,8329621812797429
created_time,2024-09-17 07:46:13.496000
last_updated_time,2024-09-17 07:46:17.596000
root_id,8329621812797429


In [6]:
import pandas as pd

# Read country data and see all unique regions
df = pd.read_csv('../data/all_countries.csv')
df['region'].unique()

array(['Asia', 'Europe', 'Africa', 'Oceania', 'Americas', nan],
      dtype=object)

In [7]:
# Create each region as a new asset
store = []
for region in df['region'].unique():
    asset=Asset(name=region, data_set_id=data_set_id,parent_id=root_asset.id)
    store.append(asset)

client.assets.create(store)

ValueError: Out of range float values are not JSON compliant. Make sure your data does not contain NaN(s) or +/- Inf!

In [None]:
# Find the mapping between the region level asset and its id
region_to_id = client.assets.list(data_set_ids=[data_set_id]).to_pandas()[['name','id']].set_index('name')['id'].to_dict()
region_to_id

In [None]:
# Find all countries and which regions they belong to
countries_to_regions = df[['name','region']].set_index('name').to_dict()['region']
countries_to_regions

In [None]:
# Create an asset for each country
store = []
for country in countries_to_regions:
    
    region = countries_to_regions[country]
    asset=Asset(name=country, data_set_id=data_set_id,parent_id=region_to_id[region])
    store.append(asset)
    
client.assets.create(store)

In [None]:
# Find all assets in the Europe region
client.assets.list(data_set_ids=[data_set_id],parent_ids=[region_to_id['Europe']],limit=-1).to_pandas()

# 3 Adding Time Series Data

In [None]:
# Read population data
df= pd.read_csv('../data/populations_postprocessed.csv', index_col=0, parse_dates=True)
df.head()

In [None]:
# Find mapping of country to its id
country_to_id = client.assets.list(data_set_ids=[data_set_id], limit=-1).to_pandas()[['name','id']].set_index('name')['id'].to_dict()

country_to_id

In [None]:
from cognite.client.data_classes import TimeSeries

# Create a time series object for each country to store population data
store = []
for country in df.columns:
    ts = TimeSeries(name=f'{country}_population',data_set_id=data_set_id, asset_id=country_to_id[country])
    store.append(ts)
    
client.time_series.create(store)

In [None]:
# Change the name of the columns in the data frame
df.columns = df.columns+'_population'

# Find the mapping of the time series to its id
ts_to_id = client.time_series.list(data_set_ids=[data_set_id], limit=-1).to_pandas()[['name','id']].set_index('name')['id'].to_dict()

# Rename data frame so that columns correspond to the time seris id
df = df.rename(columns=ts_to_id)

df.head()

In [None]:
# Insert the population data as a dataframe
client.time_series.data.insert_dataframe(df, external_id_headers=False)

In [None]:
# Find the latest value for the population of Benin
client.time_series.data.retrieve_latest(ts_to_id['Benin_population'])

In [None]:
# List all assets in Europe
assets = client.assets.list(data_set_ids=[data_set_id],parent_ids=[region_to_id['Europe']], limit=-1)

# List all time series of those assets
ts = client.time_series.list(data_set_ids=[data_set_id],asset_ids=[asset.id for asset in assets], limit=-1)

# Retrieve the latest data for all of these time series
data = client.time_series.data.retrieve_latest([item.id for item in ts]).to_pandas()

data.head()

In [None]:
# Perform a sum to evaluate whole population
data.T.sum()

## 4. Uploading Files

In [None]:
import os

# List all files and upload them
for item in os.listdir('../data/files'):
    country = item.split('.')[0]
    client.files.upload(f"../data/files/{item}", name=f"{country}_data_sheet",data_set_id=data_set_id, asset_ids=[country_to_id[country]])

In [None]:
# List all files related to Vanuatu
client.files.list(asset_ids=[country_to_id['Vanuatu']])

## 5. Adding events

In [None]:
# Read event data
df = pd.read_csv('../data/events.csv')
df.head()

In [None]:
from cognite.client.data_classes import Event


store = []
# Cycle through each row
for _,row in df.iterrows():
    
    # Unpack the row
    _,_,_,Disaster_Type,Disaster_Subtype,_,Country,_,_,_,Location,_,Start_Year,Start_Month,Start_Day,End_Year,End_Month,End_Day = row
        
    # Create start and end datetimes
    start_date = pd.to_datetime(f'{Start_Year}-{Start_Month}-{int(Start_Day)}')
    end_date = pd.to_datetime(f'{End_Year}-{End_Month}-{int(End_Day)}')
    
    # Create metadata
    metadata = {'Location':Location}
    
    # Create the event and store
    event = Event(start_time=int(start_date.timestamp()*1000), end_time=int(end_date.timestamp()*1000),metadata=metadata,data_set_id=data_set_id, type=Disaster_Type, subtype=Disaster_Subtype, asset_ids=[country_to_id[Country]])
    store.append(event)

client.events.create(store)

In [None]:
client.events.list(data_set_ids=[data_set_id], type='Volcanic activity')

## 6. Add Labels

In [None]:
client.labels.list()

In [None]:
for country in ['Norway','Canada','Latvia']:

    my_update = AssetUpdate(id=country_to_id[country]).labels.set('cold-countries')

    client.assets.update(my_update)

In [None]:
for country in ['Ghana','Qatar','Malaysia']:

    my_update = AssetUpdate(id=country_to_id[country]).labels.set('hot-countries')

    client.assets.update(my_update)

In [None]:
from cognite.client.data_classes import LabelFilter

my_filter = LabelFilter(contains_any=['cold-countries'])

client.assets.list(labels=my_filter,data_set_ids=[data_set_id])

## 7. Final Task:  Clean-Up and Delete the Data in your Data Set


In [None]:
# Assets
all_assets = [asset.id for asset in client.assets.list(limit=-1, data_set_ids=[data_set_id])]
client.assets.delete(all_assets)

# Time series
all_time_series = [ts.id for ts in client.time_series.list(limit=-1, data_set_ids=[data_set_id])]
client.time_series.delete(all_time_series)

# Files
all_files = [file.id for file in client.files.list(limit=-1, data_set_ids=[data_set_id])]
client.files.delete(all_files)

# Events
all_events = [event.id for event in client.events.list(limit=-1, data_set_ids=[data_set_id])]
client.events.delete(all_events)


print('Success!')