## Step 1: Install cognite-sdk: 

```pip install cognite-sdk```

While we're at it, we might also want to install ```msal``` (Microsoft Authentication Library), which we will use later.

## Step 2: Authentication through OIDC

In [None]:
import atexit
import os

from cognite.client import CogniteClient
from msal import PublicClientApplication, SerializableTokenCache

# Deployment-specific identifiers - contact the Project Administrator to get these
TENANT_ID = "20d3c681-9982-4395-abd6-7973f7e0f26a"
CLIENT_ID = "7ee73a81-b136-404c-bccc-6f73047d88b0"
CDF_CLUSTER = "az-power-no-northeurope"  # api, westeurope-1 etc
COGNITE_PROJECT = "heco-dev"

# File in the working directory where the MSAL token cache is persisted
CACHE_FILENAME = "cache.bin"
# Scope requested from Azure AD, derived from the chosen CDF cluster
SCOPES = ["https://" + CDF_CLUSTER + ".cognitedata.com/.default"]

# Azure AD authority for the tenant: <host>/<tenant id>
AUTHORITY_HOST_URI = "https://login.microsoftonline.com"
AUTHORITY_URI = f"{AUTHORITY_HOST_URI}/{TENANT_ID}"
# Local port handed to the interactive login
PORT = 53000


def create_cache():
    """Create an MSAL token cache that is persisted in ``CACHE_FILENAME``.

    If the cache file already exists, its content is loaded into the new
    cache. An ``atexit`` hook is registered that writes the cache back to
    the same file when the interpreter exits, but only if the cache
    reports that its state has changed.

    Returns:
        The ``SerializableTokenCache`` instance, ready to be passed to an
        MSAL application as ``token_cache``.
    """
    cache = SerializableTokenCache()
    if os.path.exists(CACHE_FILENAME):
        # Context manager so the file handle is closed right after reading
        with open(CACHE_FILENAME, "r") as cache_file:
            cache.deserialize(cache_file.read())

    def _persist_cache():
        # Skip the write when nothing changed, leaving the file untouched
        if cache.has_state_changed:
            # Closing the file explicitly guarantees the content is flushed
            # to disk before the interpreter finishes shutting down
            with open(CACHE_FILENAME, "w") as cache_file:
                cache_file.write(cache.serialize())

    atexit.register(_persist_cache)
    return cache


def authenticate_azure(app):
    """Acquire credentials for ``SCOPES`` from the given MSAL application.

    The token cache is tried first: if the application knows an account,
    a silent acquisition is attempted for the first one. When there is no
    cached account, or the silent acquisition yields nothing (MSAL returns
    ``None`` when the cached tokens cannot be used or refreshed), an
    interactive login is started instead.

    Args:
        app: MSAL application object exposing ``get_accounts``,
            ``acquire_token_silent`` and ``acquire_token_interactive``.

    Returns:
        The result object of the token acquisition, as returned by MSAL.
    """
    creds = None

    # Firstly, check the cache to see if this end user has signed in before
    accounts = app.get_accounts()
    if accounts:
        creds = app.acquire_token_silent(SCOPES, account=accounts[0])

    if not creds:
        # Interactive login - make sure you have http://localhost:port in Redirect URI
        # in App Registration as type "Mobile and desktop applications"
        creds = app.acquire_token_interactive(scopes=SCOPES, port=PORT)

    return creds


# MSAL public-client application for this tenant, backed by the file-persisted token cache
app = PublicClientApplication(
    client_id=CLIENT_ID,
    authority=AUTHORITY_URI,
    token_cache=create_cache(),
)


def get_token():
    """Return the ``access_token`` entry of the credentials obtained for ``app``."""
    credentials = authenticate_azure(app)
    return credentials["access_token"]


# CDF client for the project; get_token is handed over as the token callable
client = CogniteClient(
    project=COGNITE_PROJECT,
    base_url="https://" + CDF_CLUSTER + ".cognitedata.com",
    client_name="cognite-python-dev",
    token=get_token,
    token_client_id=CLIENT_ID,
    token_url=AUTHORITY_URI + "/v2.0",
    debug=False,
)

# Print the result of inspecting the token the client is using
token_info = client.iam.token.inspect()
print(token_info)

In [None]:
# Quick check of the OIDC client: list a single time series
client.time_series.list(limit=1)

## Step 3: Authentication with API-key

In [None]:
from cognite.client import CogniteClient
from getpass import getpass

# Second client, authenticated with an API key that is read interactively via getpass
client_google = CogniteClient(
    project="heco-prod",
    base_url="https://power-no.cognitedata.com/",
    client_name="edvard",
    api_key=getpass("API KEY:"),
)

In [None]:
# Quick check of the API-key client: list a single sequence
client_google.sequences.list(limit=1)

## Step 4: Listing things

There is one separate function for listing each data type in CDF.

In [None]:
# Each resource type exposes its own list() call on the client
client.time_series.list()
client.assets.list()
client.sequences.list()
client.events.list()

The list functions let you be very selective about what data you are searching for. (Show the full list of parameters in the API documentation.)

Let us see how some of the parameters work

In [None]:
# "unit": if I, for example, know that my time series uses m3/s, then I can find those time series by writing
client.time_series.list(unit="m3/s")

# Each CDF project has many data sets within it, used to organize data. Maybe I know that what I am looking for is in a specific data set
client.time_series.list(unit="m3/s",data_set_external_ids=["uc:001:shop"])

# Or I might know the time range in which the time series was created
from cognite.client.data_classes.shared import TimestampRange
from datetime import datetime

client.time_series.list(
    unit="m3/s",
    data_set_external_ids=["uc:001:shop"],
    created_time=TimestampRange(
        min = datetime(2022,5,1,0).timestamp()*1000, # CDF uses timestamps in milliseconds since 1970-01-01 00:00; .timestamp() gives seconds, hence the *1000
        max = datetime(2022,5,3,0).timestamp()*1000
    )
)

# The limit parameter sets how many matches you want returned, so if I, for example, only want the first 5 matches I would write
client.time_series.list(
    unit="m3/s",
    data_set_external_ids=["uc:001:shop"],
    created_time=TimestampRange(
        min = datetime(2022,5,1,0).timestamp()*1000, # CDF uses timestamps in milliseconds since 1970-01-01 00:00; .timestamp() gives seconds, hence the *1000
        max = datetime(2022,5,3,0).timestamp()*1000
    ),
    limit = 5
)

# The reason why I am mentioning this up front is that the limit parameter has a default value of 25,
# thus if you want to fetch more than 25 elements you need to specify the limit, or set it to -1 (as below) or None to get all elements
client.time_series.list(
    unit="m3/s",
    data_set_external_ids=["uc:001:shop"],
    created_time=TimestampRange(
        min = datetime(2022,5,1,0).timestamp()*1000, # CDF uses timestamps in milliseconds since 1970-01-01 00:00; .timestamp() gives seconds, hence the *1000
        max = datetime(2022,5,3,0).timestamp()*1000
    ),
    limit = -1
)

# There may be situations where you already know what you are looking for and just want to fetch the information about that
# specific element. We can then use the retrieve function instead of the list function to get that specific element
client.time_series.retrieve(external_id="/Begna/Kraftverk/FASL-Plan-MW-bp")
# The retrieve function takes either an external id or an internal id (which is often referred to as just id). I recommend
# that you always use external ids, and I will not really go into why they both exist here; I just wanted you to be aware
# that there is another way to refer to specific data objects.

# Final note: if you ever want to find a time series whose properties you are not completely sure of, it might be better to use the Fusion UI.

In [None]:
# I now want to look a bit at the elements which are listed
listed_items = client.time_series.list(
    unit="m3/s",
    data_set_external_ids=["uc:001:shop"],
    created_time=TimestampRange(
        min = datetime(2022,5,1,0).timestamp()*1000, # CDF uses timestamps in milliseconds since 1970-01-01 00:00:00 UTC,
        max = datetime(2022,5,3,0).timestamp()*1000  # while .timestamp() gives seconds, hence the *1000.
    ),
    limit = 1
)

listed_items[0]

# We can see that we have a lot of information about the time series, but where is the data itself?

# For "sequences" and "time-series", simply listing or retrieving an object doesn't retrieve the data it contains; instead it only contains
# information about each of the objects. The reason for this is that it is very inefficient to fetch millions of datapoints just to see
# which time series exist.

# I will start by showing you how we can retrieve the data of the time series we just listed
client.datapoints.retrieve(external_id=listed_items[0].external_id,start=datetime(2022,1,1,0),end="now")

# It is easier to work with as a pandas dataframe, so we could do
client.datapoints.retrieve(external_id=listed_items[0].external_id,start=datetime(2022,1,1,0),end="now").to_pandas()

# Or we could have just worked with the list of datapoints directly
# For example, here's how you can access the values and timestamps of the retrieved datapoints separately
datapoints = client.datapoints.retrieve(external_id=listed_items[0].external_id,start=datetime(2022,1,1,0),end="now")
values = datapoints.value
timestamps = datapoints.timestamp

print(values,timestamps)

In [None]:
# In many situations there may be far more datapoints in a period than we need; to reduce the number of datapoints we can therefore use aggregates

# Say, for example, that I only want one datapoint per day; then I could write:
client.datapoints.retrieve(external_id=listed_items[0].external_id,start=datetime(2022,1,1,0),end="now",granularity="1d",aggregates=["average"])

# We could also have used max or min as the aggregate

In [None]:
# Retrieving a sequence is very similar to how time series are retrieved
client.sequences.data.retrieve_dataframe(external_id="SHOP_OE_base_mapping",start = 0, end = -1) # start = 0 indicates that I want everything from the first row

# Or I could fetch just the first 5 rows
client.sequences.data.retrieve_dataframe(external_id="SHOP_OE_base_mapping",start = 0, end = 5)

In [None]:
# Now I want to show you one of the unique capabilities that make CDF very good for working with real-world data,
# that is: Relationships.

# I said earlier that CDF maintains the relationships between data, and the way it does this is with a special data type called "relationships"

# (Presenter note) Show this for sequences in CDF, but this could for example also be a time series which is linked to a specific asset, or, as I show here,
# we can show that this plant has these generators.

# Relationships can also be accessed easily through the API. Let's do it here for an event:
# take the target external id of the first relationship that has the event as its source
client.relationships.list(source_external_ids=["POWEROPS_SHOP_RUN_1651485019415"]).to_pandas()["targetExternalId"][0]

# See the type of relationship from the label (second relationship that has the event as its target)
client.relationships.list(target_external_ids=["POWEROPS_SHOP_RUN_1651485019415"]).to_pandas()["labels"][1]

# All relationships that point at this sequence
client.relationships.list(target_external_ids=["SHOP_OE_incremental_mapping_multi_scenario_20_scenario_19_1651485012082"])


In [None]:
# Writing data to CDF

from cognite.client.data_classes import Sequence, TimeSeries
import pandas as pd

# Let us see how to create a sequence in CDF

# First need to find some data which we want to upload
# Column definitions for the sequence: one integer id column and one text column
column_def = [
    dict(valueType="LONG", externalId="user-number", description="Unique ID"),
    dict(valueType="String", externalId="user-name", description="User"),
]

# The rows to upload, given column by column; the keys match the column external ids above
df = pd.DataFrame(
    {
        "user-number": [45345, 23523, 345, 3626],
        "user-name": ["Edvard", "Jørgen", "Hauk", "Ole"],
    }
)

# Create the sequence; at first we only create it with the column definitions, the rows are inserted afterwards
# NOTE(review): data_sets.retrieve(...) presumably returns None for an unknown data set, which would make `.id` fail - confirm
client.sequences.create(
    Sequence(
        name = "My dummy sequence",
        external_id="my_sequence", 
        columns=column_def,
        data_set_id=client.data_sets.retrieve(external_id="uc:001:shop").id
    )
)
# (Presenter note) Show what happens if the external_id already exists, by running the code again

# Insert the data into the sequence; I like to use a dataframe as it is simple to work with.
client.sequences.data.insert_dataframe(dataframe=df, external_id="my_sequence")



In [None]:

# We can add some more rows by downloading the sequence, adding new rows and then re-uploading it
downloaded_df = client.sequences.data.retrieve_dataframe(
    external_id = "my_sequence",
    start=0,
    end=-1
)

# New rows are added under the next free index labels (4 and 5)
downloaded_df.loc[4] = [21341324,"Peder"]
downloaded_df.loc[5] = [2134324,"Stina"]

client.sequences.data.insert_dataframe(dataframe=downloaded_df, external_id="my_sequence")

In [None]:
# Create a time series (only the metadata; datapoints are inserted separately)
# NOTE(review): data_sets.retrieve(...) presumably returns None for an unknown data set, which would make `.id` fail - confirm
client.time_series.create(
    TimeSeries(
        name="Meeting attendance",
        external_id="meeting_attendance",
        data_set_id=client.data_sets.retrieve(external_id="uc:001:shop").id,
        unit="people"
    )
)


In [None]:
# Insert datapoints into time series

from datetime import datetime

# List of (datetime, value) tuples; remember time is given in UTC
# NOTE(review): these datetimes are naive (no tzinfo); whether the SDK reads them as UTC or local time may depend on the SDK version - confirm
datapoints = [
    (datetime(2022,6,20,9), 0),
    (datetime(2022,6,20,9,58), 5),
    (datetime(2022,6,20,10), 30),
    (datetime(2022,6,20,10,50), 24),
    (datetime(2022,6,20,11,50), 0),
]   

# Write the datapoints to the time series created earlier
client.datapoints.insert(datapoints, external_id="meeting_attendance")

In [None]:
# Creating a relationship between the data we have created

from cognite.client.data_classes import Relationship

# Relationship pointing from the time series (source) to the sequence (target), stored in the same data set
ts_seq_relationship = Relationship(
    external_id="ts_seq_relationship",
    source_external_id="meeting_attendance",
    source_type="timeseries",
    target_external_id="my_sequence",
    target_type="sequence",
    data_set_id=client.data_sets.retrieve(external_id="uc:001:shop").id,
)

# create() is given a list, so several relationships could be created in one call
client.relationships.create([ts_seq_relationship])

# Use Fusion UI to see the relationship

# We could also have put a label on this relationship to explain what type of relationship it is

In [None]:
# Deleting the data we created, as we do not want it to clutter the real data in the CDF project

# Be very careful with deleting data as you might end up deleting data which you did not intend to

# Deleting specific rows of a sequence
client.sequences.data.delete(external_id="my_sequence", rows=[1,2,3])

# Deleting the datapoints of a time series that fall within a time range
client.datapoints.delete_range(start=datetime(2022,6,20,10,30), end=datetime(2022,6,20,14,50), external_id="meeting_attendance")

# Each data type has its own delete function; the relationship is removed first,
# then the time series and the sequence it connected
client.relationships.delete(external_id="ts_seq_relationship")

client.time_series.delete(external_id="meeting_attendance")

client.sequences.delete(external_id="my_sequence")

# Remember to delete relationship!

