# NPFE Processing

This notebook supports the processing of NPFE descriptions.

Start by setting up the notebook: first provide your gCube token, then run the cell that loads the helper functions.

## Setup

In [None]:
import os

# The gCube token is passed to the WPS client below as the 'gcube-token' request header.
# Preferred: in the terminal, run `export GCUBE_TOKEN='<your token>'` before starting Jupyter.
gcube_token = os.environ.get('GCUBE_TOKEN')
# Alternatively, include the token here (do not commit it to version control)
# gcube_token = ''
if not gcube_token:
    # Fail early with an actionable message instead of a bare KeyError here,
    # or a failing service call later when the variable is set but empty.
    raise KeyError("GCUBE_TOKEN is not set or is empty; export it in the terminal "
                   "before starting Jupyter, or assign gcube_token in this cell.")

In [136]:
import json
import logging
import requests
import pandas as pd
from io import BytesIO, StringIO
from lxml import etree as et
from owslib.wps import WebProcessingService, ComplexDataInput, monitorExecution

# Allow up to 256 characters per column before pandas truncates displayed values.
pd.set_option('display.max_colwidth', 256)

# Configure root logging with defaults and only let ERROR (and above) through.
logging.basicConfig()
logging.root.setLevel(logging.ERROR)

# DataMiner WPS endpoint and the client used by read() and record().
# The token is handed to the client as the 'gcube-token' header.
# NOTE(review): the endpoint is plain HTTP, so the token header is presumably
# sent unencrypted - confirm whether an HTTPS endpoint is available.
dataminer_url = 'http://dataminer-prototypes.d4science.org/wps/WebProcessingService'
headers = {'gcube-token': gcube_token}
wps = WebProcessingService(url=dataminer_url, headers=headers)
    
def read():
    """Read the catalogued event descriptions for the current place.

    Runs the PFREADDESCRIPTIONS process on the DataMiner WPS with the
    notebook-level variable ``place`` as input, waits for it to complete,
    extracts the URL of the CSV output from the process response, downloads
    that CSV and parses it.

    Returns:
        pandas.DataFrame: the parsed CSV. ``beginning`` and ``end`` are
        timezone-aware timestamps converted to Europe/Helsinki;
        ``classification``, ``place`` and ``uri`` are read as strings and
        ``latitude`` and ``longitude`` as floats.

    Raises:
        requests.HTTPError: if downloading the CSV output fails.
        IndexError: if the process response has no entry described as "output".
    """
    identifier = 'org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.PFREADDESCRIPTIONS'
    execution = wps.execute(identifier, [('place', place)], output="non_deterministic_output")
    # Poll every 2 seconds until the process has finished.
    monitorExecution(execution, sleepSecs=2, download=False)
    output = execution.processOutputs[0]
    # The process output is an XML document; the result whose description is
    # "output" carries the URL of the CSV file in its Data element.
    doc = et.parse(BytesIO(output.retrieveData(headers=headers)))
    outputUrl = doc.xpath('/ogr:FeatureCollection/gml:featureMember/ogr:Result/d4science:Description[text() = "output"]/../d4science:Data/text()', namespaces={
        'ogr': 'http://ogr.maptools.org/',
        'gml': 'http://www.opengis.net/gml',
        'd4science': 'http://www.d4science.org'})[0]
    response = requests.get(outputUrl)
    # Fail loudly on an HTTP error instead of trying to parse an error page as CSV.
    response.raise_for_status()
    df = pd.read_csv(StringIO(response.content.decode('utf-8')),
                     dtype={'classification': 'str', 'place': 'str', 'latitude': 'float', 'longitude': 'float', 'uri': 'str'})
    # Parse as UTC first, then convert to local (Helsinki) time.
    for column in ('beginning', 'end'):
        df[column] = pd.to_datetime(df[column], utc=True).dt.tz_convert('Europe/Helsinki')
    # No styling here: a Styler built inside this function would be discarded,
    # and hiding columns is a display concern of the cell that shows the frame.
    return df

def record(d):
    """Record a duration value for the events currently held in ``df``.

    Runs the PFRECORDDURATION process on the DataMiner WPS, passing ``d`` as
    the ``value`` input and the comma-separated ``uri`` column of the
    notebook-level data frame ``df`` as the ``event_uris`` input.

    Args:
        d: the duration to record; it is sent as ``str(d)``.

    Returns:
        None.
    """
    identifier = 'org.gcube.dataanalysis.wps.statisticalmanager.synchserver.mappedclasses.transducerers.PFRECORDDURATION'
    event_uris = ','.join(df['uri'].tolist())
    execution = wps.execute(identifier, [('value', str(d)), ('event_uris', event_uris)], output="non_deterministic_output")
    # Wait for completion, as read() does, so the cell only finishes once the
    # process is done and a failed execution is surfaced by the monitor
    # instead of going unnoticed.
    monitorExecution(execution, sleepSecs=2, download=False)

## Processing

The catalogued event descriptions can be read into a data frame, which is subsequently used to process event descriptions, e.g., to compute average event durations or plot events on maps. Note that the system automatically translates the catalogued data into a data frame, which is easier to manipulate for data analysis.

In [137]:
# Place whose event descriptions are processed; read() sends it as the 'place' input.
place = 'Hyytiaelae'

In [138]:
# Fetch the event descriptions for `place` into a data frame.
# Note that this takes a couple of seconds; wait before you continue ...
df = read()

In [139]:
# Show the events without the 'uri' column. Dropping the column from a copy
# works across pandas versions, whereas Styler.hide_columns was deprecated in
# pandas 1.4 and removed in 2.0. `df` itself keeps its 'uri' column.
df.drop(columns=['uri'])

Unnamed: 0,beginning,end,classification,place,latitude,longitude
0,2011-10-01 12:00:00+03:00,2011-10-01 15:00:00+03:00,Class Ia,Hyytiälä,61.8456,24.2908
1,2013-04-04 10:00:00+03:00,2013-04-04 12:00:00+03:00,Class Ia,Hyytiälä,61.8456,24.2908


In [140]:
# Mean event duration in hours [h].
# Dividing by a one-hour Timedelta is true division, so fractional hours are
# kept. Casting with .astype('timedelta64[h]') truncates each duration to whole
# hours on older pandas and is rejected by pandas 2.x.
d = ((df['end'] - df['beginning']) / pd.Timedelta(hours=1)).mean()

In [141]:
# Display the mean event duration (in hours).
d

2.5

In [142]:
# Send the mean duration to the service together with the URIs of the events in df.
record(d)