# Nuclio - Generator function

## Environment

In [1]:
# nuclio: ignore
import nuclio

### Configurations

In [2]:
import os
import datetime

In [3]:
# nuclio: ignore
# Set initial timestamp as 1 day ago (epoch seconds, stored as a string in this process' environment)
os.environ['INITIAL_TS'] = str(int((datetime.datetime.now()-datetime.timedelta(days=1)).timestamp()))
print(os.getenv('INITIAL_TS', 0))

1577087965


In [4]:
%%nuclio config 

# Trigger: a cron trigger named "secs" with a 10 second interval
spec.triggers.secs.kind = "cron"
spec.triggers.secs.attributes.interval = "10s"

# Base image used for the function build
spec.build.baseImage = "python:3.6-jessie"

%nuclio: setting spec.triggers.secs.kind to 'cron'
%nuclio: setting spec.triggers.secs.attributes.interval to '10s'
%nuclio: setting spec.build.baseImage to 'python:3.6-jessie'


In [27]:
%nuclio mount /User ~/

mounting volume path /User as ~/


### Set configuration file

In [31]:
%nuclio env METRICS_CONFIGURATION_FILEPATH=/User/mlrun-demos/demos/netops/configurations/metrics-configuration.yaml

%nuclio: setting 'METRICS_CONFIGURATION_FILEPATH' environment variable


### Setups

In [8]:
%%nuclio cmd -c

# Utils
pip install pyarrow
pip install pyyaml --upgrade
pip install pandas
pip install pytimeparse

# Igz DB
pip install v3io_frames --upgrade

# Function
# NOTE(review): v3io-generator is installed from the TestPyPI index and none of the packages in this cell is version-pinned; confirm this is intended
pip install -i https://test.pypi.org/simple/ v3io-generator
pip install faker

### Variables

In [42]:
# DB Config
%nuclio env %v3io

In [9]:
%%nuclio env

# Deployment
SAVE_DEPLOYMENT=1
DEPLOYMENT_TABLE=netops_devices

# Metrics

# Parquet target directory (for the TSDB demo use the bare table name netops_metrics instead)
SAVE_TO=/v3io/bigdata/netops_metrics_parquet

SECS_TO_GENERATE=3600

# Save as: 1 writes to TSDB, 0 writes Parquet files
SAVE_TO_TSDB=0

%nuclio: setting 'V3IO_FRAMESD' environment variable
%nuclio: setting 'V3IO_USERNAME' environment variable
%nuclio: setting 'V3IO_ACCESS_KEY' environment variable
%nuclio: setting 'SAVE_DEPLOYMENT' environment variable
%nuclio: setting 'DEPLOYMENT_TABLE' environment variable
%nuclio: setting 'SAVE_TO' environment variable
%nuclio: setting '# SAVE_TO' environment variable
%nuclio: setting 'INITIAL_TIMESTAMP' environment variable
%nuclio: setting 'SECS_TO_GENERATE' environment variable
%nuclio: setting 'SAVE_TO_TSDB' environment variable


%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line
%nuclio: cannot find "=" in line


## Function

In [10]:
# import os # Already imported earlier
import time
import yaml
import pandas as pd
import itertools

# DB Connection
import v3io_frames as v3f

# Data generator
from v3io_generator import metrics_generator, deployment_generator

### Helper functions

In [11]:
def _create_deployment():
    """Build a fresh deployment table with the v3io deployment generator.

    Three levels are registered in order -- 'company', 'data_center' and
    'device' -- each with number=2 and a faker provider as its level type.

    Returns:
        Whatever ``generate_deployment()`` produces for that hierarchy.
    """
    print('creating deployment')

    generator = deployment_generator.deployment_generator()
    fake = generator.get_faker()

    # (level name, faker provider attribute) in registration order
    level_specs = (
        ('company', 'company'),
        ('data_center', 'street_name'),
        ('device', 'msisdn'),
    )
    for level_name, provider_attr in level_specs:
        generator.add_level(level_name, number=2, level_type=getattr(fake, provider_attr))

    return generator.generate_deployment()

In [12]:
def _is_deployment_exist(path):
    # Checking shared path for the devices table
    return os.path.exists(f'/v3io/bigdata/{path}')

In [13]:
def _get_deployment_from_kv(client, path):
    print(f'Retrieving deployment from {path}')
    context.logger.debug(f'Retrieving deployment from {path}')
    # Read the devices table from our KV store
    deployment_df = client.read(backend='kv', table=path)
    
    # Reset index to column
    deployment_df.index.name = 'device'
    deployment_df = deployment_df.reset_index()
    return deployment_df

In [14]:
def _save_deployment_to_kv(path, df, client=v3f.Client('framesd:8081')):
    # Save deployment to our KV store
    client.write(backend='kv', table='netops_devices',dfs=df, index_cols=['device'])

In [15]:
def get_or_create_deployment(path, save_to_cloud=False, client=None):
    """Return the deployment table, loading it from KV or generating it.

    Args:
        path: Deployment table name.
        save_to_cloud: Evaluated by truthiness; when truthy (and a client is
            given) a newly generated deployment is written to KV. Note that
            any non-empty string, including '0', counts as truthy.
        client: Frames client. Without one the deployment is always
            generated and never loaded or saved.

    Returns:
        The deployment DataFrame.
    """
    use_stored = client and _is_deployment_exist(path)
    if not use_stored:
        # Nothing to load: generate a new deployment and optionally persist it
        fresh_df = _create_deployment()
        if client and save_to_cloud:
            _save_deployment_to_kv(path, fresh_df, client)
        return fresh_df

    # A client is available and the table exists: read it back from KV
    return _get_deployment_from_kv(client, path)

In [16]:
def set_indexes(df):
    """Return ``df`` indexed by timestamp, company, data_center and device.

    The input frame is not modified; ``set_index`` returns a new frame.
    """
    index_columns = ['timestamp', 'company', 'data_center', 'device']
    return df.set_index(index_columns)

In [17]:
def save_metrics_to_tsdb(context, metrics: pd.DataFrame):
    """Write ``metrics`` to the TSDB table named by ``context.metrics_table``.

    Args:
        context: Object exposing ``v3f`` (client with a ``write`` method) and
            ``metrics_table``.
        metrics: Data handed unchanged to the client's ``write`` call.
    """
    print('Saving metrics to TSDB')

    write_to_backend = context.v3f.write
    target_table = context.metrics_table
    write_to_backend('tsdb', target_table, metrics)

In [18]:
def save_metrics_to_parquet(context, metrics):
    """Concatenate the generated frames and write them as one Parquet file.

    The file is created inside ``context.metrics_table`` and is named
    ``<first timestamp>-<last timestamp>.parquet``, taken from the first and
    last rows of the concatenated frame.

    Args:
        context: Object exposing ``metrics_table`` (target directory).
        metrics: Iterable of DataFrames to concatenate.
    """
    print('Saving metrics to Parquet')
    combined = pd.concat(itertools.chain(metrics)).reset_index()

    # Cast timestamps to millisecond precision before writing to Parquet
    combined['timestamp'] = combined['timestamp'].astype('datetime64[ms]')

    # Restore the multi-level index
    indexed = set_indexes(combined)

    # File name is built from the first and last row timestamps
    stamp_format = '%Y%m%dT%H%M%S'
    first_stamp, last_stamp = [indexed.index[pos][0].strftime(stamp_format) for pos in (0, -1)]
    filename = f'{first_stamp}-{last_stamp}.parquet'
    print(filename)

    filepath = os.path.join(context.metrics_table, filename)
    print(filepath)

    with open(filepath, 'wb+') as parquet_file:
        indexed.to_parquet(parquet_file)

In [19]:
def is_deployment_initialized(context):
    """Report whether ``context`` already carries a ``metric_generator`` attribute."""
    try:
        context.metric_generator
    except AttributeError:
        return False
    return True

### Init context

In [37]:
def init_context(context):
    """Prepare the function context: storage target, batch length, deployment and generator.

    Environment variables read:
        SAVE_TO_TSDB (default 1): 1 selects the TSDB writer, anything else
            selects the Parquet writer.
        SAVE_TO (default 'netops_metrics'): TSDB table name, or directory
            for Parquet files.
        SECS_TO_GENERATE (default 10): length of each generated batch.
        DEPLOYMENT_TABLE (required): deployment table name.
        SAVE_DEPLOYMENT (default off): whether a newly generated deployment
            is saved; '', '0', 'false' and 'no' mean off.
        METRICS_CONFIGURATION_FILEPATH
            (default '/configurations/metrics-configuration.yaml').
        initial_timestamp, INITIAL_TIMESTAMP or INITIAL_TS (first one found,
            default: one day ago): generator start, in epoch seconds.

    Attributes set on ``context``:
        metrics_table, write, secs_to_generate (int), metric_generator, v3f.

    Note:
        In Parquet mode no frames client is created, so the deployment is
        generated on every initialisation and never loaded from or saved to
        the KV store.
    """
    # Get saving configuration
    save_to_tsdb = (int(os.getenv('SAVE_TO_TSDB', 1)) == 1)

    # Set metrics table
    metrics_table = os.getenv('SAVE_TO', 'netops_metrics')
    context.metrics_table = metrics_table

    if save_to_tsdb:
        # TSDB based demo
        context.logger.debug('Saving to TSDB')

        # Create our DB client
        client = v3f.Client(address='framesd:8081', container='bigdata')

        # Create TSDB table if needed
        client.create('tsdb', metrics_table, attrs={'rate': '1/s'}, if_exists=1)

        # Set saving function
        context.write = save_metrics_to_tsdb
    else:
        # Parquet based demo
        context.logger.debug('Saving to Parquet')

        # No client in this mode; downstream code checks for None
        client = None

        # Create saving directory (no error if it already exists)
        os.makedirs(metrics_table, exist_ok=True)

        # Set saving function
        context.write = save_metrics_to_parquet

    # Batch length, stored as an int so consumers need not parse it again
    context.secs_to_generate = int(os.getenv('SECS_TO_GENERATE', 10))

    # Environment values are strings, so '0' must be parsed explicitly:
    # passing the raw string would always be truthy.
    save_flag = os.getenv('SAVE_DEPLOYMENT', '0').strip().lower()
    save_deployment = save_flag not in ('', '0', 'false', 'no')

    # Generate or create deployment
    deployment_df = get_or_create_deployment(os.environ['DEPLOYMENT_TABLE'], save_deployment, client)

    # Initial values for each metric column
    deployment_df['cpu_utilization'] = 70
    deployment_df['latency'] = 0
    deployment_df['packet_loss'] = 0
    deployment_df['throughput'] = 290

    # Get metrics configuration. safe_load is used because yaml.load without
    # an explicit Loader is deprecated/rejected by current PyYAML releases.
    config_path = os.getenv('METRICS_CONFIGURATION_FILEPATH', '/configurations/metrics-configuration.yaml')
    with open(config_path, 'r') as f:
        metrics_configuration = yaml.safe_load(f)

    # Initial timestamp: environment names are case-sensitive, so accept the
    # historical lowercase name as well as the upper-case spellings.
    default_initial = (datetime.datetime.now() - datetime.timedelta(days=1)).timestamp()
    candidate_names = ('initial_timestamp', 'INITIAL_TIMESTAMP', 'INITIAL_TS')
    raw_initial = next((os.environ[name] for name in candidate_names if name in os.environ),
                       default_initial)
    initial_timestamp = int(raw_initial)

    # Create metrics generator
    met_gen = metrics_generator.Generator_df(metrics_configuration,
                                             user_hierarchy=deployment_df,
                                             initial_timestamp=initial_timestamp)
    context.metric_generator = met_gen

    # Set client
    context.v3f = client


### Handler

In [21]:
def handler(context, event):
    """Generate one batch of metrics and hand it to the configured writer.

    Args:
        context: Object exposing ``metric_generator`` (with ``generate_range``),
            ``secs_to_generate`` (int or numeric string) and ``write``
            (callable taking ``(context, metrics)``).
        event: Triggering event; not used.

    Returns:
        None.
    """
    # Read the clock once so the window is exactly secs_to_generate long;
    # two separate now() calls made end_time drift by the time between them.
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(seconds=int(context.secs_to_generate))

    # Create metrics generator based on YAML configuration and deployment
    metrics = context.metric_generator.generate_range(start_time=start_time,
                                                      end_time=end_time,
                                                      as_df=True,
                                                      as_iterator=True)

    # Save generated metrics
    context.write(context, metrics)

## Test

In [34]:
# nuclio: ignore
# Local smoke test: initialise the context, then invoke the handler once with an empty event body.
# NOTE(review): `context` is not defined in this notebook; presumably it is provided by the nuclio notebook environment -- confirm.
init_context(context)
event = nuclio.Event(body='')
output = handler(context, event)
# handler has no return statement, so output is None and nothing is displayed for it
output

creating deployment




Saving metrics to Parquet
20191224T080931-20191224T090931.parquet
/v3io/bigdata/netops_metrics_parquet/20191224T080931-20191224T090931.parquet


# Deploy

In [43]:
%nuclio deploy -p netops -n generator -c

[nuclio.deploy] 2019-12-24 08:25:10,270 (info) Building processor image
[nuclio.deploy] 2019-12-24 08:25:15,310 (info) Build complete
[nuclio.deploy] 2019-12-24 08:25:21,359 (info) Function deploy complete
[nuclio.deploy] 2019-12-24 08:25:21,367 done updating generator, function address: 3.18.11.15:30541
%nuclio: function deployed
