# Nuclio - Generator function

## Environment

In [None]:
# nuclio: ignore
import nuclio

### Configurations

In [None]:
import os
import datetime

In [None]:
# nuclio: ignore
# Set initial timestamp as 1 day ago (epoch seconds, stored as a string)
one_day_ago = datetime.datetime.now() - datetime.timedelta(days=1)
os.environ['INITIAL_TS'] = str(int(one_day_ago.timestamp()))
print(os.getenv('INITIAL_TS', 0))

In [None]:
%%nuclio config 

# Trigger
spec.triggers.secs.kind = "cron"
spec.triggers.secs.attributes.interval = "10s"

# Base image
spec.build.baseImage = "python:3.6-jessie"

### Set the configuration file for the function to pull

In [None]:
# nuclio: ignore
os.environ['local_configurations_path'] = os.path.join(os.path.dirname(os.getcwd()), 'configurations', 'metrics_configuration.yaml')
os.environ['shared_configurations_path'] = os.path.join('/', 'v3io', 'bigdata', 'netops_configurations')
os.environ['webapi_configuration_path'] = os.path.join('/', 'bigdata', 'netops_configurations', 'metrics_configuration.yaml')
os.environ['function_configuration_dir'] = os.path.join('/', 'configurations')

!mkdir ${shared_configurations_path}
!cp ${local_configurations_path} -t ${shared_configurations_path}

In [None]:
%nuclio env -c METRICS_CONFIGURATION_FILEPATH=/configurations/metrics_configuration.yaml
%nuclio env -l METRICS_CONFIGURATION_FILEPATH=../configurations/metrics_configuration.yaml
%nuclio env function_configuration_dir=/lbr/configurations

In [None]:
%%nuclio cmd -c

# Pull configuration file to function
apt-get update && apt-get install -y wget
mkdir -p ${function_configuration_dir}
wget -O ${METRICS_CONFIGURATION_FILEPATH} --header "x-v3io-session-key: ${V3IO_ACCESS_KEY}" http://${V3IO_WEBAPI_SERVICE_HOST}:8081${webapi_configuration_path}

### Setup

In [72]:
%%nuclio cmd

# Utils
pip install pyarrow
pip install pyyaml --upgrade
pip install pandas
pip install pytimeparse

# Igz DB
pip install v3io_frames --upgrade
#pip install v3io_frames

# Function
pip install -i https://test.pypi.org/simple/ v3io-generator
pip install faker

Requirement already up-to-date: pyyaml in /conda/lib/python3.6/site-packages (5.1.1)
Requirement already up-to-date: v3io_frames in /User/.pythonlibs/lib/python3.6/site-packages (0.5.9)
Looking in indexes: https://test.pypi.org/simple/


### Variables

In [None]:
%%nuclio env

# DB Config
V3IO_FRAMESD=${V3IO_FRAMESD}
V3IO_USERNAME=${V3IO_USERNAME}
V3IO_ACCESS_KEY=${V3IO_ACCESS_KEY}

# Deployment
SAVE_DEPLOYMENT=1
DEPLOYMENT_TABLE=netops_devices

# Metrics

# Parquet
# SAVE_TO=/v3io/bigdata/netops_metrics_parquet
SAVE_TO=netops_metrics

INITIAL_TIMESTAMP=${INITIAL_TS}
SECS_TO_GENERATE=10

# Save as
SAVE_TO_TSDB=1

## Function

In [None]:
# import os # Already imported earlier
import time
import yaml
import pandas as pd
import itertools

# DB Connection
import v3io_frames as v3f

# Data generator
from v3io_generator import metrics_generator, deployment_generator

### Helper functions

In [None]:
def _create_deployment():
    """Generate a new deployment with three levels: company, data_center, device.

    Each level is added with ``number=2`` and a faker provider that supplies
    its values. Returns the result of ``generate_deployment()``.
    """
    print('creating deployment')

    # Meta-data factory and the faker instance it exposes
    generator = deployment_generator.deployment_generator()
    fake = generator.get_faker()

    # Describe the hierarchy, outermost level first
    generator.add_level(name='company', number=2, level_type=fake.company)
    generator.add_level('data_center', number=2, level_type=fake.street_name)
    generator.add_level('device', number=2, level_type=fake.msisdn)

    # Materialize the meta-data
    return generator.generate_deployment()

In [None]:
def _is_deployment_exist(path):
    # Checking shared path for the devices table
    return os.path.exists(f'/v3io/bigdata/{path}')

In [None]:
def _get_deployment_from_kv(client, path):
    print(f'Retrieving deployment from {path}')
    context.logger.debug(f'Retrieving deployment from {path}')
    # Read the devices table from our KV store
    deployment_df = client.read(backend='kv', table=path)
    
    # Reset index to column
    deployment_df.index.name = 'device'
    deployment_df = deployment_df.reset_index()
    return deployment_df

In [None]:
def _save_deployment_to_kv(path, df, client=v3f.Client('framesd:8081')):
    # Save deployment to our KV store
    client.write(backend='kv', table='netops_devices',dfs=df, index_cols=['device'])

In [None]:
def get_or_create_deployment(path, save_to_cloud=False, client=None):
    """Return the deployment stored at ``path``, or generate a new one.

    With a truthy ``client`` and an existing deployment, the stored deployment
    is read from KV. Otherwise a new deployment is created and, when both
    ``client`` and ``save_to_cloud`` are truthy, saved to KV.
    """
    # Existing deployment and a client to read it with: reuse it
    if client and _is_deployment_exist(path):
        return _get_deployment_from_kv(client, path)

    # Nothing to reuse: generate, and optionally persist
    new_deployment = _create_deployment()
    if client and save_to_cloud:
        _save_deployment_to_kv(path, new_deployment, client)
    return new_deployment

In [None]:
def set_indexes(df):
    """Return ``df`` indexed by timestamp, company, data_center and device."""
    index_columns = ['timestamp', 'company', 'data_center', 'device']
    return df.set_index(index_columns)

In [None]:
def save_metrics_to_tsdb(context, metrics: pd.DataFrame):
    """Write ``metrics`` to the TSDB table named by ``context.metrics_table``.

    The write goes through the client stored on ``context.v3f``.
    """
    print('Saving metrics to TSDB')

    tsdb_write = context.v3f.write
    tsdb_write('tsdb', context.metrics_table, metrics)

In [None]:
def save_metrics_to_parquet(context, metrics):
    """Concatenate the metric frames and write them as one Parquet file.

    The file is created under ``context.metrics_table`` and is named
    ``<first timestamp>-<last timestamp>.parquet``.
    """
    print('Saving metrics to Parquet')
    combined = pd.concat(itertools.chain(metrics)).reset_index()

    # Need to fix timestamps from ns to ms if we write to parquet
    combined['timestamp'] = combined.loc[:, 'timestamp'].astype('datetime64[ms]')

    # Restore the (timestamp, company, data_center, device) index
    combined = set_indexes(combined)

    # File name is built from the first and last timestamps in the index
    stamp_format = '%Y%m%dT%H%M%S'
    first_stamp = combined.index[0][0].strftime(stamp_format)
    last_stamp = combined.index[-1][0].strftime(stamp_format)
    filename = f'{first_stamp}-{last_stamp}.parquet'
    print(filename)

    filepath = os.path.join(context.metrics_table, filename)
    print(filepath)
    with open(filepath, 'wb+') as parquet_file:
        combined.to_parquet(parquet_file)

In [None]:
def is_deployment_initialized(context):
    """Return True when ``context`` already has a ``metric_generator`` attribute."""
    missing = object()
    return getattr(context, 'metric_generator', missing) is not missing

### Init context

In [None]:
def init_context(context):
    """Populate ``context`` with everything the handler needs.

    Sets these attributes on ``context``:
        metrics_table: target table / directory name (env ``SAVE_TO``).
        write: ``save_metrics_to_tsdb`` or ``save_metrics_to_parquet``.
        secs_to_generate: batch length in seconds, as an int (env ``SECS_TO_GENERATE``).
        metric_generator: generator built from the YAML configuration and the deployment.
        v3f: the frames client, or None in Parquet mode.

    Environment variables read: ``SAVE_TO_TSDB`` (default 1), ``SAVE_TO``,
    ``SECS_TO_GENERATE`` (default 10), ``DEPLOYMENT_TABLE`` (required),
    ``SAVE_DEPLOYMENT`` (default 0), ``METRICS_CONFIGURATION_FILEPATH`` and
    ``INITIAL_TIMESTAMP`` (defaults to the current time).

    Raises:
        KeyError: when ``DEPLOYMENT_TABLE`` is not set.
    """
    # Get saving configuration
    save_to_tsdb = int(os.getenv('SAVE_TO_TSDB', 1)) == 1

    # Set metrics table
    metrics_table = os.getenv('SAVE_TO', 'netops_metrics')
    context.metrics_table = metrics_table

    # TSDB based demo
    if save_to_tsdb:
        context.logger.debug('Saving to TSDB')
        # Create our DB client
        client = v3f.Client(address='framesd:8081')
        #client = v3f.Client(address='framesd:8081',
        #                container='bigdata',
        #                password=os.environ['V3IO_ACCESS_KEY'],
        #                user=os.environ['V3IO_USERNAME'])

        # Create TSDB table if needed
        client.create('tsdb', metrics_table, attrs={'rate': '1/s'}, if_exists=1)

        # Set saving function
        context.write = save_metrics_to_tsdb

    # Parquet based demo
    else:
        context.logger.debug('Saving to Parquet')
        # Set empty client for verification purposes
        client = None

        # Create saving directory
        os.makedirs(metrics_table, exist_ok=True)

        # Set saving function
        context.write = save_metrics_to_parquet

    # Set batch length; environment values are strings, so convert once here
    context.secs_to_generate = int(os.getenv('SECS_TO_GENERATE', 10))

    # Parse the flag the same way as SAVE_TO_TSDB: the raw string '0' is
    # truthy and would always trigger a save
    save_deployment = int(os.getenv('SAVE_DEPLOYMENT', 0)) == 1

    # Generate or create deployment
    deployment_df = get_or_create_deployment(os.environ['DEPLOYMENT_TABLE'], save_deployment, client)

    # Initial metric values for every device
    deployment_df['cpu_utilization'] = 70
    deployment_df['latency'] = 0
    deployment_df['packet_loss'] = 0
    deployment_df['throughput'] = 290

    # Get metrics configuration
    configuration_path = os.getenv('METRICS_CONFIGURATION_FILEPATH', '/configurations/metrics_configuration.yaml')
    with open(configuration_path, 'r') as f:
        # safe_load: yaml.load without an explicit Loader is deprecated and
        # rejected by newer PyYAML releases
        metrics_configuration = yaml.safe_load(f)

    # Create metrics generator; fall back to "now" when the variable is
    # unset or empty
    initial_timestamp = int(os.getenv('INITIAL_TIMESTAMP') or time.time())
    context.metric_generator = metrics_generator.Generator_df(metrics_configuration,
                                                              user_hierarchy=deployment_df,
                                                              initial_timestamp=initial_timestamp)

    # Set client
    context.v3f = client


### Handler

In [None]:
def handler(context, event):
    """Generate one batch of metrics starting now and pass it to the configured writer.

    Args:
        context: function context carrying ``metric_generator``,
            ``secs_to_generate`` and ``write`` (set by ``init_context``).
        event: triggering event; not used.

    Returns:
        None.
    """
    # Read the clock once so the generated window is exactly
    # secs_to_generate long
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(seconds=int(context.secs_to_generate))

    # Create metrics based on the YAML configuration and deployment
    metrics = context.metric_generator.generate_range(start_time=start_time,
                                                      end_time=end_time,
                                                      as_df=True,
                                                      as_iterator=True)

    # Save generated metrics
    context.write(context, metrics)

## Test

In [None]:
# nuclio: ignore
# Local test: populate the context before invoking the handler below.
# NOTE(review): `context` is not defined anywhere in this notebook; presumably
# it is provided by the nuclio notebook environment - confirm.
init_context(context)

In [None]:
# nuclio: ignore
# Local test: invoke the handler once with an empty-bodied event
event = nuclio.Event(body='')
output = handler(context, event)
# handler has no return statement, so output is None
output

# Deploy

In [None]:
%nuclio deploy -p netops -n generator -c