# Network Operations
## Pre-Processing

In [2]:
# nuclio: ignore
import nuclio

Define the MLRun environment

In [3]:
# MLRun helpers imported for this notebook
from mlrun import (
    new_function,
    code_to_function,
    get_run_db,
    mount_v3io,
    NewTask,
    mlconf,
    new_model_server,
    run_local,
)

# Point MLRun at the API service endpoint
mlconf.dbpath = 'http://mlrun-api:8080'

Add the packages used by the function

In [4]:
%%nuclio cmd -c
pip install pyarrow
pip install pandas

## Function

In [5]:
# nuclio: start-code

In [11]:
import os
import pandas as pd

In [49]:
def _rolling_aggregate(df, columns, aggs, window, center, suffix):
    """Roll `columns` of `df` over `window` rows, apply `aggs`, and flatten the column names.

    `aggs` is expected to be a list of aggregations: the flattening step joins
    the (column, aggregation) pairs that `aggregate` produces for a list.
    Generated columns are named '<column>_<agg>', or '<column>_<agg>_<suffix>'
    when `suffix` is truthy.
    """
    rolled = df.loc[:, columns].rolling(window=window, center=center).aggregate(aggs)

    # Flatten the (column, aggregation) pairs into single-level names
    rolled.columns = ['_'.join(col).strip() for col in rolled.columns.values]

    if suffix:
        rolled.columns = [f'{name}_{suffix}' for name in rolled.columns]

    return rolled


def aggregate(context,
              df_artifact,
              keys=None,
              metrics=None,
              labels=None,
              metric_aggs=['mean'],
              label_aggs=['max'],
              suffix=None,
              window=3,
              center=False,
              append_to_df=True,
              save_to='aggregate_df.pq'):
    """Compute rolling-window aggregations over a parquet dataset and save the result.

    :param context:      run context; only `context.logger.info` and
                         `context.log_artifact` are used here.
    :param df_artifact:  location of the input dataset, read with `pd.read_parquet`.
    :param keys:         index level names to keep as the index; all other index
                         levels are reset into columns before the rolling step.
                         When empty/None the input index is used unchanged.
    :param metrics:      column names aggregated with `metric_aggs`.
    :param labels:       column names aggregated with `label_aggs`.
    :param metric_aggs:  list of aggregations applied to `metrics`.
    :param label_aggs:   list of aggregations applied to `labels`.
    :param suffix:       optional text appended to every generated column name
                         (also passed as the right-hand merge suffix).
    :param window:       rolling window, passed to `DataFrame.rolling`.
    :param center:       passed to `DataFrame.rolling`.
    :param append_to_df: when True the generated columns are merged (on index)
                         onto the input dataframe; otherwise only the generated
                         columns are saved.
    :param save_to:      path of the output parquet file; its parent directory is
                         created when the path has one.

    :returns: the unmodified input dataframe when neither `metrics` nor `labels`
              is given (it is also logged as the 'df' artifact); otherwise None,
              with the result written to `save_to` and logged as the 'aggregate'
              artifact.
    """
    context.logger.info(df_artifact)
    input_df = pd.read_parquet(df_artifact)

    # Verify there is work to be done
    if not (metrics or labels):
        context.log_artifact('df', input_df)
        return input_df

    # Select the correct indexes
    if keys:
        indexes_to_drop = [col for col in input_df.index.names if col not in keys]
        df = input_df.reset_index(level=indexes_to_drop)
    else:
        df = input_df

    # Start from the input frame when appending, otherwise from the first
    # aggregated frame; metrics are always processed before labels.
    final_df = input_df if append_to_df else None
    for columns, aggs in ((metrics, metric_aggs), (labels, label_aggs)):
        if not columns:
            continue

        rolled = _rolling_aggregate(df, columns, aggs, window, center, suffix)
        if final_df is None:
            final_df = rolled
        else:
            final_df = pd.merge(final_df, rolled, suffixes=('', suffix),
                                left_index=True, right_index=True)

    # Save the result dataframe.
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError, so only create the directory when there is one.
    target_dir = os.path.dirname(save_to)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)
    final_df.to_parquet(save_to, engine='pyarrow')
    context.log_artifact('aggregate', local_path=save_to)

In [13]:
# nuclio: end-code

## Test
Define a client to fetch a sample of the metrics

In [61]:
# Define V3IO Client
# The frames client is bound to the 'bigdata' container and is used below
# to read the metrics sample from the TSDB backend.
import v3io_frames as v3f
client = v3f.Client('framesd:8081', container='bigdata')

# Define base dirs
# Root under which the test cells write their data/ and src/ files.
project_dir = os.path.join('/', 'User', 'demo-network-operations')

In [73]:
# Parquet file that will hold the metrics sample (the aggregate task reads this path)
metrics_pq = os.path.join(project_dir, 'data', 'metrics.pq')

# Read the 'netops_metrics' TSDB table and persist it with its index
metrics = client.read('tsdb', 'netops_metrics', multi_index=True)
metrics.to_parquet(metrics_pq, engine='pyarrow', index=True)

metrics.head(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error
time,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-03-10 08:52:37.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,87.9888,0.0,0.0,0.0,0.0,2.318395,0.0,225.476913,0.0
2020-03-10 08:52:42.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,51.722187,0.0,0.0,7.531659,0.0,0.194195,0.0,247.212094,0.0


### Local Test
Define the aggregate test task

In [74]:
# Input / output locations for the test run
data_dir = os.path.join(project_dir, 'data')

# Parameters forwarded to the aggregate() handler
aggregate_params = {
    'df_artifact': os.path.join(data_dir, 'metrics.pq'),
    'metrics': ['cpu_utilization'],
    'labels': ['is_error'],
    'metric_aggs': ['mean', 'sum'],
    'label_aggs': ['max'],
    'suffix': 'daily',
    'append_to_df': True,
    'window': 5,
    'center': True,
    'save_to': os.path.join(data_dir, 'aggregate.pq'),
}

aggregate_task = NewTask(
    name='aggregate',
    project='network-operations',
    params=aggregate_params,
    handler=aggregate,
)

In [75]:
# Run the task with mlrun's run_local; the handler set on the task is the notebook's aggregate()
run_local(aggregate_task)

[mlrun] 2020-03-10 08:54:32,345 starting run aggregate uid=ae0380d6717849a79ae8f7202f6b500f  -> http://mlrun-api:8080
[mlrun] 2020-03-10 08:54:32,371 /User/demo-network-operations/data/metrics.pq
[mlrun] 2020-03-10 08:54:32,421 log artifact aggregate at /User/demo-network-operations/data/aggregate.pq, size: 18131, db: Y



uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...6b500f,0,Mar 10 08:54:32,completed,aggregate,kind=handlerowner=adminhost=jupyter-78ddb8b99c-mz8dk,,"df_artifact=/User/demo-network-operations/data/metrics.pqmetrics=['cpu_utilization']labels=['is_error']metric_aggs=['mean', 'sum']label_aggs=['max']suffix=dailyappend_to_df=Truewindow=5center=Truesave_to=/User/demo-network-operations/data/aggregate.pq",,aggregate


to track results use .show() or .logs() or in CLI: 
!mlrun get run ae0380d6717849a79ae8f7202f6b500f --project network-operations , !mlrun logs ae0380d6717849a79ae8f7202f6b500f --project network-operations
[mlrun] 2020-03-10 08:54:32,449 run executed, status=completed


<mlrun.model.RunObject at 0x7fb7e947b668>

### Test on cluster

Convert the code to an MLRun function

In [67]:
# Where the code extracted from this notebook is written
aggregate_src = os.path.join(project_dir, 'src', 'aggregate.py')

# Wrap the notebook code as a 'job' function, then apply the v3io mount to it
fn = code_to_function('aggregate', code_output=aggregate_src, kind='job')
fn = fn.apply(mount_v3io())

In [68]:
# Build the function's image so it can be run as a job on the cluster
fn.deploy()

[mlrun] 2020-03-10 08:45:58,390 starting remote build, image: .mlrun/func-default-aggregate-latest
[36mINFO[0m[0000] Resolved base name mlrun/mlrun:0.4.4 to mlrun/mlrun:0.4.4 
[36mINFO[0m[0000] Resolved base name mlrun/mlrun:0.4.4 to mlrun/mlrun:0.4.4 
[36mINFO[0m[0000] Downloading base image mlrun/mlrun:0.4.4     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:6acdce89d632b5e683a6d7fa651a928ba2227f7322060d207491518dd555543c: no such file or directory 
[36mINFO[0m[0000] Downloading base image mlrun/mlrun:0.4.4     
[36mINFO[0m[0000] Built cross stage deps: map[]                
[36mINFO[0m[0000] Downloading base image mlrun/mlrun:0.4.4     
[36mINFO[0m[0000] Error while retrieving image from cache: getting file info: stat /cache/sha256:6acdce89d632b5e683a6d7fa651a928ba2227f7322060d207491518dd555543c: no such file or directory 
[36mINFO[0m[0000] Downloading base image mlrun/mlrun:0.4.4     
[36mINFO[0m[0000] Unpacking

True

In [76]:
# Submit the same task definition used in the local test, this time through the job function
fn.run(aggregate_task)

[mlrun] 2020-03-10 08:54:41,476 starting run aggregate uid=6c065c9d39b14b7f839a0ffe8e177894  -> http://mlrun-api:8080
[mlrun] 2020-03-10 08:54:41,530 Job is running in the background, pod: aggregate-jmght
[mlrun] 2020-03-10 08:54:46,243 /User/demo-network-operations/data/metrics.pq
[mlrun] 2020-03-10 08:54:46,339 log artifact aggregate at /User/demo-network-operations/data/aggregate.pq, size: 18131, db: Y

[mlrun] 2020-03-10 08:54:46,349 run executed, status=completed
final state: succeeded


uid,iter,start,state,name,labels,inputs,parameters,results,artifacts
...177894,0,Mar 10 08:54:46,completed,aggregate,host=aggregate-jmghtkind=jobowner=admin,,"append_to_df=Truecenter=Truedf_artifact=/User/demo-network-operations/data/metrics.pqlabel_aggs=['max']labels=['is_error']metric_aggs=['mean', 'sum']metrics=['cpu_utilization']save_to=/User/demo-network-operations/data/aggregate.pqsuffix=dailywindow=5",,aggregate


to track results use .show() or .logs() or in CLI: 
!mlrun get run 6c065c9d39b14b7f839a0ffe8e177894 --project network-operations , !mlrun logs 6c065c9d39b14b7f839a0ffe8e177894 --project network-operations
[mlrun] 2020-03-10 08:54:47,638 run executed, status=completed


<mlrun.model.RunObject at 0x7fb7e95d7b38>

### Show results

In [77]:
# Load the parquet file written by the aggregate runs and display it
aggregate_pq = os.path.join(project_dir, 'data', 'aggregate.pq')
pd.read_parquet(aggregate_pq)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,cpu_utilization,cpu_utilization_is_error,is_error,latency,latency_is_error,packet_loss,packet_loss_is_error,throughput,throughput_is_error,cpu_utilization_mean_daily,cpu_utilization_sum_daily,is_error_max_daily
time,company,data_center,device,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-03-10 08:52:37.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,87.988800,0.0,0.0,0.000000,0.0,2.318395,0.0,225.476913,0.0,,,
2020-03-10 08:52:42.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,51.722187,0.0,0.0,7.531659,0.0,0.194195,0.0,247.212094,0.0,,,
2020-03-10 08:52:47.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,80.521866,0.0,0.0,0.000000,0.0,3.215264,0.0,256.570104,0.0,70.231694,351.158469,0.0
2020-03-10 08:52:52.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,68.418494,0.0,0.0,4.965965,0.0,0.000000,0.0,236.683272,0.0,67.491989,337.459946,0.0
2020-03-10 08:52:57.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,62.507122,0.0,0.0,0.000000,0.0,0.000000,0.0,236.058218,0.0,71.753012,358.765062,0.0
2020-03-10 08:53:02.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,74.290277,0.0,0.0,0.000000,0.0,0.000000,0.0,230.421777,0.0,70.979081,354.895404,0.0
2020-03-10 08:53:07.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,73.027303,0.0,0.0,1.352214,0.0,2.406516,0.0,233.408268,0.0,70.138576,350.692880,0.0
2020-03-10 08:53:12.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,76.652209,0.0,0.0,4.090376,0.0,0.000000,0.0,265.515381,0.0,72.887151,364.435755,0.0
2020-03-10 08:53:17.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,64.215970,0.0,0.0,0.000000,0.0,0.835418,0.0,281.644191,0.0,71.886204,359.431019,0.0
2020-03-10 08:53:22.874000+00:00,Elliott_Inc,Baldwin_Views,2988452888378,76.249996,0.0,0.0,0.623090,0.0,0.000000,0.0,240.905564,0.0,71.246887,356.234435,0.0
