# Network Operations
## Pre-Processing

In [None]:
# nuclio: ignore
import nuclio

Define the MLRun environment

In [None]:
from mlrun import new_function, code_to_function, get_run_db, mount_v3io, NewTask, mlconf, new_model_server, run_local
# Point MLRun's `dbpath` at the `mlrun-api` service endpoint; change this
# value if the MLRun API is reachable at a different address.
mlconf.dbpath = 'http://mlrun-api:8080'

In [None]:
%nuclio config kind = "job"

## Function

In [None]:
# nuclio: start-code

In [None]:
import os
import pandas as pd

In [None]:
def _as_list(value):
    """Return *value* as a list, wrapping a lone string in a one-item list."""
    if isinstance(value, str):
        return [value]
    return list(value)


def _rolling_aggregate(df, columns, aggs, suffix, window, center):
    """Apply rolling-window aggregations to *columns* and flatten the column names."""
    rolled = df.loc[:, columns].rolling(window=window,
                                        center=center).aggregate(aggs)

    # With a list of columns and a list of aggregations the result columns are
    # (column, aggregation) pairs; flatten them to `<column>_<aggregation>`.
    rolled.columns = ['_'.join(col).strip() for col in rolled.columns.values]

    if suffix:
        rolled.columns = [f'{name}_{suffix}' for name in rolled.columns]
    return rolled


def aggregate(context,
              df_artifact,
              keys=None,
              metrics=None,
              labels=None,
              metric_aggs=None,
              label_aggs=None,
              suffix=None,
              window=3,
              center=False,
              inplace=False):
    """Compute rolling-window aggregations over a dataframe and log the result.

    The resulting dataframe is logged through ``context.log_dataset`` under the
    key ``aggregate`` in parquet format.

    :param context:     Run context; must provide ``logger.info`` and
                        ``log_dataset``.
    :param df_artifact: Input data: either a ``pandas.DataFrame`` or anything
                        ``pd.read_parquet`` accepts (e.g. a parquet path).
    :param keys:        Index level names to keep in the index. All other
                        index levels are moved to columns before the rolling
                        computation. When empty/None the index is used as is.
    :param metrics:     Column name(s) to aggregate with ``metric_aggs``.
    :param labels:      Column name(s) to aggregate with ``label_aggs``.
    :param metric_aggs: Aggregation name(s) for ``metrics``
                        (defaults to ``['mean']``).
    :param label_aggs:  Aggregation name(s) for ``labels``
                        (defaults to ``['max']``).
    :param suffix:      Optional suffix appended (as ``_<suffix>``) to every
                        generated column name.
    :param window:      Rolling window size, passed to ``DataFrame.rolling``.
    :param center:      Whether the window is centered, passed to
                        ``DataFrame.rolling``.
    :param inplace:     When False the aggregated columns are merged (on the
                        index) onto the input dataframe; when True only the
                        aggregated columns are kept.

    :returns: The unmodified input dataframe when neither ``metrics`` nor
              ``labels`` is given; otherwise ``None`` (the result is only
              logged).
    """
    if isinstance(df_artifact, pd.DataFrame):
        input_df = df_artifact
    else:
        context.logger.info(df_artifact)
        input_df = pd.read_parquet(df_artifact)

    # Verify there is work to be done
    if not (metrics or labels):
        # Logged under the same key and format as the regular path so that
        # consumers find the output in one place either way.
        context.log_dataset(key='aggregate',
                            df=input_df,
                            format='parquet')
        return input_df

    # Defaults are resolved here to avoid mutable default arguments.
    metric_aggs = ['mean'] if metric_aggs is None else _as_list(metric_aggs)
    label_aggs = ['max'] if label_aggs is None else _as_list(label_aggs)

    # Select the correct indexes
    if keys:
        keys = _as_list(keys)
        indexes_to_drop = [col for col in input_df.index.names if col not in keys]
        df = input_df.reset_index(level=indexes_to_drop)
    else:
        df = input_df

    # NOTE(review): the rolling window runs over the rows in their existing
    # order; no grouping by `keys` is applied here.
    # When not in-place, the aggregated columns are merged onto the input;
    # otherwise the first aggregated frame becomes the base of the result.
    final_df = None if inplace else input_df
    for columns, aggs in ((metrics, metric_aggs), (labels, label_aggs)):
        if not columns:
            continue
        rolled = _rolling_aggregate(df, _as_list(columns), aggs,
                                    suffix, window, center)
        if final_df is None:
            final_df = rolled
        else:
            final_df = pd.merge(final_df, rolled, suffixes=('', suffix),
                                left_index=True, right_index=True)

    # Save the result dataframe
    context.log_dataset(key='aggregate',
                        df=final_df,
                        format='parquet')

In [None]:
# nuclio: end-code

## Test
Run the tests using data from the `Network Operations Demo`.

In [None]:
# Define V3IO Client: a v3io_frames client for the `framesd:8081` endpoint,
# working against the `bigdata` container.
import v3io_frames as v3f
client = v3f.Client('framesd:8081', container='bigdata')

# Define base dirs: all demo files live under /User/demo-network-operations
project_dir = os.path.join('/', 'User', 'demo-network-operations')

In [None]:
# Read the `netops_metrics` table through the `tsdb` backend, requesting a
# multi-index dataframe.
metrics = client.read('tsdb', 'netops_metrics', multi_index=True)
# Persist the metrics as parquet (index included) under the project's data dir.
metrics_pq = os.path.join(project_dir, 'data', 'metrics.pq')
metrics.to_parquet(metrics_pq, engine='pyarrow', index=True)
# Preview the first two rows.
metrics.head(2)

In [None]:
# Parquet file with the metrics sample; the same path is passed to the
# aggregate task as `df_artifact`, so it is defined once and reused here.
metrics_path = '/User/v3io/bigdata/netops_metrics_parquet/20200329T133835-20200329T143835.parquet'
metrics = pd.read_parquet(metrics_path)

### Local Test
Define the aggregate test task

In [None]:
# Parameters forwarded to the `aggregate` handler.
aggregate_params = {
    'df_artifact': metrics_path,
    'metrics': ['cpu_utilization'],
    'labels': ['is_error'],
    'metric_aggs': ['mean', 'sum'],
    'label_aggs': ['max'],
    'suffix': 'daily',
    'inplace': False,
    'window': 5,
    'center': True,
}

# Task definition binding the parameters above to the `aggregate` handler.
aggregate_task = NewTask(
    name='aggregate',
    project='network-operations',
    params=aggregate_params,
    handler=aggregate,
)

In [None]:
# Run the task with `run_local` and keep the returned run object; its outputs
# are read in the "Show results" cell.
aggregate_run = run_local(aggregate_task)

### Test on cluster

Convert the code to an MLRun function

In [None]:
# Build the `aggregate` job function on the mlrun 0.4.6 image.
fn = code_to_function('aggregate', kind='job', image='mlrun/mlrun:0.4.6')

# Mount the `bigdata` remote at the path used by the test data.
v3io_mount = mount_v3io(remote='bigdata', mount_path='/User/v3io/bigdata')
fn = fn.apply(v3io_mount)

# Export the function definition as YAML under the project directory.
function_yaml = os.path.join(project_dir, 'yaml', 'function.yaml')
fn.export(function_yaml)

In [None]:
# Run the same task definition through the `job` function built above.
fn.run(aggregate_task)

### Show results

In [None]:
# Load the parquet dataset that the local run logged under the `aggregate` key.
pd.read_parquet(aggregate_run.outputs['aggregate'])