# Nuclio - Infer function

## Function

### Imports

In [7]:
import os
import pickle

import v3io_frames as v3f

import pandas as pd
import xgboost as xgb

### Helper functions

In [8]:
def format_df_from_tsdb(context, df):
    """Label the index levels of *df* and return the same frame.

    The index names are assigned in place to
    ``timestamp``, ``company``, ``data_center`` and ``device``.
    ``context`` is accepted for signature uniformity and is not used.
    """
    level_names = ['timestamp', 'company', 'data_center', 'device']
    df.index.names = level_names
    return df

In [None]:
def save_to_tsdb(context, df: pd.DataFrame):
    """Write *df* to the predictions TSDB table through ``context.v3f``.

    The frame is first re-indexed with ``set_indexes`` so that the
    timestamp/company/data_center/device columns form the index.
    """
    indexed = set_indexes(df)
    context.v3f.write('tsdb', context.predictions_table, indexed)

In [9]:
def set_indexes(df):
    """Return a copy of *df* indexed by its identifying columns.

    The ``timestamp``, ``company``, ``data_center`` and ``device`` columns
    are moved into a MultiIndex, in that order.
    """
    index_columns = ['timestamp', 'company', 'data_center', 'device']
    return df.set_index(index_columns)

In [11]:
def get_data_parquet(context):
    """Load the most recently modified file of the features directory.

    Every entry of ``context.features_table`` is considered; the one with
    the latest modification time is read with ``pd.read_parquet``.

    Returns:
        A DataFrame holding every column whose name does not contain
        ``'is_error'``, with the columns ordered alphabetically.

    Raises:
        ValueError: if the features directory has no entries.
    """
    directory = context.features_table

    # Get candidate files
    candidates = [os.path.join(directory, name) for name in os.listdir(directory)]
    if not candidates:
        # max() on an empty list raises an opaque ValueError; name the directory instead
        raise ValueError(f'No feature files found in: {directory}')

    # Get latest filename (by modification time)
    latest = max(candidates, key=os.path.getmtime)
    context.logger.debug(f'Reading data from: {latest}')

    # Load parquet into a pandas DataFrame
    df = pd.read_parquet(latest)

    # Keep the non-error columns, sorted by name
    keep_columns = sorted(col for col in df.columns if 'is_error' not in col)
    return df[keep_columns]

In [13]:
def save_to_parquet(context, df: pd.DataFrame):
    """Write *df* as a Parquet file inside ``context.predictions_table``.

    The file is named ``<first>-<last>.parquet`` where the two stamps are
    the timestamps of the first and last index rows, formatted as
    ``%Y%m%dT%H%M%S``.
    """
    print('Saving features to Parquet')

    # Timestamps must go from ns to ms resolution before writing to parquet
    flat = df.reset_index()
    flat['timestamp'] = flat['timestamp'].astype('datetime64[ms]')

    # Restore the identifying columns as the index
    indexed = set_indexes(flat)

    # Build the target file name from the first and last row timestamps
    stamp_format = '%Y%m%dT%H%M%S'
    start = indexed.index[0][0].strftime(stamp_format)
    end = indexed.index[-1][0].strftime(stamp_format)
    target = os.path.join(context.predictions_table, f'{start}-{end}.parquet')

    # Save parquet
    with open(target, 'wb+') as sink:
        indexed.to_parquet(sink)

### Init context

In [14]:
def init_context(context):
    """Prepare the function context: connections, paths, I/O hooks and model.

    Attributes set on *context*:
        v3f: v3io frames client for the ``bigdata`` container.
        features_table: directory the input features are read from.
        predictions_table: table path the predictions are written to.
        read: function used by the handler to load data (``get_data_parquet``).
        write: function used by the handler to store results (``save_to_tsdb``).
        model: the unpickled model object.

    Required environment variables: ``NETAPP_MOUNT_PATH``, ``FEATURES_TABLE``,
    ``PREDICTIONS_TABLE``, ``APP_DIR``, ``SAVE_TO`` and ``MODEL_FILENAME``.

    Raises:
        KeyError: if one of the required environment variables is not set.
    """
    # Resolve all configuration first so that a missing variable fails fast,
    # with a KeyError naming it, before any connection is opened.
    env = os.environ
    features_table = os.path.join(env['NETAPP_MOUNT_PATH'], env['FEATURES_TABLE'])
    predictions_table = os.path.join('netops/', env['PREDICTIONS_TABLE'])
    model_path = os.path.join(env['APP_DIR'], env['SAVE_TO'], env['MODEL_FILENAME'])

    # Set Iguazio v3io connection
    context.v3f = v3f.Client(address='framesd:8081', container='bigdata')

    # Save features and predictions locations
    context.features_table = features_table
    context.predictions_table = predictions_table

    # Create predictions table if needed
    context.v3f.create('tsdb', context.predictions_table, rate='1/s', if_exists=1)

    # Create the local saving directory if needed
    os.makedirs(context.predictions_table, exist_ok=True)

    # NOTE: the FROM_TSDB environment variable is not consulted; reads always
    # come from Parquet and writes always go to TSDB.
    context.read = get_data_parquet
    context.write = save_to_tsdb

    # Load the model.
    # NOTE: pickle.load can execute arbitrary code; the model file must come
    # from a trusted location.
    with open(model_path, 'rb') as f:
        context.model = pickle.load(f)

### Handler

In [15]:
def handler(context, event):
    """Load features, run the model on them and store the predictions.

    Uses the hooks on *context*: ``context.read`` to load the input frame,
    ``context.model.predict`` to score it, and ``context.write`` to persist
    the result. ``event`` is not used.
    """
    # Load the input features
    df = context.read(context)

    # limit for testing
    # NOTE(review): only the first two rows are ever scored; confirm this
    # limit is still wanted. The copy keeps the column assignment below from
    # targeting a slice of the loaded frame.
    df = df.head(2).copy()

    # Predict
    df['prediction'] = context.model.predict(df.values)

    # Prepare to save predictions: move the index levels back to columns.
    # Unnamed levels come back as level_N; the first must be 'timestamp'
    # because the index is rebuilt from a column of that name on write.
    df = df.reset_index()
    df = df.rename(columns={'level_0': 'timestamp',
                            'level_1': 'company',
                            'level_2': 'data_center',
                            'level_3': 'device'})

    # Save
    context.write(context, df)