### Weekly Pre-Processing

#### modules

In [None]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')

In [None]:
import os
import sys

# Resolve the parent of the current working directory and register it on the
# import path exactly once, so re-running this cell does not add duplicates.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

#### configuration

In [None]:
# Runtime selector for this notebook. The framework names are only pulled in
# (via star import) when the notebook runs locally.
runtime = 'local'
if runtime == 'local':
    from datapipeline_framework.vfu_datapipeline import *

In [None]:
from datetime import datetime

# Date of this run as a zero-padded day.month.year string.
today = format(datetime.today(), '%d.%m.%Y')

#### custom functions

### items

In [None]:
def item_learning_levels(df, col, mapping=None):
    """Normalise the learning-level column ``col`` of ``df`` to display labels.

    Each value is resolved against the mapping in two steps: first as the
    complete string, then, if that is not a key, by its first space-separated
    token (so ``'1 - Core'`` resolves through the key ``'1'``). The exact-match
    step is what makes multi-word keys such as ``'SCM Essentials'`` reachable;
    splitting first would reduce that value to ``'SCM'``, which is not a key.

    Args:
        df: DataFrame holding the column. It is modified in place.
        col: Name of a string-valued column in ``df``.
        mapping: Optional dict from raw key to label. When omitted (or empty)
            the default learning-level table below is used.

    Returns:
        The same ``df`` object with ``col`` replaced by the mapped labels.
        Empty, missing and unmapped values become ``'Missing Info'``.
    """
    if mapping:
        learning_level_names = mapping
    else:
        learning_level_names = {'1': '1:Core', '2': '2:Intermediate', '3': '3:Advanced',
                                '4': '4:Expert', 'SCM Essentials': '1:Core', '5': '5:Others'}

    raw_values = df[col]
    first_token = raw_values.str.split(' ').str[0]
    # Keep the complete value when it is itself a mapping key; otherwise fall
    # back to the first token. Missing values never match and stay missing.
    has_exact_match = raw_values.isin(list(learning_level_names))
    df[col] = np.where(has_exact_match, raw_values, first_token)

    # Empty strings are routed to the same placeholder as unmapped values.
    df[col] = np.where(df[col] == '', 'Missing Info', df[col])
    df[col] = df[col].map(learning_level_names)
    df[col] = df[col].fillna(value='Missing Info')

    return df

In [None]:
# Weekly pre-processing run for the 'assets' entity (called "items" in this notebook).
# Workflow is not defined in this notebook; presumably it is provided by the star
# import of datapipeline_framework.vfu_datapipeline — TODO confirm.
items = Workflow(entity_name='assets',
                 multiple_files=True,
                 file_extension='json',
                 runtime='local',)

items.load_raw_data()
items.validate_data()
# custom part: notebook-specific transformations, applied to items.data
# before the framework's clean_data() step and written back to items.data
items.data = (items.data
              # the three date columns are handed to format_dates with a month/day/year pattern
              .pipe(items.clean.format_dates, cols=['Created', 'RevisionDate', 'revisionDateEpoch'], format='%m/%d/%Y')
              # AverageRating is handed to round_numeric_column with 2 decimal places
              .pipe(items.clean.round_numeric_column, col='AverageRating', decimal_places=2)
              # LearningLevels is mapped to display labels by the helper defined in this notebook
              .pipe(item_learning_levels, col='LearningLevels')
              )
items.clean_data()
items.generate_profile_report()
items.generate_power_bi_script()
items.save_data()

<datapipeline_framework.vfu_datapipeline.Workflow at 0x1315150d0>

### nps

In [None]:
# Weekly pre-processing run for the 'nps' entity: a single JSON file, with the
# data quality report switched off via run_data_quality_report=False.
nps = Workflow(entity_name='nps',
               multiple_files=False,
               file_extension='json',
               run_data_quality_report=False,
               runtime='local')
nps.load_raw_data()
nps.validate_data()
# custom part: the two date columns are passed to format_dates separately
# because they use different patterns ('%Y-%m-%d' vs '%m/%d/%Y')
nps.data = (nps.data
               .pipe(nps.clean.format_dates, cols=['Modified'], format='%Y-%m-%d')
               .pipe(nps.clean.format_dates, cols=['Created'], format='%m/%d/%Y')
           )
nps.clean_data()
nps.generate_profile_report()
nps.generate_power_bi_script()
nps.save_data()

<datapipeline_framework.vfu_datapipeline.Workflow at 0x1314bb210>

In [None]:
# Bare expression as the last statement of the cell: the notebook renders the
# value of nps.profile_report as the cell output.
nps.profile_report



### lms-item-events

In [None]:
def lms_items_streamline_assignment_type(df, col, mapping=None):
    """Translate the assignment-type codes in ``df[col]`` into readable labels.

    Only the first space-separated token of each value is used for the lookup.
    Empty, missing and unmapped values all end up as ``'Optional'``.

    Args:
        df: DataFrame holding the column. It is modified in place.
        col: Name of a string-valued column in ``df``.
        mapping: Optional dict from code to label. When omitted (or empty)
            the default OPT/REC/REQ table is used.

    Returns:
        The same ``df`` object with ``col`` replaced by the labels.
    """
    default_labels = {'OPT': 'Optional', 'REC': 'Recommended', 'REQ': 'Mandatory'}
    assignment_type_names = mapping if mapping else default_labels
    fallback = 'Optional'

    # Reduce every value to its leading token before looking it up.
    leading_code = df[col].str.split(' ').str[0]
    df[col] = np.where(leading_code == '', fallback, leading_code)

    # Anything the table does not know maps to NaN and is filled with the fallback.
    df[col] = df[col].map(assignment_type_names).fillna(value=fallback)
    return df

In [None]:
# Weekly pre-processing run for the 'lms-asset-events' entity: multiple CSV
# files, with the data quality report switched off.
lms_item_events = Workflow(entity_name='lms-asset-events',
                           multiple_files=True,
                           file_extension='csv',
                           run_data_quality_report=False,
                           runtime='local')

lms_item_events.load_raw_data()

lms_item_events.validate_data()

lms_item_events.clean_data()

# custom part
# NOTE(review): unlike the 'assets' and 'nps' cells, this step runs AFTER
# clean_data() and stores its result in `lms_item_events.lms_asset_events`
# rather than `lms_item_events.data`. Nothing visible in this notebook reads
# `lms_asset_events`, so the transformed frame may never reach save_data().
# (lms_items_streamline_assignment_type mutates its input in place, so that
# part may still land in `.data` if format_dates returns the same object.)
# Confirm against the framework whether the target should be `.data`.
lms_item_events.lms_asset_events = (lms_item_events.data
                                    .pipe(lms_item_events.clean.format_dates, cols=['completion_date'], format='%Y-%m-%d')
                                    .pipe(lms_items_streamline_assignment_type, col='assignment_type')
                                    )
# Profile report generation is disabled for this entity.
#lms_item_events.generate_profile_report()
lms_item_events.generate_power_bi_script()
lms_item_events.save_data()

<datapipeline_framework.vfu_datapipeline.Workflow at 0x132c192d0>

### learner profiles

* Load the old and new SharePoint datasets separately.
* Merge them (entries in the weekly file are newer).

In [None]:
wf = Workflow(service_name='vfu', runtime='local')
old_learner_profiles = wf.storage.blob_to_df(container_name='vfu-learner-profiles-raw', blob_name='UserProfileMaster_WeeklyUpdate.json')
new_learner_profiles = wf.storage.blob_to_df(container_name='vfu-learner-profiles-raw', blob_name='vf-learner-profiles-raw.csv')