# Step 0: Imports

In [2]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell runs, so edits to local .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
# Silence the warnings opensearch-py-ml emits every time a DataFrame connection is made.
# NOTE: this filter has no category or module restriction, so it hides *all* Python
# warnings for the rest of the notebook, not only the opensearch-py-ml ones.
import warnings
warnings.filterwarnings('ignore')

In [20]:
# imports to demonstrate DataFrame support
import pandas as pd
import numpy as np
import opensearch_py_ml as oml
from opensearchpy import OpenSearch

In [21]:
# imports to demonstrate SageMaker integration
import sagemaker
from opensearchpy import helpers
import boto3
import json

In [22]:
# imports to demonstrate ml-commons integration
from opensearch_py_ml.ml_commons_integration import MLCommonClient

# Step 1: Set up clients

In [23]:
# Address of the OpenSearch cluster; used as the default `cluster_url` of get_os_client() below.
# Change this value to point the notebook at a different cluster.
CLUSTER_URL = 'https://localhost:9200'

In [24]:
def get_os_client(cluster_url: str = CLUSTER_URL,
                  username: str = 'admin',
                  password: str = 'admin',
                  verify_certs: bool = False) -> 'OpenSearch':
    '''
    Get OpenSearch client

    NOTE: the default credentials are the OpenSearch demo defaults and are only
    appropriate for a local test cluster; pass real credentials explicitly
    (e.g. read from environment variables) for anything else.

    :param cluster_url: cluster URL like https://my-cluster.example.com:443
    :param username: user name for HTTP basic authentication
    :param password: password for HTTP basic authentication
    :param verify_certs: whether to verify the cluster's TLS certificate;
        defaults to False (as this notebook always did) so that a local
        cluster with a self-signed certificate works out of the box
    :return: OpenSearch client
    '''
    client = OpenSearch(
        hosts=[cluster_url],
        http_auth=(username, password),
        verify_certs=verify_certs
    )
    return client

In [25]:
# Connect using the defaults of get_os_client(): CLUSTER_URL and the default credentials.
client = get_os_client()
# ml-commons client built on top of the same OpenSearch connection.
ml_client = MLCommonClient(client)

# Step 2: Showcase DataFrame support

opensearch-py-ml DataFrames behave much like pandas DataFrames, but the data resides entirely in the OpenSearch cluster and is refreshed with each call.

In [26]:
# DataFrame view over the 'opensearch_dashboards_sample_data_ecommerce' sample data,
# accessed through the `client` connection created in Step 1.
ecommerce_oml = oml.DataFrame(client, 'opensearch_dashboards_sample_data_ecommerce')

In [27]:
# Preview the first 10 rows.
ecommerce_oml.head(10)

Unnamed: 0,category,currency,customer_birth_date,customer_first_name,customer_full_name,customer_gender,customer_id,customer_last_name,customer_phone,day_of_week,...,products.taxful_price,products.taxless_price,products.unit_discount_amount,sku,taxful_total_price,taxless_total_price,total_quantity,total_unique_products,type,user
0,[Men's Clothing],EUR,NaT,Eddie,Eddie Underwood,MALE,38,Underwood,,Monday,...,"[11.99, 24.99]","[11.99, 24.99]","[0, 0]","[ZO0549605496, ZO0299602996]",36.98,36.98,2,2,order,eddie
1,[Women's Clothing],EUR,NaT,Mary,Mary Bailey,FEMALE,20,Bailey,,Sunday,...,"[24.99, 28.99]","[24.99, 28.99]","[0, 0]","[ZO0489604896, ZO0185501855]",53.98,53.98,2,2,order,mary
2,"[Women's Shoes, Women's Clothing]",EUR,NaT,Gwen,Gwen Butler,FEMALE,26,Butler,,Sunday,...,"[99.99, 99.99]","[99.99, 99.99]","[0, 0]","[ZO0374603746, ZO0272202722]",199.98,199.98,2,2,order,gwen
3,"[Women's Shoes, Women's Clothing]",EUR,NaT,Diane,Diane Chandler,FEMALE,22,Chandler,,Sunday,...,"[74.99, 99.99]","[74.99, 99.99]","[0, 0]","[ZO0360303603, ZO0272002720]",174.98,174.98,2,2,order,diane
4,"[Men's Clothing, Men's Accessories]",EUR,NaT,Eddie,Eddie Weber,MALE,38,Weber,,Monday,...,"[59.99, 20.99]","[59.99, 20.99]","[0, 0]","[ZO0542505425, ZO0601306013]",80.98,80.98,2,2,order,eddie
5,"[Women's Shoes, Women's Clothing]",EUR,NaT,Diane,Diane Goodwin,FEMALE,22,Goodwin,,Sunday,...,"[59.99, 11.99]","[59.99, 11.99]","[0, 0]","[ZO0376303763, ZO0212402124]",71.98,71.98,2,2,order,diane
6,[Men's Clothing],EUR,NaT,Oliver,Oliver Rios,MALE,7,Rios,,Monday,...,"[20.99, 24.99]","[20.99, 24.99]","[0, 0]","[ZO0417504175, ZO0535205352]",45.98,45.98,2,2,order,oliver
7,"[Men's Clothing, Men's Accessories, Men's Shoes]",EUR,NaT,Abd,Abd Sutton,MALE,52,Sutton,,Monday,...,"[28.99, 41.99, 59.99, 7.99]","[28.99, 41.99, 59.99, 7.99]","[0, 0, 0, 0]","[ZO0423104231, ZO0314203142, ZO0394403944, ZO0...",138.96,138.96,4,4,order,abd
8,"[Women's Accessories, Women's Clothing]",EUR,NaT,Wilhemina St.,Wilhemina St. Tran,FEMALE,17,Tran,,Sunday,...,"[8.99, 20.99, 16.99, 41.99]","[8.99, 20.99, 16.99, 41.99]","[0, 0, 0, 0]","[ZO0186801868, ZO0074100741, ZO0223202232, ZO0...",88.96,88.96,4,4,order,wilhemina
9,"[Women's Shoes, Women's Clothing]",EUR,NaT,Rabbia Al,Rabbia Al Baker,FEMALE,5,Baker,,Monday,...,"[74.99, 32.99, 13.99, 49.99]","[74.99, 32.99, 13.99, 49.99]","[0, 0, 0, 0]","[ZO0249902499, ZO0068400684, ZO0494704947, ZO0...",171.96,171.96,4,4,order,rabbia


Just like in pandas, the output of a DataFrame method is another DataFrame, allowing for methods to be chained:

In [12]:
# Chain three DataFrame operations: keep the columns whose name matches the
# regex, select two of them, then preview the first rows.
(
    ecommerce_oml
    .filter(regex='produ.', axis=1)
    [['products.base_price', 'products.category']]
    .head()
)

Unnamed: 0,products.base_price,products.category
0,"[11.99, 24.99]","[Men's Clothing, Men's Clothing]"
1,"[24.99, 28.99]","[Women's Clothing, Women's Clothing]"
2,"[99.99, 99.99]","[Women's Shoes, Women's Clothing]"
3,"[74.99, 99.99]","[Women's Shoes, Women's Clothing]"
4,"[59.99, 20.99]","[Men's Clothing, Men's Accessories]"


In [13]:
# Average the aggregated columns for each (day_of_week, day_of_week_i, type) group.
ecommerce_oml.groupby(['day_of_week', 'day_of_week_i', 'type']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,products.base_price,products.base_unit_price,products.discount_amount,products.discount_percentage,products.min_price,products.price,products.product_id,products.quantity,products.tax_amount,products.taxful_price,products.taxless_price,products.unit_discount_amount,taxful_total_price,taxless_total_price,total_quantity,total_unique_products
day_of_week,day_of_week_i,type,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Friday,4,order,34.665464,34.665464,0.074866,0.118835,17.296444,34.590598,16526.074272,1.0,0.0,34.590598,34.590598,0.074866,75.600248,75.600248,2.185714,2.185714
Monday,0,order,35.963039,35.073007,0.037975,0.063291,17.419775,35.925064,16824.3125,1.003165,0.0,35.925064,35.925064,0.037975,78.421901,78.421901,2.189983,2.183074
Saturday,5,order,34.357962,34.357962,0.064331,0.101911,17.137746,34.293631,16746.33949,1.0,0.0,34.293631,34.293631,0.064331,73.148506,73.148506,2.133152,2.133152
Sunday,6,order,35.278721,35.278721,0.144828,0.229885,17.529815,35.133893,16823.595402,1.0,0.0,35.133893,35.133893,0.144828,74.669203,74.669203,2.125407,2.125407
Thursday,3,order,34.680409,34.680409,0.127316,0.203228,17.300117,34.553093,16895.474597,1.0,0.0,34.553093,34.553093,0.127316,74.584878,74.584878,2.15871,2.15871
Tuesday,1,order,34.335716,34.335716,0.151492,0.244836,17.174665,34.184224,16484.140015,1.0,0.0,34.184224,34.184224,0.151492,73.359492,73.359492,2.146141,2.146141
Wednesday,2,order,35.193203,35.193203,0.111284,0.171206,17.576341,35.081919,16470.031907,1.0,0.0,35.081919,35.081919,0.111284,76.143869,76.143869,2.170608,2.170608


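Unlike pandas DataFrames, opensearch-py-ml DataFrames do not allow item assignment (since the data itself is being pulled from OpenSearch). To modify values, first pull the rows you need into a regular pandas DataFrame with `to_pandas()`, as shown below.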

In [14]:
# Item assignment is not supported on opensearch-py-ml DataFrames. The error is
# the point of this cell, so catch and display it instead of letting it abort
# a "Restart & Run All" execution.
try:
    ecommerce_oml['taxless_total_price'] = ecommerce_oml['taxless_total_price'] * 1.15
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: 'DataFrame' object does not support item assignment

In [15]:
# Pull the first rows out of OpenSearch into an in-memory pandas DataFrame,
# which (unlike the opensearch-py-ml DataFrame) supports item assignment.
ecommerce_pd = ecommerce_oml.head().to_pandas()

In [17]:
# Scale the column by 1.15 in the local pandas copy.
# NOTE: this overwrites the column in place, so re-running this cell applies the
# multiplier again; re-create `ecommerce_pd` first to get back to the original values.
ecommerce_pd['taxless_total_price'] = ecommerce_pd['taxless_total_price'] * 1.15

In [18]:
# Show the updated column of the local pandas copy.
ecommerce_pd['taxless_total_price']

0     42.527
1     62.077
2    229.977
3    201.227
4     93.127
Name: taxless_total_price, dtype: float64

Note that because individual fields in OpenSearch are saved as mappings, which have no explicit order, we need to specify a column order when uploading data to our SageMaker endpoint.

# Step 4: Showcase ml-commons Support

All ML-related functionality in `opensearch-py-ml` is exposed through a separate client:

In [28]:
# Same construction as in Step 1 (rebinds `ml_client` from the existing `client`);
# presumably repeated so this section reads on its own.
ml_client = MLCommonClient(client)

In [30]:
# Upload a model archive (.zip) together with its configuration file (.json).
# NOTE: both arguments are absolute paths from the author's machine; replace them
# with the locations of your own model zip and config JSON before running, otherwise
# the call fails with FileNotFoundError.
ml_client.put_model("/workplace/dhrubo/upload_content/all-MiniLM-L6-v2.zip", "/workplace/dhrubo/upload_content/model_config.json", verbose=True)

FileNotFoundError: [Errno 2] No such file or directory: '/workplace/dhrubo/upload_content/all-MiniLM-L6-v2.zip'