# Get data from Snowflake Marketplace
## Onsite Search — Product demand analysis on retail sites and marketplaces by Similarweb Ltd.

Daily Onsite Search keywords on Amazon: https://app.snowflake.com/marketplace/listing/GZT1ZA3NIS/similarweb-ltd-onsite-search-product-demand-analysis-on-retail-sites-and-marketplaces

In [None]:
-- Copy the Marketplace data feed into a local table so the remaining cells
-- can refer to it by the short name ON_SITE_SEARCH.
create or replace table ON_SITE_SEARCH
as
select *
from ONSITE_SEARCH__PRODUCT_DEMAND_ANALYSIS_ON_RETAIL_SITES_AND_MARKETPLACES.DATAFEEDS.ON_SITE_SEARCH;

# Exploratory data analysis
Keyword search distribution by day - all keywords

In [None]:
-- Rows per day across every keyword.
-- Note: cnt is a row count (count(*)), not a sum of calibrated_visits.
select
  date,
  count(*) as cnt
from ON_SITE_SEARCH
group by date;

In [None]:
# Packages used by the Python charting cells
import pandas as pd
import streamlit as st
from snowflake.snowpark.context import get_active_session

# Handle to the notebook's active Snowpark session
session = get_active_session()

# Convert the cell4 result to pandas so Streamlit can plot it
# (cell4 is presumably the "all keyword searches by day" SQL cell)
forecast_all_keywords_df = cell4.to_pandas()

# Line chart of CNT over DATE
st.subheader("Keyword search by day - all keywords")
st.line_chart(
    forecast_all_keywords_df,
    x="DATE",
    y="CNT",
)


In [None]:
-- Daily total of calibrated_visits for every keyword containing "backpack"
-- (ilike makes the match case-insensitive).
select
  date,
  sum(calibrated_visits) as cnt
from ON_SITE_SEARCH
where oss_keyword ilike '%backpack%'
group by date;

In [None]:
# Turn cell6 into a pandas data frame
# (cell6 is presumably the backpack-filtered "searches by day" SQL cell)
forecast_all_keywords_df = cell6.to_pandas()

# Chart the data. The title names the backpack filter so this chart is not
# mistaken for the all-keywords chart.
st.subheader("Keyword search by day - backpack")
st.line_chart(forecast_all_keywords_df, x='DATE', y='CNT')


In [None]:
-- Historical series for "backpack" keyword searches: one row per day with
-- the day cast to TIMESTAMP_NTZ and the summed calibrated_visits.
create or replace view backpack_keyword_data as
select
  date::timestamp_ntz as search_date,
  sum(calibrated_visits) as search_cnt
from ON_SITE_SEARCH
where oss_keyword ilike '%backpack%'
group by date::timestamp_ntz;

## Train the Forecast model

In [None]:
-- Fit a forecast model on the backpack history view.
-- search_date is the time axis; search_cnt is the value to predict.
create or replace snowflake.ml.forecast backpack_model(
  input_data        => system$reference('VIEW', 'backpack_keyword_data'),
  timestamp_colname => 'search_date',
  target_colname    => 'search_cnt'
);

## Predict demand in the next 90 days
Call the FORECAST method of the trained model and save the results to a table.

In [None]:
-- Ask backpack_model for the next 90 forecasting periods and persist the
-- result as a table.
create or replace table backpack_model_forecast as
select *
from table(
  backpack_model!forecast(forecasting_periods => 90)
);

In [None]:
-- One result set holding both history and forecast:
-- history rows carry search_cnt (forecast is null), forecast rows the reverse.
select
  search_date,
  search_cnt,
  null as forecast
from backpack_keyword_data
union all
select
  ts as search_date,
  null as search_cnt,
  forecast
from backpack_model_forecast;

In [None]:
# Turn cell13 into a pandas data frame
# (cell13 is presumably the SQL cell combining backpack history and forecast)
forecast_all_keywords_df = cell13.to_pandas()

# Chart the data: actual search counts and forecast values on one time axis.
# The title reflects that this is the backpack series, not all keywords.
st.subheader("Keyword search by day - backpack, history and forecast")
st.line_chart(forecast_all_keywords_df, x='SEARCH_DATE', y=['SEARCH_CNT', 'FORECAST'])


# Improve the model with features
## Features or exogenous variables
- Holidays
- Weather
- Special events
- Marketing events

In [None]:
-- create a view that represents historical data for backpack keyword search,
-- adding features to indicate Amazon Prime Day and summer holidays
create or replace view backpack_data_with_features as
select 
  date::TIMESTAMP_NTZ AS search_date,
  sum(calibrated_visits) AS search_cnt,
  -- Prime Day 2021 ran for two days (June 21-22). Flag both days so the
  -- training feature matches the two-day Prime Day window used in the
  -- future_features view.
  case 
    when date >= '2021-06-21' and date <= '2021-06-22' then 1 
    else 0 
  end as amazon_prime_day_flg,
  -- summer holiday window for the historical period (inclusive on both ends)
  case 
    when date >= '2021-07-11' and date <= '2021-09-06' then 1 
    else 0 
  end as summer_holiday_flg
from ON_SITE_SEARCH
where oss_keyword ilike '%backpack%'
group by search_date, amazon_prime_day_flg, summer_holiday_flg;

## Train the FORECAST model with features

In [None]:
-- Fit a second forecast model on the history view that includes feature
-- columns. Only the timestamp and target columns are named here; the view's
-- remaining columns are part of the input data.
create or replace snowflake.ml.forecast backpack_model_with_features(
  input_data        => table(backpack_data_with_features),
  timestamp_colname => 'search_date',
  target_colname    => 'search_cnt'
);

In [None]:
-- Feature values for the forecasting period: 90 consecutive days following
-- 2022-06-30, each with its Prime Day and summer holiday flags.
create or replace view future_features as
with future_days as (
  -- row_number() numbers the 90 generated rows 1..90; adding that many days
  -- to 2022-06-30 yields one row per future day
  select
    dateadd(
      'day',
      row_number() over (partition by null order by null),
      '2022-06-30'
    ) as search_date
  from table (generator(rowcount => 90))
)
select
  search_date,
  case
    when search_date between '2022-07-12' and '2022-07-13' then 1
    else 0
  end as amazon_prime_day_flg,
  case
    when search_date between '2022-07-11' and '2022-09-06' then 1
    else 0
  end as summer_holiday_flg
from future_days;

## Predict demand with features

In [None]:
-- Forecast with the feature-aware model, supplying the future feature values,
-- and persist the result as a table.
-- NOTE(review): this table has the same name as the model it is built from;
-- confirm that is intended before renaming (later cells read this table).
create or replace table backpack_model_with_features as
select *
from table(
  backpack_model_with_features!forecast(
    input_data        => table(future_features),
    timestamp_colname => 'search_date'
  )
);

In [None]:
-- One result set holding history and the feature-aware forecast:
-- history rows carry search_cnt (forecast is null), forecast rows the reverse.
select
  search_date,
  search_cnt,
  null as forecast
from backpack_data_with_features
union all
select
  ts as search_date,
  null as search_cnt,
  forecast
from backpack_model_with_features;

In [None]:
# Turn cell22 into a pandas data frame
# (cell22 is presumably the SQL cell combining backpack history with the
# feature-aware forecast)
forecast_all_keywords_df = cell22.to_pandas()

# Chart the data: actual search counts and forecast values on one time axis.
# The title reflects that this is the backpack series forecast with features.
st.subheader("Keyword search by day - backpack, history and forecast with features")
st.line_chart(forecast_all_keywords_df, x='SEARCH_DATE', y=['SEARCH_CNT', 'FORECAST'])


In [None]:
-- Side-by-side comparison of the two forecasts, matched on timestamp.
select
  wo.ts,
  wo.forecast as forecast_without_features,
  wh.forecast as forecast_with_features
from backpack_model_forecast as wo
inner join backpack_model_with_features as wh
  on wo.ts = wh.ts;

In [None]:
# Convert the cell24 result to pandas so Streamlit can plot it
forecast_all_keywords_df = cell24.to_pandas()

# Plot both forecast series against the shared TS axis
st.subheader("Compare forecasts without and with features")
st.line_chart(
    forecast_all_keywords_df,
    x="TS",
    y=["FORECAST_WITHOUT_FEATURES", "FORECAST_WITH_FEATURES"],
)
