In [None]:
%var SystemName=Vantage

The password is **TRNG_TDU_TD01**. Enter it in the password dialog box, then press the Enter/Return key to connect.

In [None]:
%connect ${SystemName}

---------------------------------------------------------------------
-- Teradata Vantage Analytics Workshop Foundations I  --
-- Module 02 - Sessionize             --



In [None]:
-- ------------------------------------------------------------
-- Lab 01a: View the Data
-- Browse the sessionme table before any sessionization is applied.
-- ------------------------------------------------------------
SELECT *
FROM TRNG_TDU_TD01.sessionme;

In [None]:
-- ------------------------------------------------------------
-- Lab 01b: Sessionize using SQLE
-- Run Sessionize over each user's clicks, ordered by click time.
-- Argument meanings below follow the Teradata Sessionize
-- documentation; verify against the release you are running.
--   TimeColumn : column that holds the click timestamp
--   TimeOut    : gap in seconds after which a new session starts
--   ClickLag   : minimum gap in seconds between clicks for a human;
--                faster clicking is treated as likely bot activity
--   EmitNull   : whether rows with a NULL time value are returned
-- ------------------------------------------------------------
SELECT *
FROM Sessionize (
    ON TRNG_TDU_TD01.sessionme
        PARTITION BY userid
        ORDER BY clicktime
    USING
        TimeColumn ('clicktime')
        TimeOut (60)
        ClickLag (0.2)
        EmitNull ('false')
)
ORDER BY userid, clicktime;

In [None]:
-- ------------------------------------------------------------
-- Lab 02a: View the Data
-- Show the bank_web rows of three selected customers, sorted by
-- customer and then by datestamp (ascending is the default order).
-- ------------------------------------------------------------
SELECT *
FROM TRNG_TDU_TD01.bank_web
WHERE customer_id IN (620, 8263, 30324)
ORDER BY customer_id, datestamp;
 

In [None]:
-- ------------------------------------------------------------
-- Lab 02b: 2 Required arguments and Output
-- Minimal Sessionize call: only TimeColumn and TimeOut are given.
-- Rows are partitioned per customer and ordered by datestamp.
-- ------------------------------------------------------------
SELECT *
FROM Sessionize (
    ON TRNG_TDU_TD01.bank_web
        PARTITION BY customer_id
        ORDER BY datestamp
    USING
        TimeColumn ('datestamp')
        TimeOut (600)
)
ORDER BY customer_id, datestamp;

In [None]:
-- ------------------------------------------------------------
-- Lab 03a: Specifying a query in the ON Clause
-- The ON clause takes a subquery instead of a table name, so only
-- the three listed customers are passed to Sessionize.
-- ------------------------------------------------------------
SELECT *
FROM Sessionize (
    ON (
        SELECT *
        FROM TRNG_TDU_TD01.bank_web
        WHERE customer_id IN (8263, 30324, 620)
    )
        PARTITION BY customer_id
        ORDER BY datestamp
    USING
        TimeColumn ('datestamp')
        TimeOut (120)
)
ORDER BY customer_id, datestamp;

In [None]:
-- ------------------------------------------------------------
-- Lab 04a: Detecting Robots
-- Sessionize the clicks of customer 7172 with a ClickLag argument.
-- Per the Teradata Sessionize documentation, ClickLag is the minimum
-- number of seconds between clicks for a human user (verify for
-- your release); here it is set to 0.1.
-- ------------------------------------------------------------
SELECT *
FROM Sessionize (
    ON (
        SELECT *
        FROM TRNG_TDU_TD01.bank_web
        WHERE customer_id IN (7172)
    )
        PARTITION BY customer_id
        ORDER BY datestamp
    USING
        TimeColumn ('datestamp')
        TimeOut (60)
        ClickLag (0.1)
) AS dt
ORDER BY customer_id, datestamp;

In [None]:
-- ------------------------------------------------------------
-- Lab 05: Landing Sessionize Results and Summarizing Findings
-- ------------------------------------------------------------
-- View the data as it looks prior to sessionizing.
SELECT *
FROM TRNG_TDU_TD01.chips_clean;

In [None]:
----------------------------------
-- Lab 5a: Create Table --
---------------------------------
-- Remove any copy of chips_sessionized left over from an earlier run so that
-- the CREATE TABLE in the next cell can succeed.
-- NOTE(review): on a first run the table presumably does not exist yet, in
-- which case this statement reports an error that can be ignored.
DROP TABLE chips_sessionized;

In [None]:
-- Land the Sessionize output in a permanent table.
-- Only remote_host, request_time and requested_page are read from
-- chips_clean; rows are partitioned by remote_host and ordered by
-- request_time, with a TimeOut of 3600.
CREATE MULTISET TABLE chips_sessionized AS (
    SELECT *
    FROM Sessionize (
        ON (
            SELECT remote_host, request_time, requested_page
            FROM TRNG_TDU_TD01.chips_clean
        )
            PARTITION BY remote_host
            ORDER BY request_time ASC
        USING
            TimeColumn ('request_time')
            TimeOut (3600)
    )
) WITH DATA;

In [None]:
-- ------------------------------------------------------------
-- Lab 5b: View the Sessionize Data
-- Return a 10-row sample of the landed table.
-- ------------------------------------------------------------
SELECT *
FROM chips_sessionized
SAMPLE 10;

In [None]:
--------------------------------------------------
-- Lab 5c: View Most Popular Pages --
--------------------------------------------------
-- Count, per requested page, the number of distinct sessions that hit it.
-- chips_sessionized was produced with PARTITION BY remote_host, so a session
-- is keyed here by remote_host and sessionid together, joined with '_'
-- (the same separator the later session-metric queries use).
-- Only pages seen in at least 700 distinct sessions are listed.
SELECT requested_page,
       COUNT (DISTINCT remote_host || '_' || sessionid) AS distinct_sessions
FROM chips_sessionized
GROUP BY requested_page
HAVING distinct_sessions >= 700
ORDER BY distinct_sessions DESC;

In [None]:
-- Lab 5d: Create a summary table using sessionized data to answer various questions 
--  * How many pages are visited per session?
--  * How many distinct pages are visited per session?
--  * How long does each session last?
--  * What % of sessions contain an actual order?
--
-- This cell only removes any x_summary table left over from an earlier run;
-- the table itself is created in the next cell.
-- NOTE(review): on a first run the table presumably does not exist yet, in
-- which case this statement reports an error that can be ignored.

DROP TABLE x_summary;

In [None]:
-- Build one summary row per (remote_host, sessionid) from chips_sessionized:
--   checkouts, payments  : integer placeholders set to 0 here; later cells
--                          UPDATE them from the checkouts and payments tables
--   pages                : count of requested_page values in the session
--   distinct_pages       : count of distinct requested_page values
--   min_request_time     : earliest request_time in the session
--   max_request_time     : latest request_time in the session
--   session_duration     : max_request_time minus min_request_time, expressed
--                          as a DAY(4) TO SECOND interval
-- The WHERE clause drops a hard-coded list of remote_host addresses.
-- NOTE(review): the reason for excluding these hosts is not stated here;
-- presumably they were identified as robots or other non-customer traffic. Confirm.
-- checkouts and payments are listed in GROUP BY because they are non-aggregated
-- select-list columns; being constants, they do not change the grouping.
CREATE MULTISET TABLE x_summary as
(SELECT remote_host, sessionid,
 cast (0 as integer) as checkouts,
 cast (0 as integer) as payments,
 count (requested_page) as pages,
 count (distinct requested_page) as distinct_pages,
 min (request_time) as min_request_time,
 max (request_time) as max_request_time,
 max_request_time - min_request_time DAY(4) TO SECOND as session_duration
 FROM chips_sessionized
 WHERE remote_host not in ('98.239.41.45','199.253.184.250',
   '24.182.98.90','208.103.14.206','24.62.170.79','72.198.7.76',
   '104.169.48.130','174.28.182.211','38.124.19.66','66.87.99.152',
   '107.77.68.127','108.231.245.71','50.205.80.194','166.172.59.102',
   '97.87.26.18','68.122.5.1','50.156.225.101','24.123.193.230',
   '68.227.86.106','180.94.82.42','108.237.161.125','104.11.128.58',
   '107.202.120.14','71.80.75.49','73.200.201.147','23.28.62.87',
   '107.203.33.144','50.176.94.179','67.214.86.18')
 GROUP BY remote_host, sessionid,checkouts,payments
) WITH DATA ;

In [None]:
-- Remove any checkouts table left over from an earlier run so it can be rebuilt.
-- NOTE(review): on a first run the table presumably does not exist yet, in
-- which case this statement reports an error that can be ignored.
DROP TABLE checkouts;

In [None]:
-- Per (remote_host, sessionid), count the requests for '/checkout.php'.
-- Sessions without any such request get no row in this table.
CREATE MULTISET TABLE checkouts AS (
    SELECT remote_host,
           sessionid,
           COUNT (requested_page) AS checkouts
    FROM chips_sessionized
    WHERE requested_page = '/checkout.php'
    GROUP BY remote_host, sessionid
) WITH DATA;



In [None]:
-- Inspect the per-session checkout counts.
SELECT *
FROM checkouts;

In [None]:
-- Copy each session's checkout count from the checkouts table into
-- x_summary.checkouts, matching on remote_host and sessionid.
-- Rows of x_summary with no match in checkouts are left unchanged.
UPDATE x_summary
FROM checkouts AS co
SET checkouts = co.checkouts
WHERE x_summary.remote_host = co.remote_host
  AND x_summary.sessionid = co.sessionid;

In [None]:
-- Remove any payments table left over from an earlier run so it can be rebuilt.
-- NOTE(review): on a first run the table presumably does not exist yet, in
-- which case this statement reports an error that can be ignored.
DROP TABLE payments;

In [None]:
-- Per (remote_host, sessionid), count the requests for '/payment.php'.
-- Sessions without any such request get no row in this table.
CREATE MULTISET TABLE payments AS (
    SELECT remote_host,
           sessionid,
           COUNT (requested_page) AS payments
    FROM chips_sessionized
    WHERE requested_page = '/payment.php'
    GROUP BY remote_host, sessionid
) WITH DATA;

In [None]:
-- Inspect the per-session payment counts.
SELECT *
FROM payments;

In [None]:
-- Copy each session's payment count from the payments table into
-- x_summary.payments, matching on remote_host and sessionid.
-- Rows of x_summary with no match in payments are left unchanged.
UPDATE x_summary
FROM payments AS pay
SET payments = pay.payments
WHERE x_summary.remote_host = pay.remote_host
  AND x_summary.sessionid = pay.sessionid;

In [None]:
-- View Summary Table: return a 10-row sample of x_summary.
SELECT *
FROM x_summary
SAMPLE 10;

In [None]:
-------------------------------------------------------------
-- Lab 05e: Retrieve General Session Metrics --
------------------------------------------------------------
-- One-row overview of x_summary:
--   remote_hosts          : number of distinct hosts
--   sessions              : number of distinct (remote_host, sessionid) pairs
--   avg_sessions_per_host : sessions / remote_hosts (the *1.00 forces decimal division)
--   avg_pages             : average pages per session
--   avg_distinct_pages    : average distinct pages per session
--   avg_session_duration  : average of the session_duration interval
-- The averages are cast to DECIMAL(10,2) rather than DECIMAL(4,2): the narrower
-- type tops out at 99.99 and would fail with a numeric overflow if an average
-- reached 100 pages or more.
SELECT count (distinct remote_host) as remote_hosts, 
       count (distinct remote_host || '_' || sessionid) as sessions,
       sessions*1.00 / remote_hosts as avg_sessions_per_host,
       cast (avg (pages) as decimal (10,2)) as avg_pages, 
       cast (avg (distinct_pages) as decimal (10,2)) as avg_distinct_pages, 
       avg (session_duration) as avg_session_duration
FROM x_summary;

In [None]:
-- ------------------------------------------------------------
-- Lab 05f: Scrutinize % of sessions with Checkout and Payment
--          Are there Abandoned Carts?
-- Flag each session 'y'/'n' for having at least one payment and at
-- least one checkout, then count sessions per flag combination.
-- ------------------------------------------------------------
SELECT
    CASE
        WHEN payments > 0 THEN 'y'
        ELSE 'n'
    END AS sessions_with_payment,
    CASE
        WHEN checkouts > 0 THEN 'y'
        ELSE 'n'
    END AS sessions_with_checkout,
    COUNT (*) AS number_of_sessions
FROM x_summary
GROUP BY sessions_with_payment, sessions_with_checkout
ORDER BY sessions_with_payment, sessions_with_checkout;

In [None]:
--------------------------------------------------------------------------------------------
-- Lab 05g: General Session Metrics by Checkout/Purchase Groups --
--------------------------------------------------------------------------------------------
-- Same metrics as the overall session overview, but broken down by whether the
-- session had at least one payment and at least one checkout ('y'/'n' flags).
-- The averages are cast to DECIMAL(10,2) rather than DECIMAL(4,2): the narrower
-- type tops out at 99.99, and a small group of long sessions could average 100
-- pages or more, which would fail with a numeric overflow.
SELECT 
  case when payments > 0 then 'y' else 'n' end as sessions_with_payment,
  case when checkouts > 0 then 'y' else 'n' end as sessions_with_checkout,
  count (distinct remote_host) as remote_hosts, 
  count (distinct remote_host || '_' || sessionid) as sessions,
  sessions*1.00 / remote_hosts as avg_sessions_per_host,
  cast (avg (pages) as decimal (10,2)) as avg_pages, 
  cast (avg (distinct_pages) as decimal (10,2)) as avg_distinct_pages, 
  avg (session_duration) as avg_session_duration
FROM x_summary
GROUP BY sessions_with_payment, sessions_with_checkout
ORDER BY sessions_with_payment, sessions_with_checkout;

In [None]:
------------------------------------------------------------------
-- Lab 05h: Sessionize 'Fix the Syntax' ERRORs  --
--   Answer key appears further below                 --
------------------------------------------------------------------
-- 01) Fix the error(s): Erroneous code 01:
-- NOTE: this statement is an exercise and is deliberately left incorrect.
-- It is expected to fail as written; find and correct the error(s), then re-run.
SELECT * FROM Sessionize 
(ON TRNG_TDU_TD01.sessionme 
 PARTITION BY userid 
 ORDER BY clicktime 
 USING
 TimeColumns ('clicktime')
 TimeOut (60)
 ClickLag (0.2) 
 EmitNull ('false')
) ORDER BY userid, clicktime;

In [None]:
-- 02) Fix the error(s): Erroneous code 02: 
-- NOTE: this statement is an exercise and is deliberately left incorrect.
-- It is expected to fail as written; find and correct the error(s), then re-run.
SELECT * FROM Sessionize 
(ON TRNG_TDU_TD01.bank_web 
 PARTITION BY customer_id 
 ORDER BY datestamp 
 USING
 TimeColumn ('datestamp')
 TimeOut ('600')
) ORDER BY customer_id, datestamp;

#### Disconnect from Vantage

In [None]:
%disconnect ${SystemName}

Copyright 2021 Teradata. All rights reserved.