In [None]:
# ClickHouse (via the Tabix client) was used to solve the tasks.

In [None]:
# LESSON 1 -- Intro to SQL

In [None]:
# Preview the checks table: every column, first 10 rows.
# No ORDER BY is given, so which 10 rows come back is up to the server.
SELECT *
FROM checks
LIMIT 10

In [None]:
# The 10 rows of checks with the largest Rub values, largest first.
SELECT *
FROM checks
ORDER BY Rub DESC
LIMIT 10;

In [None]:
# First 15 rows of checks ordered by UserID (ascending, the default direction),
# with the Rub column exposed under the name Revenue.
SELECT Rub AS Revenue, BuyDate, UserID
FROM checks
ORDER BY UserID
LIMIT 15

In [None]:
# The 10 smallest distinct UserID values present in checks, in ascending order.
SELECT DISTINCT UserID
FROM checks
ORDER BY UserID
LIMIT 10

In [None]:
# Purchases made on 8 March 2019: the 10 rows with the highest Rub,
# highest first.
SELECT *
FROM checks
WHERE BuyDate = '2019-03-08'
ORDER BY Rub DESC
LIMIT 10

In [None]:
# Distinct UserIDs that have at least one check dated 1 September 2019 with
# Rub above 2000. Note the threshold applies to each individual row, not to a
# per-user total. Sorted by UserID, largest first.
SELECT DISTINCT UserID
FROM checks
WHERE BuyDate = '2019-09-01'
  AND Rub > 2000
ORDER BY UserID DESC

In [None]:
# LESSON 2 -- Grouping and Aggregate

In [None]:
# Number of purchases per client, returned as NumChecks.
# COUNT(Rub) counts the rows of each UserID group whose Rub is not NULL.
# Top 10 clients by NumChecks, largest first.
SELECT UserID, COUNT(Rub) AS NumChecks
FROM checks
GROUP BY UserID
ORDER BY NumChecks DESC
LIMIT 10

In [None]:
# Total amount spent per client (Revenue = sum of Rub), alongside the number
# of checks behind it (NumChecks). Top 10 clients by Revenue, largest first.
SELECT UserID, COUNT(Rub) AS NumChecks, SUM(Rub) AS Revenue
FROM checks
GROUP BY UserID
ORDER BY Revenue DESC
LIMIT 10

In [None]:
# Per-day minimum, maximum and average Rub value.
# Sorted by BuyDate descending and limited to 10 rows, i.e. the 10 greatest
# BuyDate values in the table.
SELECT
    BuyDate,
    MIN(Rub) AS MinCheck,
    MAX(Rub) AS MaxCheck,
    AVG(Rub) AS AvgCheck
FROM checks
GROUP BY BuyDate
ORDER BY BuyDate DESC
LIMIT 10

In [None]:
# Customers whose total spend (sum of Rub over all their checks) exceeds 10000.
# The threshold is on the aggregate, so it is applied in HAVING.
# Sorted by UserID, largest first; limited to 10 rows.
SELECT UserID, SUM(Rub) AS Revenue
FROM checks
GROUP BY UserID
HAVING Revenue > 10000
ORDER BY UserID DESC
LIMIT 10

In [None]:
# Revenue per country, where the revenue of a row is Quantity * UnitPrice.
# Top 10 countries by Revenue, largest first.
SELECT Country, SUM(Quantity * UnitPrice) AS Revenue
FROM default.retail
GROUP BY Country
ORDER BY Revenue DESC
LIMIT 10

In [None]:
# Per-country average UnitPrice and average Quantity, sorted by the average
# UnitPrice, largest first.
# Rows whose Description is exactly 'Manual' are excluded before aggregation.
SELECT Country, AVG(UnitPrice), AVG(Quantity)
FROM default.retail
WHERE Description != 'Manual'
GROUP BY Country
ORDER BY AVG(UnitPrice) DESC

In [None]:
# Revenue (sum of UnitPrice * Quantity) per calendar month, best month first.
# toStartOfMonth() maps every InvoiceDate to the first day of its month, which
# serves as the grouping key.
# Rows whose Description is exactly 'Manual' are excluded before aggregation.
SELECT
    toStartOfMonth(InvoiceDate) AS month,
    SUM(UnitPrice * Quantity) AS revenue
FROM default.retail
WHERE Description != 'Manual'
GROUP BY month
ORDER BY revenue DESC

In [None]:
# Average UnitPrice per customer for purchases invoiced in March 2011.
# Sorted by that average, largest first; limited to 10 rows.
# Rows whose Description is exactly 'Manual' are excluded.
#
# The month condition is a per-row filter on InvoiceDate, so it lives in WHERE.
# InvoiceDate is neither a grouping key nor aggregated, which means it cannot
# be referenced from HAVING once rows are collapsed per CustomerID.
SELECT
    CustomerID,
    AVG(UnitPrice) AS avg_purch_in_march
FROM default.retail
WHERE Description != 'Manual'
  AND toStartOfMonth(InvoiceDate) = '2011-03-01'
GROUP BY CustomerID
ORDER BY avg_purch_in_march DESC
LIMIT 10

In [None]:
# Monthly average, minimum and maximum Quantity for the United Kingdom,
# sorted by the average quantity, largest first.
# Excluded before aggregation: rows whose Description is exactly 'Manual' and
# rows with a non-positive Quantity.
#
# The country restriction is a plain row filter, so it is applied in WHERE.
# That way only United Kingdom rows are grouped, instead of aggregating every
# country and discarding the others afterwards. Country stays in GROUP BY so
# the output columns are unchanged.
SELECT
    Country,
    toStartOfMonth(InvoiceDate) AS month,
    AVG(Quantity) AS avg_quantity,
    MIN(Quantity) AS min_quantity,
    MAX(Quantity) AS max_quantity
FROM default.retail
WHERE Description != 'Manual'
  AND Quantity > 0
  AND Country = 'United Kingdom'
GROUP BY Country, month
ORDER BY avg_quantity DESC

In [None]:
# LESSON 3 -- Joins

In [None]:
# Attach the UserID from devices to every row of events, matching on DeviceID.
# LEFT JOIN keeps events rows that have no matching device.
# Sorted by the events-side DeviceID, largest first; limited to 100 rows.
#
# Both joined tables expose a DeviceID column, so the sort key is written as
# l.DeviceID to make explicit which side it comes from.
SELECT
    l.AppPlatform AS AppPlatform,
    l.events AS events,
    l.EventDate AS EventDate,
    r.UserID AS UserID
FROM events AS l
LEFT JOIN devices AS r ON l.DeviceID = r.DeviceID
ORDER BY l.DeviceID DESC
LIMIT 100

In [None]:
# Total money spent (sum of Rub) per install Source, highest first,
# limited to 100 rows.
# Join chain: installs -> devices on DeviceID (yields Source + UserID),
# then -> checks on UserID (yields the Rub amounts).
SELECT l.Source, SUM(r.Rub) AS Rub
FROM (
    SELECT l.Source, l.DeviceID, r.UserID
    FROM installs AS l
    INNER JOIN devices AS r ON l.DeviceID = r.DeviceID
) AS l
INNER JOIN checks AS r ON l.UserID = r.UserID
GROUP BY l.Source
ORDER BY Rub DESC
LIMIT 100

In [None]:
# Exact number of distinct UserIDs with at least one row in checks, per
# install Source. The inner join to checks is what restricts the count to
# users who appear in checks.
# Sorted by Source in descending order; limited to 100 rows.
SELECT l.Source, uniqExact(r.UserID)
FROM (
    SELECT l.Source, l.DeviceID, r.UserID
    FROM installs AS l
    INNER JOIN devices AS r ON l.DeviceID = r.DeviceID
) AS l
INNER JOIN checks AS r ON l.UserID = r.UserID
GROUP BY l.Source
ORDER BY l.Source DESC
LIMIT 100

In [None]:
# Per install Source: total revenue plus the minimum, maximum and average Rub
# value of the joined checks rows.
# Sorted by Source in descending order; limited to 100 rows.
SELECT
    l.Source,
    SUM(r.Rub) AS Revenue,
    MIN(r.Rub) AS min_check,
    MAX(r.Rub) AS max_check,
    AVG(r.Rub) AS avg_check
FROM (
    SELECT l.Source, l.DeviceID, r.UserID
    FROM installs AS l
    INNER JOIN devices AS r ON l.DeviceID = r.DeviceID
) AS l
INNER JOIN checks AS r ON l.UserID = r.UserID
GROUP BY l.Source
ORDER BY l.Source DESC
LIMIT 100

In [None]:
# DeviceIDs of users who made at least one purchase in October 2019, sorted
# by DeviceID ascending and limited to 100 rows.
# The result has one row per (DeviceID, BuyDate) pair with that day's total
# Rub, so a device that bought on several days appears several times.
#
# The month restriction is a row-level condition and is therefore applied in
# WHERE, before grouping. Only the condition on the aggregate (total > 0)
# remains in HAVING.
SELECT
    r.BuyDate,
    l.DeviceID,
    SUM(r.Rub) AS total
FROM (
    SELECT l.Source, l.DeviceID, r.UserID
    FROM installs AS l
    INNER JOIN devices AS r ON l.DeviceID = r.DeviceID
) AS l
INNER JOIN checks AS r ON l.UserID = r.UserID
WHERE toStartOfMonth(CAST(r.BuyDate AS Date)) = '2019-10-01'
GROUP BY l.DeviceID, r.BuyDate
HAVING total > 0
ORDER BY l.DeviceID ASC
LIMIT 100

In [None]:
# Average value of the events column (views) for each Platform / Source pair.
# events rows are matched to installs on DeviceID; rows without a matching
# install are dropped by the inner join.
# Sorted by the average, largest first; limited to 100 rows.
SELECT r.Platform, r.Source, AVG(l.events) AS avg_views
FROM events AS l
INNER JOIN installs AS r ON l.DeviceID = r.DeviceID
GROUP BY r.Platform, r.Source
ORDER BY avg_views DESC
LIMIT 100

In [None]:
# Number of distinct installs DeviceIDs with Platform = 'android' that also
# have at least one row in events (enforced by the inner join on DeviceID).
# The query returns a single aggregate row, so the LIMIT has no visible effect.
SELECT COUNT(DISTINCT r.DeviceID)
FROM events AS l
JOIN installs AS r ON l.DeviceID = r.DeviceID
WHERE r.Platform = 'android'
LIMIT 100

In [None]:
# Conversion rate from installs to views for the iOS platform:
#   (distinct iOS DeviceIDs in installs that also appear in events)
#   / (distinct iOS DeviceIDs in installs)
#
# Written without a join on purpose. With an outer join, ClickHouse's default
# join_use_nulls = 0 fills the events-side columns of unmatched installs with
# the column type's default value rather than NULL, so counting distinct
# events-side DeviceIDs can include that default as one spurious device.
# Testing membership with IN avoids depending on that setting.
SELECT
    uniqExactIf(DeviceID, DeviceID IN (SELECT DeviceID FROM events))
        / uniqExact(DeviceID) AS conversion
FROM installs
WHERE Platform = 'iOS'

In [None]:
# 10 distinct DeviceIDs that are present in events but missing from installs
# (a logging mistake), listed by DeviceID in descending order.
# RIGHT ANTI JOIN keeps only the events rows that have no match in installs.
SELECT DISTINCT r.DeviceID
FROM installs AS l
RIGHT ANTI JOIN events AS r ON l.DeviceID = r.DeviceID
ORDER BY r.DeviceID DESC
LIMIT 10

In [None]:
# LESSON 4 -- Data Types

In [None]:
# To be continued...