In [1]:
import pandas as pd
import sqlite3
import sql

In [2]:
# NOTE(review): assigns a `style` attribute on the imported `sql` module.
# Presumably meant to select the table style used when %sql results are
# rendered; confirm the extension actually reads this attribute
# (TODO: verify against ipython-sql's SqlMagic configuration options).
sql.style = 'plain'

In [3]:
%load_ext sql
connection=sqlite3.connect("question_bank.db")
print("Successfully connected to SQL database")
# Connect to SQLite
%sql sqlite:///question_bank.db
print("success")

Successfully connected to SQL database
success


In [4]:
%%sql
-- Make this cell safe to re-run: question_bank.db is a file on disk, so
-- without the DROP a second execution fails with "table Logins already
-- exists" (and a plain IF NOT EXISTS would insert the sample rows twice).
DROP TABLE IF EXISTS Logins;

-- Create the Logins table: one row per login event of a user
CREATE TABLE Logins (
    user_id INTEGER,
    login_date DATE
);

-- Insert sample login data
--   user 1: Jan 1, 2, 3 and 5
--   user 2: Jan 1 and 3
--   user 3: Jan 10, 11 and 12
INSERT INTO Logins (user_id, login_date) VALUES
(1, '2024-01-01'),
(1, '2024-01-02'),
(1, '2024-01-03'),
(1, '2024-01-05'),
(2, '2024-01-01'),
(2, '2024-01-03'),
(3, '2024-01-10'),
(3, '2024-01-11'),
(3, '2024-01-12');

 * sqlite:///question_bank.db
Done.
9 rows affected.


[]

In [5]:
%%sql
-- Show every row currently stored in Logins
SELECT *
FROM Logins;

 * sqlite:///question_bank.db
Done.


user_id,login_date
1,2024-01-01
1,2024-01-02
1,2024-01-03
1,2024-01-05
2,2024-01-01
2,2024-01-03
3,2024-01-10
3,2024-01-11
3,2024-01-12


In [6]:
%%sql

-- Goal: users who logged in on 3 consecutive calendar days
--   user 1: dates 1, 2, 3  -> yes (3 in a row)
--   user 2: dates 1, 3     -> no
--   user 3: dates 10,11,12 -> yes
--
-- Approach: the window functions LAG() and LEAD() expose the previous and the
-- next login date of the same user next to the current login date:
--   LAG  -> previous login row of the user
--   LEAD -> next login row of the user
-- DATE(login_date, '-1 day') / DATE(login_date, '+1 day') give the calendar
-- day directly before / after the current login. If LAG matches the day
-- before and LEAD matches the day after, the three dates form a 3-day streak.

---------------------------------- Query --------------------------------------

-- Previous/next login and previous/next calendar day, per user

SELECT
    user_id,
    login_date,
    LAG(login_date)  OVER per_user_by_date AS lag_date,
    DATE(login_date, '-1 day')             AS prev_day,
    LEAD(login_date) OVER per_user_by_date AS lead_date,
    DATE(login_date, '+1 day')             AS next_date
FROM Logins
WINDOW per_user_by_date AS (
    PARTITION BY user_id   -- one window per user
    ORDER BY login_date    -- chronological order within the user
)

---------------------------------- Note ---------------------------------------
-- '-1 day' is a date modifier in SQLite.
-- To check whether a user's previous login was exactly 1 day earlier, compare
--     LAG(login_date) = DATE(login_date, '-1 day')
-- i.e. "was the login one row back (via LAG) on exactly the previous
-- calendar day?"

-- Date modifiers in SQLite (for reference)
-- Modifier             Effect
-- '-1 day'             Subtract 1 day
-- '+2 days'            Add 2 days
-- '-1 month'           Go back one month
-- '+1 year'            Add one year
-- 'start of month'     Move to the 1st of the month
-------------------------------------------------------------------------------

 * sqlite:///question_bank.db
Done.


user_id,login_date,lag_date,prev_day,lead_date,next_date
1,2024-01-01,,2023-12-31,2024-01-02,2024-01-02
1,2024-01-02,2024-01-01,2024-01-01,2024-01-03,2024-01-03
1,2024-01-03,2024-01-02,2024-01-02,2024-01-05,2024-01-04
1,2024-01-05,2024-01-03,2024-01-04,,2024-01-06
2,2024-01-01,,2023-12-31,2024-01-03,2024-01-02
2,2024-01-03,2024-01-01,2024-01-02,,2024-01-04
3,2024-01-10,,2024-01-09,2024-01-11,2024-01-11
3,2024-01-11,2024-01-10,2024-01-10,2024-01-12,2024-01-12
3,2024-01-12,2024-01-11,2024-01-11,,2024-01-13


In [7]:
%%sql

-- Rows that sit in the middle of a 3-consecutive-calendar-day login streak.
--
-- daily_logins: collapse the raw table to one row per (user, calendar day).
--   Without this, a user who logs in twice on the same day gets that same day
--   back from LAG/LEAD instead of the neighbouring day, and a genuine streak
--   is missed. DATE(...) also strips any time-of-day part so the comparison
--   against DATE(login_date, '-1 day' / '+1 day') is like-for-like.
-- login_cte: for every login day, the user's previous/next login day
--   (lag_date / lead_date) next to the previous/next calendar day
--   (prev_day / next_date).
WITH daily_logins AS (
    SELECT DISTINCT
        user_id,
        DATE(login_date) AS login_date
    FROM Logins
),
login_cte AS (
    SELECT
        user_id,
        login_date,
        LAG(login_date) OVER (
            PARTITION BY user_id   -- one window per user
            ORDER BY login_date    -- chronological order within the user
        ) AS lag_date,
        DATE(login_date, '-1 day') AS prev_day,
        LEAD(login_date) OVER (
            PARTITION BY user_id
            ORDER BY login_date
        ) AS lead_date,
        DATE(login_date, '+1 day') AS next_date
    FROM daily_logins
)

-- Keep a row only when the previous login was exactly the day before AND the
-- next login is exactly the day after. The first/last login of a user has a
-- NULL lag_date/lead_date and is therefore filtered out by the comparison.
SELECT *
FROM login_cte
WHERE
    lag_date = prev_day
    AND lead_date = next_date
ORDER BY user_id, login_date;  -- deterministic output order

 * sqlite:///question_bank.db
Done.


user_id,login_date,lag_date,prev_day,lead_date,next_date
1,2024-01-02,2024-01-01,2024-01-01,2024-01-03,2024-01-03
3,2024-01-11,2024-01-10,2024-01-10,2024-01-12,2024-01-12
