In [1]:
import sqlite3
import pandas as pd

conn = sqlite3.connect('CaseStudy.db')
cursor = conn.cursor()

read_data = pd.read_csv('casestudy.csv', usecols=['customer_email','net_revenue','year'])
read_data.to_sql('CUSTOMER', conn, if_exists='append', index = False)

In [92]:
# total revenue for current year (2017)
cursor.execute('''
SELECT round(sum(net_revenue), 2)
FROM CUSTOMER
WHERE year = 2017
''')
print("Total Revenue for 2017: $",cursor.fetchall())

Total Revenue for 2017: $ [(314174950.3,)]


In [97]:
# Total new Customer Revenue e.g. new customer revenue from 2017 only
cursor.execute('''
SELECT round(sum(net_revenue),2)
FROM CUSTOMER 
WHERE year = 2017 and customer_email NOT IN(
    SELECT DISTINCT customer_email
    FROM CUSTOMER
    WHERE year < 2017
)
''')
print("New Customer Revenue 2017: $", cursor.fetchall())

New Customer Revenue 2017: $ [(286766076.4,)]


In [96]:
# Revenue of existing customers for current year i.e.customers exisiting prior to 2017 and revenue from 2017
cursor.execute('''
SELECT round(sum(net_revenue),2)
FROM CUSTOMER
WHERE year = 2017 AND customer_email NOT IN(
    SELECT DISTINCT customer_email
    FROM CUSTOMER
    WHERE year < 2017
)
''')
print("Existing Customer Revenue 2017: $", cursor.fetchall())

Existing Customer Revenue 2017: $ [(286766076.4,)]


In [101]:
# Revenue lost from attrition e.g. revenue lost between 2016 and 2017
cursor.execute('''
SELECT (SELECT round(sum(net_revenue),2)
FROM CUSTOMER 
WHERE year = 2017) - (SELECT round(sum(net_revenue),2)
FROM CUSTOMER 
WHERE year = 2016) 
''')
print("Total Revenue lost in attrition between 2016 and 2017): $", cursor.fetchall())

Revenue lost in attrition between 2016 and 2017): $ [(56865514.400000006,)]


In [88]:
# Existing Customer Revenue Current Year i.e.customer email from 2017 and revenue from 2017
cursor.execute('''
SELECT round(sum(net_revenue),2)
FROM CUSTOMER
WHERE year = 2017 AND customer_email IN(
    SELECT DISTINCT customer_email
    FROM CUSTOMER as C
    WHERE year = 2017
)
''')
print("Revenue for 2017 from exsiting customers:", cursor.fetchall())

Revenue for 2017 from exsiting customers: [(314174950.3,)]


In [87]:
# Existing Customer Revenue Prior Year i.e. current customer email from 2017 but revenue for 2016
cursor.execute('''
SELECT round(sum(net_revenue),2)
FROM CUSTOMER
WHERE year = 2016 AND customer_email IN(
    SELECT DISTINCT customer_email
    FROM CUSTOMER
    WHERE year = 2017
)
''')
print("Revenue for 2016 from exsiting customers:", cursor.fetchall())

Revenue for 2016 from exsiting customers: [(26206486.5,)]


In [83]:
# total number of customers in current year i.e. 2017
cursor.execute('''
SELECT count(DISTINCT customer_email)
FROM CUSTOMER
WHERE year = 2017
''')
print("Total Customers in 2017: ", cursor.fetchall())

Total Customers in 2017:  [(249987,)]


In [62]:
# total number of customers in previous year i.e. 2016
cursor.execute('''
SELECT count(DISTINCT customer_email)
FROM CUSTOMER
WHERE year = 2016
''')
print("Total Customers in 2016: ", cursor.fetchall())

Customes before 2017:  [(376356,)]


In [75]:
# new total customers in 2017 only
cursor.execute('''
SELECT count(DISTINCT customer_email)
FROM CUSTOMER 
WHERE year = 2017 and customer_email NOT IN(
    SELECT DISTINCT customer_email
    FROM CUSTOMER
    WHERE year < 2017
)
''')
print("New Customers in 2017 only: ", cursor.fetchall())

New Customers in 2017 only:  [(228262,)]


In [81]:
# lost customers i.e. customers in previous years but not in 2017
cursor.execute('''
SELECT count(DISTINCT customer_email)
FROM CUSTOMER
WHERE year < 2017 AND customer_email NOT IN (
    SELECT DISTINCT customer_email
    FROM CUSTOMER
    WHERE year = 2017
)
''')
print("Customers lost in 2017: ", cursor.fetchall())

Customers lost in 2017:  [(354631,)]
