In [1]:
import numpy as np
import pandas as pd
import sqlite3
from IPython.display import display

In [2]:
# Read the loan records from the CSV file in the working directory.
loan = pd.read_csv('loan.csv')

# Preview the first five rows as this cell's output.
loan.head(5)

Unnamed: 0,loan_amnt,term,int_rate,emp_length,home_ownership,annual_inc,purpose,addr_state,dti,delinq_2yrs,revol_util,total_acc,bad_loan,longest_credit_length,verification_status
0,5000,36 months,10.65,10.0,RENT,24000.0,credit_card,AZ,27.65,0,83.7,9,0,26,verified
1,2500,60 months,15.27,0.0,RENT,30000.0,car,GA,1.0,0,9.4,4,1,12,verified
2,2400,36 months,15.96,10.0,RENT,12252.0,small_business,IL,8.72,0,98.5,10,0,10,not verified
3,10000,36 months,13.49,10.0,RENT,49200.0,other,CA,20.0,0,21.0,37,0,15,verified
4,5000,36 months,7.9,3.0,RENT,36000.0,wedding,AZ,11.2,0,28.3,12,0,7,verified


In [3]:
# Open a connection to a SQLite database that lives only in memory
# (":memory:"): nothing is written to disk, and the database disappears
# once this connection is closed.
conn = sqlite3.connect(":memory:")

In [4]:
# Copy the DataFrame into a SQLite table named `loan`, replacing the table if
# it already exists and leaving the DataFrame index out of the table.
loan.to_sql(
    name='loan',
    con=conn,
    if_exists='replace',
    index=False,
)

999

In [5]:
# Loan count and average annual income (cast to an integer) for each
# home-ownership category, ordered by the third selected column
# (the average income), highest first.
sql = """
SELECT home_ownership, count(1) as count, CAST(avg(annual_inc) AS INT) as avg_annual_inc
FROM loan
GROUP BY home_ownership
ORDER BY 3 DESC
"""

# Run the query on the SQLite connection and render the result table.
df = pd.read_sql_query(sql=sql, con=conn)
display(df)

Unnamed: 0,home_ownership,count,avg_annual_inc
0,MORTGAGE,315,74495
1,OWN,81,55447
2,RENT,603,54799


In [6]:
# Loan count and average loan amount (cast to an integer) for each loan term,
# ordered by term.
sql = """
SELECT term, count(1) as count, CAST(avg(loan_amnt) AS INT) as avg_loan_amnt 
FROM loan
GROUP BY term
ORDER BY term
"""

# Run the query on the SQLite connection and render the result table.
df = pd.read_sql_query(sql=sql, con=conn)
display(df)

Unnamed: 0,term,count,avg_loan_amnt
0,36 months,816,10759
1,60 months,183,18297


In [7]:
# The five loan purposes with the highest average loan amount (cast to an
# integer), together with the number of loans for each purpose.
sql = """
SELECT purpose, count(1) as count, CAST(avg(loan_amnt) AS INT) as avg_loan_amnt 
FROM loan
GROUP BY purpose
ORDER BY 3 DESC
LIMIT 5
"""

# Run the query on the SQLite connection and render the result table.
df = pd.read_sql_query(sql=sql, con=conn)
display(df)

Unnamed: 0,purpose,count,avg_loan_amnt
0,home_improvement,35,13375
1,debt_consolidation,520,13311
2,small_business,30,12868
3,wedding,8,12456
4,credit_card,268,12086


In [8]:
# Step 1: materialise a summary table `purpose` with one row per
# (purpose, term) pair: the loan count and the average loan amount cast to an
# integer.
# NOTE: because of IF NOT EXISTS, re-running this cell keeps the `purpose`
# table created by the first run instead of rebuilding it.
sql = """
CREATE TABLE IF NOT EXISTS purpose AS
SELECT purpose, term, count(1) as count, CAST(avg(loan_amnt) AS INT) as avg_loan_amnt 
FROM loan
GROUP BY purpose, term
"""

cur = conn.cursor()
try:
    # Using the connection as a context manager commits when the block
    # finishes normally and rolls back if a statement raises, so a failure
    # cannot leave a half-applied change pending on the connection.
    with conn:
        cur.execute(sql)
        # Step 2: remove every 'home_improvement' loan from `loan`. The summary
        # table built above is left untouched by this delete.
        cur.execute("DELETE FROM loan WHERE purpose = 'home_improvement'")
finally:
    # Release the cursor whether or not the statements succeeded.
    cur.close()

# Step 3: pair each remaining loan with the average for its (purpose, term)
# group and flag whether the loan amount is above that average. The INNER JOIN
# keeps only loans whose purpose and term both match a summary row.
sql = """
SELECT a.loan_amnt, b.avg_loan_amnt,
CASE WHEN a.loan_amnt > b.avg_loan_amnt THEN 'Y' ELSE 'N' END AS loan_amnt_above_avg,
a.purpose,
a.term
FROM loan a
INNER JOIN purpose b ON a.purpose = b.purpose AND a.term = b.term
ORDER BY b.avg_loan_amnt DESC, a.loan_amnt DESC
"""

df = pd.read_sql(sql, conn)
display(df)

# Step 4: list the purposes still present in `loan` after the delete.
df = pd.read_sql("SELECT DISTINCT purpose FROM loan", conn)
display(df)

Unnamed: 0,loan_amnt,avg_loan_amnt,loan_amnt_above_avg,purpose,term
0,35000,21625,Y,wedding,60 months
1,8250,21625,N,wedding,60 months
2,35000,21444,Y,small_business,60 months
3,30000,21444,Y,small_business,60 months
4,30000,21444,Y,small_business,60 months
...,...,...,...,...,...
959,2400,5376,N,major_purchase,36 months
960,1000,5376,N,major_purchase,36 months
961,4500,3500,Y,car,60 months
962,2500,3500,N,car,60 months


Unnamed: 0,purpose
0,credit_card
1,car
2,small_business
3,other
4,wedding
5,debt_consolidation
6,major_purchase
7,medical
8,moving
9,vacation


In [9]:
# Close the SQLite connection now that all queries have been run.
conn.close()