In [1]:
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
import altair as alt
import datetime
from urllib.parse import urlencode
import vars

# Local data-warehouse copy (SQLite), opened read-only through a URI file name
# (`mode=ro&uri=true`) so this notebook cannot modify it.
engine = create_engine('sqlite:///file:../plch_dw.db?mode=ro&uri=true')

# Sierra database (PostgreSQL). The dialect name must be `postgresql`: the
# legacy `postgres://` alias was deprecated and is rejected by SQLAlchemy 1.4+.
# NOTE(review): credentials are interpolated verbatim -- a username/password
# containing URL-reserved characters (`@`, `:`, `/`) would need escaping.
sierra_connection_uri = 'postgresql://{}:{}@sierra-db.plch.net:1032/iii'.format(vars.pg_username, vars.pg_password)
sierra_engine = create_engine(sierra_connection_uri)

# Reporting year (as a string); used for the SQL date window, chart titles
# and output file names throughout the notebook.
year = '2020'

In [2]:
# Resolves a Sierra record id to its record number, restricted to
# bibliographic records (`record_type_code = 'b'`) with an empty campus code.
bib_num_sql = """\
select
r.record_num
from
sierra_view.record_metadata as r
where
r.id = %s
and r.record_type_code = 'b'
and r.campus_code = ''
limit 1
"""


def get_bib_num(bib_record_id):
    """Return the Sierra bib record number for ``bib_record_id``.

    Runs ``bib_num_sql`` against ``sierra_engine`` with ``bib_record_id`` as
    the single bound parameter.

    Parameters
    ----------
    bib_record_id
        Value matched against ``sierra_view.record_metadata.id``.

    Returns
    -------
    int or None
        The ``record_num`` of the matching row as a Python ``int``, or
        ``None`` when no row matches or the value is missing / not
        convertible to an integer.

    Database errors raised by ``pd.read_sql`` propagate to the caller.
    """
    temp_df = pd.read_sql(sql=bib_num_sql, con=sierra_engine, params=[bib_record_id])

    # No matching bib record.
    if temp_df.empty:
        return None

    # Take the scalar explicitly: calling int() on a whole Series is
    # deprecated in recent pandas and fails outright on an empty one.
    record_num = temp_df['record_num'].iloc[0]
    if pd.isna(record_num):
        return None

    try:
        return int(record_num)
    except (TypeError, ValueError):
        # Keep the original best-effort contract for unconvertible values,
        # without swallowing unrelated exceptions.
        return None

# print(get_bib_num(420910357225))
    

In [3]:
# Per-item-type circulation summary for the reporting year: total rows plus
# distinct items, bibs and patrons, for transactions with op_code 'o' and
# excluding ptype_code 196, ordered by total count (largest first).
# The window is [Jan 1 of `year`, Jan 1 of the following year).
sql = """\
SELECT

c.itype_code_num,
(
	SELECT
	name
	FROM
	map_item_type
	WHERE
	code = c.itype_code_num
) as item_type_name,
count(*) as count,
count(DISTINCT c.item_record_id) as distinct_item_records,
count(DISTINCT c.bib_record_id) as distinct_bib_records,
count(DISTINCT c.patron_record_id) as distinct_patron_records

FROM
circ_trans as c

WHERE
c.op_code = 'o'
AND c.ptype_code != 196
AND c.transaction_gmt >= '{first_year}-01-01'
AND c.transaction_gmt < '{next_year}-01-01'

GROUP BY
c.itype_code_num

ORDER BY
count DESC
""".format(first_year=year, next_year=int(year) + 1)

In [4]:
# Run the item-type summary against the local warehouse and keep a CSV copy
# (index column included) named after the reporting year.
df = pd.read_sql(sql, engine)
df.to_csv(path_or_buf=f'{year}_top_itypes.csv')

In [5]:
def make_itype_url(itype_code_num):
    """Return the relative file name of the top-20 page for one item type.

    The name combines the notebook-level ``year`` with ``itype_code_num``.
    """
    page_name = "{}_top_20_itype_{}.html".format(year, itype_code_num)
    return page_name

In [6]:
# Attach to every item type the page its bar in the overview chart links to.
df['url'] = df['itype_code_num'].map(make_itype_url)

In [7]:
# Preview the five busiest item types.
df.head(n=5)

Unnamed: 0,itype_code_num,item_type_name,count,distinct_item_records,distinct_bib_records,distinct_patron_records,url
0,2,Juvenile Book,1052020,587611,94547,41793,2020_top_20_itype_2.html
1,0,Book,823996,440452,148865,75059,2020_top_20_itype_0.html
2,101,DVD/Videocassette,591753,202506,29715,33257,2020_top_20_itype_101.html
3,100,New Release DVDs,231188,43301,831,20042,2020_top_20_itype_100.html
4,77,Music on CD,143119,70570,30615,9129,2020_top_20_itype_77.html


In [8]:
# Keep the 20 item types with the most checkouts. `df` comes back from SQL
# already ordered by count (descending); head(20) is positional, so it does
# not depend on the frame having a default 0..n index.
source = df.head(20)

# Horizontal bar chart: one bar per item type, sorted by the x value
# (largest first). Each bar links to that item type's top-20 titles page.
chart = alt.Chart(source).mark_bar(color='#0090bd').encode(
    y=alt.Y(
        'item_type_name',
        sort='-x'
    ),
    x='count',
    href='url',
    tooltip=['itype_code_num', 'item_type_name', 'count', 'distinct_item_records', 'distinct_bib_records', 'distinct_patron_records'],
).properties(
    # Title follows the reporting year, like the output file names below.
    title=f"{year} Total Checkout Counts by Item Types",
    width=900
)

# Write the year-specific page and the landing page (same chart).
chart.save(f'{year}_top_itypes.html')
chart.save('index.html')

In [9]:
# Top 100 titles for a single item type (bound to the `?` placeholder),
# counting transactions with op_code 'o' and excluding ptype_code 196.
# The date window is derived from `year`, matching the item-type summary
# query and the year-based chart titles / file names, instead of being
# hard-coded to 2020.
sql = f"""\
SELECT
c.bib_record_id,
c.best_title,
c.best_author,
count(*) as count,
count(DISTINCT item_record_id) as distinct_items,
count(DISTINCT patron_record_id) as distinct_patrons

FROM
circ_trans as c

WHERE
c.itype_code_num = ?
and c.op_code = 'o'
AND c.ptype_code != 196
AND c.transaction_gmt >= '{year}-01-01'
AND c.transaction_gmt < '{int(year) + 1}-01-01'

GROUP BY
c.bib_record_id,
c.best_title,
best_author

ORDER BY
count DESC

LIMIT
100
"""

In [10]:
def make_bc_query(name):
    """Build the BiblioCommons item URL for a bib record number.

    Parameters
    ----------
    name
        Bib record number. May arrive as an ``int``, as a whole-valued
        ``float`` (pandas upcasts an integer column to float when it also
        holds missing values), or as a missing value (``None`` / NaN).

    Returns
    -------
    str or None
        ``.../item/show/<name>170``, or ``None`` when ``name`` is missing so
        that no dead link such as ``.../nan170`` is produced.
    """
    # A failed record-number lookup must not turn into a bogus URL.
    if pd.isna(name):
        return None

    # 1234567.0 -> 1234567, otherwise the URL would contain "1234567.0170".
    if isinstance(name, float) and name.is_integer():
        name = int(name)

    # NOTE(review): the trailing "170" is appended verbatim to the record
    # number -- presumably the library's BiblioCommons suffix; confirm.
    return f"https://cincinnatilibrary.bibliocommons.com/item/show/{name}170"

In [11]:
%%time

# For every item type: query its top titles, resolve bib record numbers,
# write a CSV, and save a top-20 bar chart page (the page the overview
# chart links to).
charts = []
for i, row in df.iterrows():
    itype_code_num = row['itype_code_num']
    item_type_name = row['item_type_name']
    print(i, itype_code_num, item_type_name)

    # Top titles for this item type from the local warehouse.
    itype_df = pd.read_sql(sql=sql, con=engine, params=[itype_code_num])

    # Resolve each bib record id to its record number. The column is built
    # as object dtype on purpose: Series.apply would upcast the whole column
    # to float as soon as one lookup returns None, turning 1234567 into
    # 1234567.0 in both the CSV and the generated URLs.
    itype_df['bib_record_num'] = pd.Series(
        [get_bib_num(bib_record_id) for bib_record_id in itype_df['bib_record_id']],
        index=itype_df.index,
        dtype=object,
    )

    itype_df['url'] = itype_df['bib_record_num'].apply(make_bc_query)

    itype_df.to_csv(f"{year}_top_20_itype_{itype_code_num}.csv")

    # Chart only the 20 most-circulated titles (the query returns rows
    # ordered by count, descending); head(20) is positional.
    source = itype_df.head(20)

    chart = alt.Chart(source).mark_bar(color='#0090bd').encode(
        y=alt.Y(
            'best_title',
            sort='-x'
        ),
        x='count',
        href='url',
        tooltip=['best_title', 'best_author', 'count', 'distinct_items', 'distinct_patrons'],
    ).properties(
        title=f"{year} Top 20 Titles By Checkout Counts -- {item_type_name}",
        width=900
    )

    charts.append(chart)

    chart.save(f"{year}_top_20_itype_{itype_code_num}.html")

0 2 Juvenile Book
1 0 Book
2 101 DVD/Videocassette
3 100 New Release DVDs
4 77 Music on CD
5 20 Large Print Book
6 30 Magazine
7 4 Teen Book
8 70 Book on CD
9 230 OHPIR Requestable
10 231 OHPIR Requestable Movie
11 113 Laptop
12 71 Juvenile Book on CD
13 90 Playaway
14 6 Leased Book
15 200 OhioLINK Book
16 78 Juvenile Music on CD
17 102 Bluray
18 31 Juvenile Magazine
19 91 Juvenile Playaway
20 157 Music Score
21 112 Charging Kit
22 82 LP Record
23 32 Teen Magazine
24 105 Leased DVD
25 72 Teen Book on CD
26 208 OhioLINK Media
27 22 Juvenile Large Print Book
28 110 MakerSpace Equipment
29 92 Teen Playaway
30 24 Teen Large Print Book
31 10 Reference Book
32 111 Portable Technology Device
33 142 Juvenile Kit
34 1 Book (Branches)
35 141 Kit
36 159 Juvenile Music Score
37 11 Reference Juvenile Book
38 17 Government Document
39 114 Bike Locks
40 33 Reference Magazine
41 103 Reference Video
42 15 Braille
43 93 Playaway View
44 134 CD-ROM
45 232 OHPIR Non-Requestable
46 207 OhioLINK Non-Circ
47