### DSCI 320 - Project
## Analyzing Trends in Library Usage Across the Five Most Frequented San Francisco County Libraries

**Group Members: Sadia Khan Durani, Lillian Milroy, Heidi Lantz**

Our overarching goal is to analyze and reflect on what factors influence user engagement in the top five San Francisco library branches from 2005 to 2015.

In [1]:
# import statements
import pandas as pd
import altair as alt

In [2]:
# Read the cleaned library dataset from disk and preview its first rows
data = pd.read_csv(filepath_or_buffer="CleanLibrary.csv")
data.head()

Unnamed: 0,index,Patron Type Definition,Total Checkouts,Total Renewals,Age Range,Home Library Definition,Circulation Active Month,Circulation Active Year,Notice Preference Definition,Provided Email Address,Year Patron Registered,Outside of County,Years Active,Renewals per Checkouts,User Count,Average_Checkouts
0,4201,ADULT,9,0,25-34,Main Library,January,2014,email,True,2012,False,2,0.0,1242,0.007246
1,2795,SENIOR,41,0,75+,Chinatown,January,2013,print,False,2012,False,1,0.0,194,0.21134
2,2929,JUVENILE,63,1,0-9,Mission,January,2014,print,False,2012,False,2,0.02,350,0.18
3,2801,ADULT,3,0,45-54,Chinatown,January,2014,print,False,2010,False,4,0.0,194,0.015464
4,2318,ADULT,6,7,35-44,Main Library,January,2014,email,True,2013,False,1,1.17,1242,0.004831


# Task 1:
#### How does the duration of library membership vary by patrons’ notice preference type?
**Attributes**
- Year Patron Registered
- Circulation Active Year (shown as the end year)
- Age Range
- Notice Preference Definition

In [3]:
# Task 1 works on a reproducible 500-row random sample of the full dataset,
# restricted to rows whose 'Years Active' value is positive
data2 = (
    data
    .sample(n = 500, random_state = 0)
    .loc[lambda sampled: sampled['Years Active'] > 0]
)

In [4]:
# Bin 'Years Active' into the three ranges offered by the radio buttons in Visualization 1.
# Explicit edges are used instead of `bins = 3`: given an integer, pd.cut splits the
# sample's observed min-max range into equal-width intervals, so the fixed labels below
# would silently mislabel rows whenever the sample does not span exactly 1-10.
# Intervals are right-inclusive: (0, 4], (4, 7], (7, inf). The open upper edge keeps any
# value above 10 in the last bin instead of turning it into NaN.
# NOTE(review): the data presumably tops out at 10 years active - confirm.
data2['Binned Years Active'] = pd.cut(data2['Years Active'],
                                      bins = [0, 4, 7, float('inf')],
                                      labels = ['1-4', '5-7', '8-10'])

**Creating Visualization # 1**

In [5]:
# Distinct age ranges found in the sample, sorted for display in the menu
age_range_options = sorted(data2['Age Range'].unique())

# Dropdown widget; the leading None option is paired with the 'All' label
age_range_dropdown = alt.binding_select(
    options = [None, *age_range_options],
    labels = ['All', *age_range_options],
    name = 'Choose Age Range: ',
)

# Point selection on the 'Age Range' field, driven by the dropdown above
age_range_selection = alt.selection_point(
    fields = ['Age Range'],
    bind = age_range_dropdown,
)

In [6]:
# Bin labels found in the sample, sorted as strings
years_active_options = sorted(data2['Binned Years Active'].unique())

# Radio-button widget (a radio group, despite the variable's "dropdown" name);
# the leading None option is paired with the 'All' label
years_active_dropdown = alt.binding_radio(
    options = [None, *years_active_options],
    labels = ['All', *years_active_options],
    name = '# Years Active: ',
)

# Point selection on the 'Binned Years Active' field, driven by the radio buttons
years_active_selection = alt.selection_point(
    fields = ['Binned Years Active'],
    bind = years_active_dropdown,
)

In [7]:
# Colour palette: one fixed colour per notice preference
color_domain = ['email', 'phone', 'print']
color_range =  ['#E07C8D', '#40B0A6', '#FFB000']


def _with_task1_filters(chart):
    """Register both Task 1 selections on `chart` and filter its data by them.

    The years-active selection is added and applied first, followed by the
    age-range selection; both layers of the visualization go through here so
    they always react to the same widgets in the same way.
    """
    return (
        chart
        .add_params(years_active_selection)
        .transform_filter(years_active_selection)
        .add_params(age_range_selection)
        .transform_filter(age_range_selection)
    )


# First layer: line mark (with points) positioned at each patron's registration year,
# one row per patron `index`, coloured by notice preference.
# NOTE(review): Vega-Lite documents `x2` for ranged marks (bar, rect, rule, area) and
# `domainMin`/`domainMax` for continuous scales - confirm that they have the intended
# effect on a line mark with an ordinal x scale.
vis1_1 = _with_task1_filters(
    alt.Chart(data2).mark_line(opacity = 0.7, size = 1, point = True).encode(
        x = alt.X(
            'Year Patron Registered:O',
            scale = alt.Scale(domainMin = 2005, domainMax = 2015),
            axis = alt.Axis(tickSize = 0),
            title = 'Start Year',
        ),
        x2 = alt.X2('Circulation Active Year:O'),
        y = alt.Y('index:N', title = None, axis = None),
        color = alt.Color('Notice Preference Definition').legend(
            title = None,
            orient = 'none',
            columns = 3,
            legendX = 75,
            legendY = -20,
            symbolSize = 25,
            symbolStrokeWidth = 7,
        ).scale(
            domain = color_domain,
            range = color_range,
        ),
        tooltip = [
            alt.Tooltip('Year Patron Registered', title = 'Start Year'),
            alt.Tooltip('Circulation Active Year', title = 'End Year'),
        ],
    ).properties(
        height = 500,
        width = 300,
        title = alt.TitleParams(
            'Task 1: Membership Duration Variation',
            subtitle = 'Across Notice Preferences',
            offset = 5,
            fontSize = 14,
            subtitleFontSize = 14,
            subtitleFontStyle = 'bold',
        ),
    )
)

# Second layer: a circle at each patron's circulation-active (end) year,
# sharing the palette of the first layer but without a second legend
vis1_2 = _with_task1_filters(
    alt.Chart(data2).mark_circle().encode(
        x = alt.X('Circulation Active Year:O', title = 'End Year'),
        y = alt.Y('index:N', title = None, axis = None),
        color = alt.Color('Notice Preference Definition', legend = None).scale(
            domain = color_domain,
            range = color_range,
        ),
    ).properties(
        height = 500,
        width = 300,
    )
)

# Layer the two charts. The view configuration is applied only to the displayed
# copy; `vis1` itself stays unconfigured.
vis1 = vis1_1 + vis1_2
vis1.configure_view(strokeWidth = 0)

------------------------------------------------------------------------------------------------------

# Task 2:
#### What is the variation in total checkouts across the years, and how do corresponding renewals per checkout differ across various age groups? 
**Attributes**
- Year Patron Registered
- Total Checkouts
- Renewals per Checkouts
- Age Range

In [8]:
# Interval brush restricted to the x encoding
brush = alt.selection_interval(encodings = ['x'])

# Area chart of total checkouts summed per registration year, carrying the brush
area_chart = (
    alt.Chart(data)
    .mark_area(opacity = 0.9)
    .encode(
        x = alt.X("Year Patron Registered:O", title = 'Year', axis = alt.Axis(tickSize = 0)),
        y = alt.Y("sum(Total Checkouts):Q", axis = alt.Axis(tickSize = 0)),
    )
    .add_params(brush)
    .properties(height = 150, width = 200)
)

In [9]:
# Interval selection attached to the dot chart; it is used again in the dashboard
interval_selection = alt.selection_interval()

# Dot chart: renewals per checkout (y) for each age range (x).
# Each channel uses its matching wrapper class (alt.X for x, alt.Y for y);
# the two classes were previously swapped between the channels.
dot = alt.Chart(data).mark_circle(color = '#9E9E9E', opacity = 0.5, size = 30).encode(
    x = alt.X('Age Range:O', axis = alt.Axis(labelAngle = 45, tickSize = 0)),
    y = alt.Y('Renewals per Checkouts:Q',
              title = 'Renewals per Checkout',
              scale = alt.Scale(domain = [0, 6]),
              axis = alt.Axis(tickSize = 0)),
    tooltip = [
        alt.Tooltip('Renewals per Checkouts:Q'),
        alt.Tooltip('Age Range'),
        alt.Tooltip('Year Patron Registered:O')
    ]
).add_params(
    interval_selection
).properties(
    height = 150,
    width = 200
)

In [10]:
# Task 2 view: the area chart next to the dot chart, with the dot chart
# filtered by the brush drawn on the area chart
vis2 = (
    alt.hconcat(
        area_chart,
        dot.transform_filter(brush),
        spacing = 15,
        center = True,
    )
    .properties(
        title = alt.TitleParams(
            'Task 2: Library Engagement Over Time and Across Age Groups',
            offset = 10,
            anchor = 'middle',
            fontSize = 14,
        )
    )
    .configure_view(strokeWidth = 0)
)

vis2

------------------------------------------------------------------------------------------------------

# Task 3:
#### Which months influence the engagement level of patrons across the top 5 libraries?
**Attributes**
- Circulation Active Month
- Home Library Definition
- Mean Total Checkouts

In [11]:
# Calendar order for the month names
order = ['January', 'February', 'March', 'April', 'May', 'June',
         'July', 'August', 'September', 'October', 'November', 'December']

# Store the month column as an ordered categorical that follows the calendar order above
month_dtype = pd.CategoricalDtype(categories = order, ordered = True)
data['Circulation Active Month'] = data['Circulation Active Month'].astype(month_dtype)

In [12]:
# Dictionary mapping full month names to their abbreviated labels
month_dict = {'January': 'Jan', 'February': 'Feb', 'March': 'Mar', 'April': 'Apr', 'May': 'May', 'June': 'Jun',
              'July': 'Jul', 'August': 'Aug', 'September': 'Sept', 'October': 'Oct', 'November': 'Nov', 'December': 'Dec'}

# Relabel the categories of the (categorical) month column with the abbreviations.
# rename_categories keeps the existing category order and passes through any category
# that is not a key of the dictionary, so re-running this cell leaves the already
# abbreviated labels untouched instead of turning every month into NaN.
data['Circulation Active Month'] = data['Circulation Active Month'].cat.rename_categories(month_dict)
data.head(3)

Unnamed: 0,index,Patron Type Definition,Total Checkouts,Total Renewals,Age Range,Home Library Definition,Circulation Active Month,Circulation Active Year,Notice Preference Definition,Provided Email Address,Year Patron Registered,Outside of County,Years Active,Renewals per Checkouts,User Count,Average_Checkouts
0,4201,ADULT,9,0,25-34,Main Library,Jan,2014,email,True,2012,False,2,0.0,1242,0.007246
1,2795,SENIOR,41,0,75+,Chinatown,Jan,2013,print,False,2012,False,1,0.0,194,0.21134
2,2929,JUVENILE,63,1,0-9,Mission,Jan,2014,print,False,2012,False,2,0.02,350,0.18


In [13]:
# Point selection on the x encoding; it is used again in the final dashboard
multi = alt.selection_point(encodings = ['x'])

# Heatmap: month (x) by home library (y), coloured by mean total checkouts.
# Cells matching the selection are drawn at full opacity, the others at 0.1.
vis3 = (
    alt.Chart(data)
    .mark_rect()
    .encode(
        x = alt.X('Circulation Active Month', title = 'Month', axis = alt.Axis(tickSize = 0)),
        y = alt.Y('Home Library Definition', title = 'Home Library', axis = alt.Axis(tickSize = 0)),
        color = alt.Color('average(Total Checkouts)').legend(
            title = 'Mean Checkouts',
            titleAnchor = 'middle',
            titleFontSize = 10,
            gradientLength = 100,
            gradientThickness = 15,
            orient = 'none',
            legendY = 10,
            legendX = 260,
        ).scale(scheme = 'teals'),
        opacity = alt.condition(multi, alt.value(1), alt.value(0.1)),
        tooltip = [
            alt.Tooltip('Home Library Definition', title = 'Home Library'),
            alt.Tooltip('average(Total Checkouts)', title = 'Avg Total Checkouts'),
            alt.Tooltip('Circulation Active Month', title = 'Month'),
        ],
    )
    .add_params(multi)
    .properties(
        width = 250,
        height = 140,
        title = alt.TitleParams(
            'Task 3: Monthly Mean Total Checkouts Across Libraries',
            anchor = 'middle',
            offset = 20,
            fontSize = 14,
        ),
    )
)

vis3

------------------------------------------------------------------------------------------------------

# Task 4: 
#### Which home libraries have the highest average total checkouts per user across the number of years patrons have been active?
**Attributes**
- Years Active
- Average_Checkouts (Total Checkouts divided by User Count)
- Home Library Definition

In [14]:
# Colour palette: one fixed colour per home library
color_domain = ['Main Library', 'Chinatown', 'Mission', 'Richmond', 'Excelsior']
color_range =  ['#FE6100', '#AB9CF1', '#014432', '#008080', '#907B57']

# Two legend-bound point selections on the home library field.
# `selection` drives the line opacity and filters the bars;
# `barOpacity` drives the bar opacity.
selection = alt.selection_point(
    fields = ['Home Library Definition'],
    bind = 'legend',
    empty = False,
)
barOpacity = alt.selection_point(
    fields = ['Home Library Definition'],
    bind = 'legend',
    empty = False,
)

# Line chart: summed Average_Checkouts per 'Years Active' value, one line per library.
# Lines matching `selection` are drawn at full opacity, the others at 0.2.
linechart = alt.Chart(data).mark_line().encode(
    x = alt.X(
        'Years Active:N',
        title = 'Years Active',
        axis = alt.Axis(titleFontSize = 13, labelAngle = 0, tickSize = 0, labelPadding = 10),
    ),
    y = alt.Y(
        'sum(Average_Checkouts):Q',
        title = 'Average Total Checkouts per User',
        axis = alt.Axis(titleFontSize = 14, tickCount = 5, tickSize = 0, labelPadding = 10),
    ),
    color = alt.Color('Home Library Definition:N', title = 'Home Library').scale(
        domain = color_domain,
        range = color_range,
    ),
    opacity = alt.condition(selection, alt.value(1), alt.value(0.2)),
    tooltip = ['sum(Average_Checkouts):Q', 'Home Library Definition:N', 'Years Active:N'],
).properties(
    width = 500,
    height = 250,
    title = alt.TitleParams("Task 4: Checkouts Over Time Per Library", fontSize = 15),
).add_params(selection)

# Bar chart with the same x / y / colour encodings.
# Bars matching `barOpacity` are drawn at 0.8 opacity, the others at 1.
barchart = alt.Chart(data).mark_bar().encode(
    x = alt.X('Years Active:N', title = 'Years Active'),
    y = alt.Y('sum(Average_Checkouts):Q', title = 'Average Total Checkouts per User'),
    color = alt.Color('Home Library Definition:N', title = 'Home Library').scale(
        domain = color_domain,
        range = color_range,
    ),
    opacity = alt.condition(barOpacity, alt.value(0.8), alt.value(1)),
).properties(
    width = 500,
    height = 250,
).add_params(barOpacity)

# Layer the bars (filtered by `selection`) under the line chart, which is
# re-marked with points and also receives `barOpacity`
vis4 = (
    barchart.transform_filter(selection)
    + linechart.mark_line(point = True).add_params(barOpacity)
)

# Configuration is applied only to the displayed copy; `vis4` itself stays unconfigured
vis4.configure_view(strokeWidth = 0).configure_axis(labelFontSize = 12)

------------------------------------------------------------------------------------------------------

# DASHBOARD

In [15]:
# Dot chart and heatmap side by side, each filtered by the other's selection:
# the dots by the heatmap's `multi`, the heatmap by the dots' `interval_selection`.
# Colour scales are resolved independently.
bi_directional = (
    alt.hconcat(
        dot.transform_filter(multi),
        vis3.transform_filter(interval_selection),
        center = True,
        spacing = 15,
    )
    .resolve_scale(color = 'independent')
)

In [16]:
# Top row of the dashboard: the area chart followed by the linked dot chart / heatmap
# pair, with that pair additionally filtered by the area chart's brush
top = (
    alt.hconcat(
        area_chart,
        bi_directional.transform_filter(brush),
        spacing = 10,
        center = True,
    )
    .resolve_scale(color = 'independent')
    .properties(
        title = alt.TitleParams(
            'Task 2: Library Engagement Over Time and Across Age Groups',
            dx = 55,
            dy = 18,
            fontSize = 14,
        )
    )
)

In [18]:
# Right-hand column: the top row stacked above the Task 4 chart
right_column = alt.vconcat(
    top,
    vis4,
    spacing = 35,
    center = True,
).resolve_scale(color = 'independent')

# Full dashboard: Task 1 on the left, the stacked column on the right.
# View and axis configuration is applied once here, at the top level.
dashboard = (
    alt.hconcat(
        vis1,
        right_column,
        spacing = 10,
        center = True,
        padding = {'top': 40, 'bottom':10, 'left':10, 'right':20},
    )
    .resolve_scale(color = 'independent')
    .properties(
        title = alt.Title(
            'Analyzing Trends in Library Usage Across the Five Most Frequented San Francisco County Libraries',
            anchor = "middle",
            fontSize = 20,
            offset = 16,
        )
    )
    .configure_view(strokeWidth = 0)
    .configure_axis(titleFontWeight = 'bold', grid = False)
)

dashboard