# Author Analytics Notebook

## Import libraries

In [None]:
import duckdb
import plotly.express as px
import plotly.graph_objects as go

## Prepare functions

In [None]:
# Define standard formatting transformations
def std_format(fig, height=300, width=800):
    """Apply the notebook's standard look to a figure and return it.

    Sets the paper background colour, the figure size and the margins through
    ``fig.update_layout``, then passes ``cliponaxis=False`` to every trace
    through ``fig.update_traces``.

    Args:
        fig: Figure object exposing ``update_layout`` and ``update_traces``.
        height: Figure height handed to the layout (default 300).
        width: Figure width handed to the layout (default 800).

    Returns:
        The same ``fig`` object that was passed in.
    """
    layout_settings = {
        "paper_bgcolor": "LightSteelBlue",
        "height": height,
        "width": width,
        "margin": {"l": 40, "r": 40, "t": 20, "b": 40},
    }
    fig.update_layout(**layout_settings)
    # Disable clipping at the axes for all traces (keeps text labels that
    # sit at the edge of the plot area visible).
    fig.update_traces(cliponaxis=False)
    return fig

# Perform the queries
Query results are loaded into DataFrames, which the visualization tools can then use directly.

In [None]:
# Open the model database; every query below runs on this connection.
con = duckdb.connect("~/Documents/Databaes/author_stats/dbs/model_db.duckdb")

# These queries are filtered to 2025-01-01 and after.
# Every result is materialized with .df() so the plotting cells all receive
# DataFrames rather than a mix of DataFrames and DuckDB relation objects.

# Monthly totals: royalties and units summed per report month.
sales_totals = con.sql("select report_month, round(sum(my_share_usd),2) as estimated_royalties, round(sum(combined_units),2) as combined_units from sales where report_date >= '2025-01-01' group by report_month order by report_month").df()

# Monthly totals split by format.
sales_by_format = con.sql("select report_month, format, round(sum(my_share_usd),2) as estimated_royalties, round(sum(combined_units),2) as combined_units from sales where report_date >= '2025-01-01' group by 1, 2 order by 1 desc").df()

# Monthly totals split by title code.
sales_by_title = con.sql("select report_month, title_code, round(sum(my_share_usd),2) as estimated_royalties, round(sum(combined_units),2) as combined_units from sales where report_date >= '2025-01-01' group by 1, 2 order by 1 desc").df()

# Row-level sales from 2025-01-01 onward, oldest first.
sales = con.sql("select * from sales where report_date >= '2025-01-01' order by report_date").df()

# Row-level sales with no date filter, oldest first.
all_time_sales = con.sql("select * from sales order by report_date").df()

In [None]:
# Scratch pad queries

# Ad-hoc check: print every sales row for one title code.
scratch_sql = "select * from sales where title_code = 'MOON'"
rel = con.sql(scratch_sql)
rel.show()

# Visualizations

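The monthly charts cover report dates from 2025-01-01 onward, matching the filter used in the queries above; the "Analysis for All Time" section uses the unfiltered sales data.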
Includes KENP income and non-finalized sales entries. Conversions from local currency to USD are estimates and may not reflect final payout.
Estimated units are net ebook units plus the minimum number of borrows (total pages divided by pages per book).

## Royalties and Units by Month

In [None]:
# One bar chart per measure, aggregated by report month, with the bar
# values printed using the '.3s' number format.
for measure in ('my_share_usd', 'combined_units'):
    fig = px.histogram(
        sales,
        x='report_month',
        y=measure,
        text_auto='.3s',
    )
    fig = std_format(fig)
    fig.show()

### Series volume over time

In [None]:
# Monthly bars coloured by series, with the monthly total written above
# each bar. Both charts share one code path; each pair is
# (column of `sales` to plot, column of `sales_totals` used for the label).
for sum_column, total_column in [
    ('my_share_usd', 'estimated_royalties'),
    ('combined_units', 'combined_units'),
]:
    fig = px.histogram(
        sales,
        x='report_month',
        y=sum_column,
        color='series',
    )
    # Text-only scatter trace: places each month's total at its own value
    # on the y axis, kept out of the legend.
    fig.add_trace(go.Scatter(
        x=sales_totals['report_month'],
        y=sales_totals[total_column],
        text=sales_totals[total_column],
        mode='text',
        textposition='top center',
        textfont=dict(
            size=12,
        ),
        showlegend=False,
    ))
    fig = std_format(fig)
    fig.show()

### Format volume over time

In [None]:
# Monthly bars for each measure, coloured by format.
for measure in ('my_share_usd', 'combined_units'):
    chart_options = dict(
        x='report_month',
        y=measure,
        color='format',
        text_auto='.3s',
    )
    fig = px.histogram(sales, **chart_options)
    fig = std_format(fig)
    fig.show()

### Source volume over time

In [None]:
# Monthly bars for each measure, coloured by the report source.
for measure in ('my_share_usd', 'combined_units'):
    chart_options = dict(
        x='report_month',
        y=measure,
        color='report_source',
        text_auto='.3s',
    )
    fig = px.histogram(sales, **chart_options)
    fig = std_format(fig)
    fig.show()

### Title Volume Over Time, Overlaid

In [None]:
# One line chart per measure; each title code gets its own line colour
# and marker symbol.
for measure in ('estimated_royalties', 'combined_units'):
    fig = px.line(
        sales_by_title,
        x='report_month',
        y=measure,
        color='title_code',
        symbol='title_code',
    )
    fig = std_format(fig)
    fig.show()

### Royalties and Units by Format Trend

In [None]:
# One line chart per measure; each format gets its own line colour and
# marker symbol.
for measure in ('estimated_royalties', 'combined_units'):
    fig = px.line(
        sales_by_format,
        x='report_month',
        y=measure,
        color='format',
        symbol='format',
    )
    fig = std_format(fig)
    fig.show()

## Analysis for All Time

### By Year

In [None]:
# All-time figures per report year, one narrower (500 wide) chart per
# measure.
for measure in ('my_share_usd', 'combined_units', 'net_unit_sales'):
    fig = px.histogram(
        all_time_sales,
        x='report_year',
        y=measure,
        text_auto='.3s',
    )
    fig = std_format(fig, width=500)
    fig.show()

### By Series

In [None]:
# All-time figures per series, one chart per measure, bars coloured by
# series. Options left switched off in the original cell:
# color='title_code' and facet_row='series'.
for measure in ('my_share_usd', 'combined_units', 'net_unit_sales'):
    fig = px.histogram(
        all_time_sales,
        x='series',
        y=measure,
        color='series',
        text_auto='.3s',
    )
    fig = std_format(fig)
    # Order the series on the x axis by 'total descending'.
    fig.update_layout(xaxis=dict(categoryorder='total descending'))
    fig.show()

In [None]:
# All-time figures per series, with each bar split by title code.
# Option left switched off in the original cell: facet_row='series'.
for measure in ('my_share_usd', 'combined_units'):
    fig5 = px.histogram(
        all_time_sales,
        x='series',
        y=measure,
        color='title_code',
        text_auto='.3s',
    )
    fig5 = std_format(fig5)
    # Order the series on the x axis by 'total descending'.
    fig5.update_layout(xaxis=dict(categoryorder='total descending'))
    fig5.show()

### Dive in by format

In [None]:
# All-time figures per series, coloured by title code, with one facet
# row per format; the figure is made taller (500) to fit the rows.
for measure in ('my_share_usd', 'combined_units'):
    fig = px.histogram(
        all_time_sales,
        x='series',
        y=measure,
        color='title_code',
        text_auto='.3s',
        facet_row='format',
    )
    fig = std_format(fig, height=500)
    # Order the x-axis categories by 'category descending'.
    fig.update_layout(xaxis=dict(categoryorder='category descending'))
    fig.show()

## Close the connection

In [None]:
# Explicitly close the DuckDB connection `con` opened in the query cell.
# Run this last: any cell that calls con.sql() needs the connection open.
con.close()