### Sample plots to popular tabbed block components

In [1]:
#| default_exp charts

In [4]:
#| export
import numpy as np
import pandas as pd
import altair as alt
import math
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.max_columns', 500)

Example use of a `toml` file might look like this.
> secrets.toml   

```
[api]
key = "YOUR_API_KEY_HERE"

[database]
username = "DB_USERNAME"
password = "DB_PASSWORD"
``````
Remember to add it to your `.gitignore` file ⚠️

In [5]:
#hide
from nbdev.showdoc import *
import toml
s = toml.load("../.gradio/secrets.toml", _dict=dict)
s['data']

FileNotFoundError: [Errno 2] No such file or directory: '../.gradio/secrets.toml'

# 1. Pull Sample Data

In [6]:
#| export

def get_sample(verbose=True):
    """
    Sample Sales Data, Order Info, Sales, Customer, Shipping, etc., 
    Used for Segmentation, Customer Analytics, Clustering and More. 
        - Taken from Kaggle (www.kaggle.com/datasets/kyanyoga/sample-sales-data)
    """
    data = pd.read_csv('../data/sales_data_sample.csv', encoding=('ISO-8859-1'))

    data['ORDERDATE'] = pd.to_datetime(data['ORDERDATE'])

    # Removing all the columns not revelant for this analysis to avoid confusion
    data.drop(['ORDERLINENUMBER','STATUS','PRODUCTCODE','PHONE','STATE',
           'POSTALCODE', 'TERRITORY', 'CONTACTFIRSTNAME', 'CONTACTLASTNAME'], axis=1)

    if verbose:
        print(data.shape)
        display(data.head())
    return data

In [7]:
data = get_sample()

(2823, 25)


Unnamed: 0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,ORDERLINENUMBER,SALES,ORDERDATE,STATUS,QTR_ID,MONTH_ID,YEAR_ID,PRODUCTLINE,MSRP,PRODUCTCODE,CUSTOMERNAME,PHONE,ADDRESSLINE1,ADDRESSLINE2,CITY,STATE,POSTALCODE,COUNTRY,TERRITORY,CONTACTLASTNAME,CONTACTFIRSTNAME,DEALSIZE
0,10107,30,95.7,2,2871.0,2003-02-24,Shipped,1,2,2003,Motorcycles,95,S10_1678,Land of Toys Inc.,2125557818,897 Long Airport Avenue,,NYC,NY,10022.0,USA,,Yu,Kwai,Small
1,10121,34,81.35,5,2765.9,2003-05-07,Shipped,2,5,2003,Motorcycles,95,S10_1678,Reims Collectables,26.47.1555,59 rue de l'Abbaye,,Reims,,51100.0,France,EMEA,Henriot,Paul,Small
2,10134,41,94.74,2,3884.34,2003-07-01,Shipped,3,7,2003,Motorcycles,95,S10_1678,Lyon Souveniers,+33 1 46 62 7555,27 rue du Colonel Pierre Avia,,Paris,,75508.0,France,EMEA,Da Cunha,Daniel,Medium
3,10145,45,83.26,6,3746.7,2003-08-25,Shipped,3,8,2003,Motorcycles,95,S10_1678,Toys4GrownUps.com,6265557265,78934 Hillside Dr.,,Pasadena,CA,90003.0,USA,,Young,Julie,Medium
4,10159,49,100.0,14,5205.27,2003-10-10,Shipped,4,10,2003,Motorcycles,95,S10_1678,Corporate Gift Ideas Co.,6505551386,7734 Strong St.,,San Francisco,CA,,USA,,Brown,Julie,Medium


### Testing using `fastcore` functions

In [8]:
#| export

def human_readify(n):
    """Convert large numbers into a human-readable format."""
    if n == 0:
        return '0'
    
    millnames = ['', 'K', 'M', 'B']
    n = float(n)
    millidx = max(0, min(len(millnames)-1, 
                         int(math.floor(0 if n == 0 else math.log10(abs(n))/3))))
    
    # If in 'K', format with 3 significant figures
    if millidx == 1:
        return '{:.3g}{}'.format(n / 10**(3 * millidx), millnames[millidx])
    elif millidx == 0:
        return '{:.0f}'.format(n)  # Return whole number for values less than 1,000
    return '{:.1f}{}'.format(n / 10**(3 * millidx), millnames[millidx])


In [9]:
# Create tests
assert human_readify(0) == '0'
assert human_readify(999) == '999'
assert human_readify(876123) == '876K'
assert human_readify(1234567) == '1.2M'
assert human_readify(9876543210) == '9.9B'

In [10]:
from fastcore.test import test_eq

def test_human_readify(verbose=False):
    tests = [
        (0, '0'),
        (999, '999'),
        (1001, '1K'),
        (876123, '876K'),
        (1234567, '1.2M'),
        (9876543210, '9.9B')
    ]
    
    for i, (input_val, expected_output) in enumerate(tests, 1):
        result = human_readify(input_val)
        test_eq(result, expected_output)
        if verbose:
            print(f"Test {i:<5} Input: {input_val:<12} Expected: {expected_output:<7} Got: {result:<7}✅")
    
    if verbose:
        print("All tests passed!")

test_human_readify(verbose=True)

Test 1     Input: 0            Expected: 0       Got: 0      ✅
Test 2     Input: 999          Expected: 999     Got: 999    ✅
Test 3     Input: 1001         Expected: 1K      Got: 1K     ✅
Test 4     Input: 876123       Expected: 876K    Got: 876K   ✅
Test 5     Input: 1234567      Expected: 1.2M    Got: 1.2M   ✅
Test 6     Input: 9876543210   Expected: 9.9B    Got: 9.9B   ✅
All tests passed!


# `Altair` Sales Chart

In [13]:
#| export

def create_sales_chart(data):
    # Extract the month and year from the ORDERDATE for grouping
    data['YearMonth'] = data['ORDERDATE'].dt.to_period('M')

    # Group by YearMonth and sum the SALES
    monthly_sales_alt = data.groupby('YearMonth')['SALES'].sum().reset_index()

    # Convert the 'YearMonth' back to string for Altair to handle it properly
    monthly_sales_alt['YearMonth'] = monthly_sales_alt['YearMonth'].astype(str)

    # Convert the SALES column to a human-readable format
    monthly_sales_alt['ReadableSales'] = monthly_sales_alt['SALES'].apply(human_readify)

    # Create the Altair chart with improved aesthetics
    chart = alt.Chart(monthly_sales_alt).mark_bar(color='cyan', opacity=0.3).encode(
        x=alt.X('YearMonth:O', title='Month', axis=alt.Axis(labelAngle=-45)),
        y=alt.Y('SALES:Q', title='Total Sales', axis=alt.Axis(format=".1s")),
        tooltip=['YearMonth', 'ReadableSales']
    ).properties(
        title='Monthly Sales Over Time',
        width=1000,
        height=300
    )

    # Add text labels on top of the bars
    text = chart.mark_text(
        align='center',
        baseline='bottom',
        dy=-10  # Nudge text upwards
    ).encode(
        text='ReadableSales:O'
    )

    # Combine the chart with bars and text labels
    final_chart = (chart + text)
    
    return final_chart


In [14]:
create_sales_chart(data)

In [28]:
#| hide
import nbdev; nbdev.nbdev_export()