In [16]:
# 📓 JCDS Live Function Testing Notebook
# File: tests/notebooks/dev_test_functions.ipynb

In [17]:
# ----------------------------------------------------------------------------
# 🧼 Notes
# ----------------------------------------------------------------------------
# - This notebook uses `%autoreload 2` to automatically reload all updated modules.
# - You can call any function from your library here to test live edits.
# - This notebook lives in tests/notebooks/ for organized dev use.
# - You can copy this structure for testing other modules as needed.

In [18]:
# ----------------------------------------------------------------------------
# 🧪 Load Sample Dataset (local CSV export, falling back to Seaborn's Titanic)
# ----------------------------------------------------------------------------
import seaborn as sns
import pandas as pd

# Relative path: resolved against the kernel's current working directory.
DATA_PATH = "query_export_results_latest.csv"

try:
    df = pd.read_csv(DATA_PATH)
    data_source = DATA_PATH
except FileNotFoundError:
    # No local export found: fall back to the Titanic sample so the cells
    # below still have a DataFrame to run against.
    df = sns.load_dataset("titanic")
    data_source = "seaborn 'titanic' sample"

# Say which dataset ended up in `df`; the fallback is otherwise silent.
print(f"Loaded {len(df)} rows from {data_source}")

# NOTE(review): the preview of the CSV export shows people's names; clear the
# cell outputs before sharing or committing this notebook.
df.head()

Unnamed: 0,PR ID,Date Opened,Date Due,Date Closed,Extended Date Due,Originator,QA Owner,Assigned To,Project,Where Issue Occurred,...,Quantity Shipped - Complaint,Order No,ERP Originator,DI - Complaint,PI - Complaint,Caller Name,Caller Email,Caller Phone No,Customer Number,Customer Name
0,425906,4/2/2020 11:02,29-May-20,5/30/2020 13:21,29-May-20,"Jones, Rafell","Jones, Rafell","Allen, Robbie",Corrective Action,,...,,,,,,,,,,
1,425928,4/2/2020 12:48,29-May-20,9/9/2020 8:05,29-May-20,"Jones, Rafell","Jones, Rafell","Johnson, Presley",Corrective Action,,...,,,,,,,,,,
2,425936,4/2/2020 13:29,1-Jun-20,5/22/2020 9:45,1-Jun-20,"Zaluskey, Laura","Zaluskey, Laura","Molinero, Juan",Corrective Action,,...,,,,,,,,,,
3,426191,4/3/2020 19:20,26-Jun-20,10/21/2020 13:12,30-Oct-20,"Jones, Rafell","Jones, Rafell","Johnson, Presley",Corrective Action,,...,,,,,,,,,,
4,426192,4/3/2020 19:24,26-Jun-20,7/13/2020 16:14,26-Jun-20,"Jones, Rafell","Jones, Rafell","Johnson, Presley",Corrective Action,,...,,,,,,,,,,


In [19]:
# ----------------------------------------------------------------------------
# 🔁 Autoreload Extension Setup
# ----------------------------------------------------------------------------
%load_ext autoreload
# Automatically reload all modules before executing cells
%autoreload 2  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# ----------------------------------------------------------------------------
# 📦 Import Local Library Modules
# ----------------------------------------------------------------------------
import jcds.eda as jeda
import jcds.reports as jrep

In [26]:
# ----------------------------------------------------------------------------
# 🚀 Run Function to Test Live Edits
# ----------------------------------------------------------------------------
# Workflow: edit a jcds function, save its .py file, then re-run this cell.
# Replace or duplicate this cell for any other function you want to test.

jrep.data_quality(df, show_columns=True)

DATA QUALITY REPORT

 * Total entries (rows * cols): 151844
 * Memory usage: 8.43 MB
 * Rows: 4466
 * Columns: 34

MISSING DATA:
 * Total entries: 94256 missing (62.1%)

ROWS:
--------------------
 * Rows missing any: 4465
 * Rows missing all: 0

DUPLICATES: 0

COLUMNS:
--------------------
Columns missing any: 28
	'DI - Complaint': 4453 missing (99.7%)
	'PI - Complaint': 4453 missing (99.7%)
	'Caller Phone No': 4440 missing (99.4%)
	'Caller Email': 4408 missing (98.7%)
	'ERP Complaint No': 4399 missing (98.5%)
	'Customer Number': 4399 missing (98.5%)
	'ERP Originator': 4393 missing (98.4%)
	'Order No': 4389 missing (98.3%)
	'SKU Description - Complaint': 4388 missing (98.3%)
	'Regulatory Code - Complaint': 4376 missing (98.0%)
	'Lot - Complaint': 4361 missing (97.6%)
	'Caller Name': 4360 missing (97.6%)
	'Customer Name': 4360 missing (97.6%)
	'SKU No - Complaint': 4359 missing (97.6%)
	'Quantity Shipped - Complaint': 4359 missing (97.6%)
	'Regulated Product': 4357 missing (97.6%)
	'De

In [27]:
# ----------------------------------------------------------------------------
# 🔍 Missing-Value Summary
# ----------------------------------------------------------------------------
# Left as the last expression so the notebook renders the return value.
# NOTE(review): the result appears to be a dict of
# column name -> (missing count, missing %) — confirm against jcds.eda.
jeda.show_missing_summary(df)

{'DI - Complaint': (4453, 99.7),
 'PI - Complaint': (4453, 99.7),
 'Caller Phone No': (4440, 99.4),
 'Caller Email': (4408, 98.7),
 'ERP Complaint No': (4399, 98.5),
 'Customer Number': (4399, 98.5),
 'ERP Originator': (4393, 98.4),
 'Order No': (4389, 98.3),
 'SKU Description - Complaint': (4388, 98.3),
 'Regulatory Code - Complaint': (4376, 98.0),
 'Lot - Complaint': (4361, 97.6),
 'Caller Name': (4360, 97.6),
 'Customer Name': (4360, 97.6),
 'SKU No - Complaint': (4359, 97.6),
 'Quantity Shipped - Complaint': (4359, 97.6),
 'Regulated Product': (4357, 97.6),
 'Defect Code': (4357, 97.6),
 'Defect Description': (4357, 97.6),
 'Sites Affected - Notification': (3522, 78.9),
 'Invest Summary and Root Cause': (3393, 76.0),
 'What Is The Issue': (2395, 53.6),
 'Where Issue Occurred': (2350, 52.6),
 'Extended Date Due': (1251, 28.0),
 'Date Due': (1163, 26.0),
 'Site Found/Mfg': (473, 10.6),
 'Assigned To': (342, 7.7),
 'Date Closed': (247, 5.5),
 'QA Owner': (152, 3.4)}

In [35]:
# ----------------------------------------------------------------------------
# 🔢 High-Cardinality Variables
# ----------------------------------------------------------------------------
# Keep the result in `highcard`; the next cell formats it line by line.
highcard = jeda.show_highcardvars(
    df,
    percent_unique=20,
    verbose=True,
)
print(highcard)

Cateogrical variables with cardinality >= 20%
[('Date Opened', 99.01477832512316), ('Date Due', 25.906851768920735), ('Date Closed', 92.76757725033588), ('Extended Date Due', 25.459023734885804), ('Short Description', 77.60859829825347), ('What Is The Issue', 46.37259292431706), ('Invest Summary and Root Cause', 22.279444693237796)]


In [36]:
# Pretty-print each entry of `highcard` as " * 'name': pct%", with the
# percentage rounded to one decimal place.
for entry in highcard:
    column_name, pct_unique = entry[0], entry[1]
    print(f" * '{column_name}': {pct_unique:.1f}%")

 * 'Date Opened': 99.0%
 * 'Date Due': 25.9%
 * 'Date Closed': 92.8%
 * 'Extended Date Due': 25.5%
 * 'Short Description': 77.6%
 * 'What Is The Issue': 46.4%
 * 'Invest Summary and Root Cause': 22.3%
