In [4]:
import os
import pandas as pd
import importlib.util

In [16]:
# Locate the sample data directory.
try:
    # Running as a script: __file__ is defined, so resolve sample_data relative
    # to the parent of this file's directory.
    sample_dir = os.path.join(os.path.dirname(__file__), '..', 'sample_data')
except NameError:
    # Running in a Jupyter notebook: __file__ is undefined, so fall back to the
    # parent of the current working directory.
    notebook_dir = os.path.dirname(os.path.abspath(''))
    print(f"Notebook directory: {notebook_dir}")
    sample_dir = os.path.join(notebook_dir, 'sample_data')


print(f"Looking for sample data in: {sample_dir}")

projectoverview_path = os.path.join(sample_dir, 'pdfcb_00a_projectoverview.md')
keyterms_path = os.path.join(sample_dir,        'pdfcb_00b_keyterms.md')
csv_path = os.path.join(sample_dir,             'pdfcb_00c_sampledata.csv')
datastructure_path = os.path.join(sample_dir,   'pdfcb_00d_data_structure.py')
banner_path = os.path.join(sample_dir, 'IN-CORE_HRRC_Banner.png')
figure_path = os.path.join(sample_dir, 'pdfcb_00e_sampleimage.jpg')

# Replace paths that do not exist with a sentinel: "" for the text/data inputs,
# None for the two image inputs.
projectoverview_path = projectoverview_path if os.path.exists(projectoverview_path) else ""
keyterms_path = keyterms_path if os.path.exists(keyterms_path) else ""
csv_path = csv_path if os.path.exists(csv_path) else ""
datastructure_path = datastructure_path if os.path.exists(datastructure_path) else ""
banner_path = banner_path if os.path.exists(banner_path) else None
figure_path = figure_path if os.path.exists(figure_path) else None

# Load CSV. A missing file leaves csv_path == "", which would make read_csv fail
# with an error that names no file, so report the expected location instead.
if not csv_path:
    raise FileNotFoundError(
        f"Sample CSV not found: {os.path.join(sample_dir, 'pdfcb_00c_sampledata.csv')}"
    )
input_df = pd.read_csv(csv_path)
input_df.head()

Notebook directory: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook
Looking for sample data in: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\sample_data


Unnamed: 0,huid,blockid,numprec,ownershp
0,B371559601011003H001,371560000000000.0,1,1
1,B371559601011005H001,371560000000000.0,1,1
2,B371559601011006H001,371560000000000.0,1,1
3,B371559601011006H002,371560000000000.0,1,1
4,B371559601011007H006,371560000000000.0,1,2


In [7]:
# Import the data-structure definition file as a standalone module and read
# its DATA_STRUCTURE attribute.
spec = importlib.util.spec_from_file_location(
    "pdfcb_00d_data_structure",
    datastructure_path,
)
# Both a spec and a loader are required before the module can be executed.
if not (spec is not None and spec.loader is not None):
    raise ImportError(f"Could not load module from {datastructure_path}")

ds_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(ds_module)  # runs the file's top-level code

datastructure = getattr(ds_module, "DATA_STRUCTURE")
print(f"Data structure loaded: {datastructure}")

Data structure loaded: {'huid': {'label': 'Housing Unit ID', 'DataType': 'String', 'pyType': <class 'str'>, 'AnalysisUnit': 'Housing unit', 'MeasureUnit': 'Housing units', 'notes': '1. Primary Key for sample data. '}, 'blockid': {'label': 'Block ID', 'huiv3-0-0': 'Block2010', 'formula': "output_df['blockid'].apply(lambda x :str(int(x)).zfill(15))", 'DataType': 'String', 'pyType': <class 'str'>, 'AnalysisUnit': 'Geographic unit', 'MeasureUnit': 'Housing unit in census block', 'length': 15, 'zero_padded': True, 'notes': '1. 2010 Census Block ID'}, 'numprec': {'label': 'Number of Person Records', 'DataType': 'Int', 'pyType': <class 'int'>, 'AnalysisUnit': 'Housing unit', 'MeasureUnit': 'Persons'}, 'ownershp': {'label': 'Tenure Status', 'DataType': 'Int', 'pyType': 'category', 'categorical': True, 'AnalysisUnit': 'Household', 'MeasureUnit': 'Housing unit', 'categories_dict': {1: '1. Owned or being bought (loan)', 2: '2. Rented'}, 'categories': ['1. Owned or being bought (loan)', '2. Rented

In [12]:
# Work out which directory receives the generated test output
try:
    # Script run: __file__ is defined, so use the directory holding this file
    tests_dir = os.path.dirname(__file__)
except NameError:
    # Notebook run: __file__ is undefined, so use <parent of cwd>/tests
    notebook_dir = os.path.dirname(os.path.abspath(''))
    print(f"Notebook directory: {notebook_dir}")
    tests_dir = os.path.join(notebook_dir, 'tests')
print(f"Looking for tests directory in: {tests_dir}")

# Output base name (no extension), its full PDF path, and the folder mapping
output_filename = "test_codebook_with_images"
output_filename_path = os.path.join(tests_dir, output_filename + ".pdf")
outputfolders = dict(top=tests_dir)
print(f"Output will be saved to: {output_filename_path}")
print(f"Output folders set to: {outputfolders}")

Notebook directory: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook
Looking for tests directory in: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\tests
Output will be saved to: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\tests\test_codebook_with_images.pdf
Output folders set to: {'top': 'c:\\Users\\nathanael99\\MyProjects\\GitHub\\pypdfcodebook\\tests'}


In [17]:
# Show the resolved banner image path (the sample-data cell sets it to None
# when the banner file does not exist).
print(f"Banner path: {banner_path}")


Banner path: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\sample_data\IN-CORE_HRRC_Banner.png


In [19]:
# Check image formats before PDF creation
supported_exts = {'.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff'}
print(banner_path, figure_path)
# The sample-data cell sets a missing banner to None, so test truthiness rather
# than comparing against "": os.path.splitext(None) would raise TypeError.
if banner_path:
    banner_ext = os.path.splitext(banner_path)[1].lower()
    print(f"Banner extension: {banner_ext}")
    # Keep the banner only when its extension is in the supported set
    banner_to_use = banner_path if banner_ext in supported_exts else ""
    if banner_to_use == "":
        print(f"Skipping unsupported banner image format: {banner_path}")
else:
    # No banner available ("" or None): downstream code receives ""
    banner_to_use = ""

print(f"Banner to use: {banner_to_use}")


c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\sample_data\IN-CORE_HRRC_Banner.png c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\sample_data\pdfcb_00e_sampleimage.jpg
Banner extension: .png
Banner to use: c:\Users\nathanael99\MyProjects\GitHub\pypdfcodebook\sample_data\IN-CORE_HRRC_Banner.png


In [None]:
# Decide whether the sample figure can be included: it must exist (not None)
# and have an extension listed in supported_exts.
figure_to_use = None
if figure_path is not None:
    figure_ext = os.path.splitext(figure_path)[1].lower()
    if figure_ext in supported_exts:
        figure_to_use = figure_path
    else:
        print(f"Skipping unsupported figure image format: {figure_path}")

# Pass figures as a one-element list when valid, otherwise None
if figure_to_use:
    figures_param = [figure_to_use]
else:
    figures_param = None
print(f"Figures to include: {figures_param}")
print(f"Banner to include: {banner_to_use}")

In [2]:
# Display the package's built-in help text.
# NOTE(review): `pkg` is not defined in any visible cell; presumably an
# `import sample_test_package as pkg` ran in a cell not shown here. Confirm,
# so that Restart & Run All works on a fresh kernel.
help(pkg)

Help on package sample_test_package:

NAME
    sample_test_package

PACKAGE CONTENTS


FUNCTIONS
    add_num(a: float, b: float) -> float
        Return the sum of *a* and *b*.

        Examples
        --------
        >>> add_num(2, 3)
        5

    subtract_num(a: float, b: float) -> float
        Return the difference of *a* and *b*.

        Examples
        --------
        >>> subtract_num(2, 3)
        -1

FILE
    c:\users\nathanael99\appdata\local\miniconda3\envs\pycodebook\lib\site-packages\sample_test_package\__init__.py




In [3]:
# Same call as the add_num docstring example; the documented result is 5
pkg.add_num(2, 3)

5

In [4]:
# Second add_num check with a different pair of operands
pkg.add_num(2, 10)

12

In [5]:
# subtract_num is documented as the difference of a and b (its example gives
# subtract_num(2, 3) == -1), so a negative result is expected here
pkg.subtract_num(2, 10)

-8