# Copy Sample Files to Volume - Optional

***

### Notebook Workflow Summary

This notebook follows a structured workflow to manage files and integrate with Unity Catalog. The key steps are as follows:

	- Widget setup: Initialize interactive widgets for user input.
	- Volume path construction: Build the necessary paths for the Unity Catalog volume.
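	- Local file discovery: List the subdirectories of the local `fixtures/samples` directory; each subdirectory is expected to be named after a file type.
	- File copy to Unity Catalog volume: Copy the files from the subdirectories whose names match the requested file types to the designated Unity Catalog volume.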

In [0]:
# Declare the notebook's input widgets.
#	Each entry below is (widget name, default value); the widgets are created in
#	the order listed so the notebook can be parameterized when run interactively
#	or as a job.
for widget_name, default_value in (
	("catalog_use", "dev"),	# Unity Catalog catalog name
	("schema_use", "dev_matthew_giglia_ncpdp_rx"),	# Unity Catalog schema name
	("volume_use", "landing"),	# Unity Catalog volume name
	("volume_sub_path", ""),	# Optional subdirectory within the volume
	("ncdpd_file_types", "ClaimBilling"),	# Comma-separated list of file types to manage
):
	dbutils.widgets.text(widget_name, default_value)

In [0]:
# Read the current widget values into notebook variables.
#	The four path-related widgets are fetched in declaration order and trimmed of
#	surrounding whitespace.
catalog_use, schema_use, volume_use, volume_sub_path = (
	dbutils.widgets.get(widget_name).strip()
	for widget_name in ("catalog_use", "schema_use", "volume_use", "volume_sub_path")
)

#	The file-type widget holds a comma-separated list; split it and trim each entry
ncdpd_file_types = [
	entry.strip()
	for entry in dbutils.widgets.get("ncdpd_file_types").strip().split(",")
]

In [0]:
# Construct the Unity Catalog volume path from the widget values.
#	The base path is /Volumes/<catalog_use>/<schema_use>/<volume_use>; the optional
#	sub-path is appended only when it is non-empty.
volume_root = f"/Volumes/{catalog_use}/{schema_use}/{volume_use}"

#	Drop leading/trailing slashes from the sub-path so that inputs such as
#	"/incoming/" do not produce doubled slashes ("landing//incoming/") in
#	volume_path or in the destination paths later built from it.
#	`or ""` keeps the original tolerance for a None value.
sub_path_normalized = (volume_sub_path or "").strip("/")

if sub_path_normalized:
	#	Sub-path supplied: append it to the base volume path
	volume_path = f"{volume_root}/{sub_path_normalized}"
else:
	#	No sub-path (empty, or only slashes): use the base volume path
	volume_path = volume_root

# Print the constructed volume path
print(f"""
	volume_path: {volume_path}
""")

In [0]:
# Import required modules for local file operations
import os
import shutil

In [0]:
# Locate the local sample fixtures and list the subdirectories they contain.
#	The directory is resolved relative to the current working directory
#	(two levels up, then fixtures/samples) and converted to an absolute path.
source_path = os.path.abspath(os.path.join('..', '..', 'fixtures', 'samples'))

#	Stop early with a clear error when the fixtures directory is missing
if not os.path.exists(source_path):
	raise FileNotFoundError(f"Source directory not found: {source_path}")

#	Show which directory was resolved
print(f"Source samples directory: {source_path}")

#	Keep only the entries of source_path that are themselves directories
directories = [
	entry
	for entry in os.listdir(source_path)
	if os.path.isdir(os.path.join(source_path, entry))
]
directories

In [0]:
# Copy the sample files for each requested file type into the Unity Catalog volume.
#	Only subdirectories of source_path whose name appears in ncdpd_file_types are
#	processed; the files of each one are copied to <volume_path>/<directory>/.
#	Copying is best-effort: failures are printed and the loop moves on.
for directory in directories:
	if directory not in ncdpd_file_types:
		continue

	directory_full_path = os.path.join(source_path, directory)
	target_directory = f"{volume_path}/{directory}"

	#	Create the destination directory up front. shutil.copy2 does not create
	#	missing parent directories, so without this every copy into a folder that
	#	does not exist yet would fail.
	try:
		os.makedirs(target_directory, exist_ok=True)
	except Exception as e:
		print(f"Failed to create directory {target_directory}. Error: {str(e)}")
		continue

	#	Build the list of files to copy (name + full local path). Entries that are
	#	not regular files (e.g. nested directories) are skipped because
	#	shutil.copy2 can only copy files.
	file_info = [
		{"file_name": file_name, "full_path": os.path.join(directory_full_path, file_name)}
		for file_name in os.listdir(directory_full_path)
		if os.path.isfile(os.path.join(directory_full_path, file_name))
	]

	#	Copy each file to the target Unity Catalog volume path
	for file in file_info:
		local_path = file["full_path"]  # Full path to the source file on the local filesystem
		full_volume_path = f"{target_directory}/{file['file_name']}"  # Destination path in the volume
		try:
			shutil.copy2(local_path, full_volume_path)  # Copy file contents and, where possible, metadata
			print(f"Successfully copied {local_path} to {full_volume_path}")
		except Exception as e:
			#	Report the failure and continue with the remaining files
			print(f"Failed to copy {local_path} to {full_volume_path}. Error: {str(e)}")