# Guided Initialization: Databricks Assessment Utility
## Overview
This notebook streamlines the process of installing the necessary dependencies and launching the Databricks assessment utility, offering a consistent approach for both Serverless and cluster-based compute environments.

In [0]:
import os

# Anchor every path on the directory this notebook is running from.
current_working_directory = os.getcwd()
print(f"Current working directory: {current_working_directory}")

# The assessment script is expected in the working directory; requirements.txt one level above it.
script_path = os.path.join(current_working_directory, "databricks_assessment.py")
requirements_file_path = os.path.join(
    current_working_directory, "..", "requirements.txt"
)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Path to requirements.txt: {requirements_file_path}")
print(f"Path to databricks_assessment.py: {script_path}")

### 1. Install dependencies
- Installs the dependencies defined in **`requirements.txt`** using the **`%pip install -r`** magic command

In [0]:
# Install every package listed in requirements.txt with the %pip magic.
try:
    # `$requirements_file_path` is substituted with the value of the Python variable of that name.
    %pip install -r $requirements_file_path
    print("Successfully installed dependencies from requirements.txt")
except Exception as e:
    # NOTE(review): confirm that a failed `%pip` actually raises inside this `try`;
    # if it does not, the success message above is printed even when installation fails.
    print(f"Error installing dependencies: {e}")

In [0]:
"""
Restart the python process attached to the notebook
- Restart the Python process attached to the notebook for the newly installed libraries to be available in the current session
- Refer https://docs.databricks.com/aws/en/libraries/restart-python-process
"""
# Variables defined in earlier cells do not survive the restart, which is why
# the following cell rebuilds the path variables.
dbutils.library.restartPython()

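- Reset the paths after the restart: restarting the Python process clears the variables defined in the earlier cells, so they must be recomputed here.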

In [None]:
import os

# Recompute the paths from the notebook's working directory.
current_working_directory = os.getcwd()
print(f"Current working directory: {current_working_directory}")

# The assessment script is expected in the working directory; requirements.txt one level above it.
script_path = os.path.join(current_working_directory, "databricks_assessment.py")
requirements_file_path = os.path.join(
    current_working_directory, "..", "requirements.txt"
)

# Echo the resolved locations so a wrong working directory is easy to spot.
print(f"Path to requirements.txt: {requirements_file_path}")
print(f"Path to databricks_assessment.py: {script_path}")

### 2. Verifying Installed Dependencies
- Programmatically checks whether each dependency listed in your **`requirements.txt`** file is installed and prints its installed version.

In [0]:
import subprocess
import re

def get_installed_package_info(package_name):
    """Look up the installed version of a package in the running interpreter.

    The lookup goes through ``importlib.metadata`` rather than spawning
    ``pip show``: it inspects the environment of the Python process that is
    executing the notebook (a ``pip`` found on PATH may belong to a different
    environment) and avoids one subprocess per package.

    Args:
        package_name: Distribution name as it appears in requirements.txt.

    Returns:
        A ``(package_name, status)`` tuple where ``status`` is the installed
        version string, ``"Not installed"`` when no such distribution is
        found, or ``"Version information not found"`` when the distribution
        exists but its metadata carries no version.
    """
    # Imported locally so this function does not depend on the cell's import lines.
    from importlib import metadata

    # An empty name can never identify a distribution; report it like a missing one.
    if not package_name:
        return package_name, "Not installed"

    try:
        version = metadata.version(package_name)
    except (metadata.PackageNotFoundError, ValueError):
        # ValueError covers names that the metadata API rejects outright.
        return package_name, "Not installed"

    if not version:
        return package_name, "Version information not found"
    return package_name, version

def _requirement_name(requirement_line):
    """Extract the distribution name from one stripped requirements.txt line.

    Returns ``None`` for pip option lines (``-r``, ``--index-url`` ...), the
    bare name for ordinary specifiers (version operators, extras, environment
    markers, ``name @ url`` and inline comments are dropped), and the line
    unchanged when it does not look like a named requirement (e.g. a URL).
    """
    if requirement_line.startswith("-"):
        return None
    # A name must be followed by end-of-line or by a character that starts a
    # specifier, extra, marker, direct reference or comment.
    match = re.match(
        r"([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)\s*(?:$|[\[=<>!~;@#])",
        requirement_line,
    )
    return match.group(1) if match else requirement_line


def print_installed_versions_from_requirements(requirements_file):
    """Print the installed version of every package named in a requirements file.

    Blank lines, comment lines and pip option lines are skipped. Each remaining
    line is reduced to its package name and looked up with
    ``get_installed_package_info``; one numbered line is printed per package.

    Args:
        requirements_file: Path to the requirements.txt file to read.

    Returns:
        None. If the file does not exist, an error message is printed instead.
    """
    try:
        with open(requirements_file, "r") as f:
            requirement_lines = [line.strip() for line in f]
    except FileNotFoundError:
        print(f"Error: {requirements_file} not found.")
        return

    package_number = 0
    for line in requirement_lines:
        if not line or line.startswith("#"):
            continue
        package_name = _requirement_name(line)
        if package_name is None:
            # Option lines configure pip; they do not name a package.
            continue
        package_number += 1
        name, version = get_installed_package_info(package_name)
        print(f"Package {package_number} => {name}: {version}")

# Report the installed version of each package listed in requirements.txt.
print_installed_versions_from_requirements(requirements_file_path)

### 3. Execute Databricks Assessment Tool

In [0]:
"""
Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
"""

# Load the IPython autoreload extension, then select mode 2 so that edits to
# imported modules are picked up without restarting the Python process.
%load_ext autoreload
%autoreload 2

In [0]:
from databricks_assessment import DatabricksAssessment

In [0]:
# Customer name handed to the assessment tool (presumably used to label its output; verify in databricks_assessment.py).
input_customer_name = "dbx_demo_customer" # Set this to your customer name

# Strip stray leading/trailing whitespace so an accidental space in the value
# above does not end up in the name passed to the tool.
assessor = DatabricksAssessment(customer_name=input_customer_name.strip())
assessor.process_workspace_data()

### 4. Transfer output files from Databricks workspace to S3
> - Set the **`input_value`** variable in the next cell to a valid [Unity Catalog Volume](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-volumes) location.
>   - Format: _**`/Volumes/mycatalog/myschema/myexternalvolume`**_

In [0]:
# Destination Unity Catalog Volume; passed to the helper notebook as `target_volume_location`.
input_value = "/Volumes/dbx_demo/default/demo_volume" # Enter your input value here

In [0]:
helper_notebook_path = "./databricks_cloud_storage_writer" # Notebook path (Do not change, unless you have moved the notebook)
timeout = 300  # Seconds the helper notebook is allowed to run before the call gives up

# Arguments handed to the helper notebook; it receives the destination volume under this key.
args_to_pass = {"target_volume_location": input_value}

# Define the result up front so the name exists even when the run fails.
processed_result = None
try:
    processed_result = dbutils.notebook.run(helper_notebook_path, timeout, args_to_pass)
except Exception as e:
    # Best-effort: report the failure and let the notebook continue.
    print(f"Error running {helper_notebook_path}: {e}")
else:
    # Surface whatever the helper notebook returned so the outcome is visible.
    print(f"{helper_notebook_path} finished; returned: {processed_result!r}")

## ======== END ========

## Additional helpers _(not required for tool execution)_

In [0]:
"""
Execute the Python script using the `%run` magic command
"""
# Optional alternative, not required for tool execution: uncomment the line
# below to run the script at `script_path` directly.
# %run $script_path