In [None]:
import os
from collections import Counter
import logging
import sys
import shutil

# Load environment variables from .env file
from dotenv import load_dotenv
# Locate the project's .env file.
# The notebook starts in a sub-directory of the project, so the parent of the
# current working directory is tried first. A later cell calls os.chdir() into
# the project root itself; if this cell is re-run after that, the parent is no
# longer the project, so fall back to the current directory when it (and not
# its parent) contains the .env file.
notebook_dir = os.getcwd()
project_root = os.path.dirname(notebook_dir)
env_path = os.path.join(project_root, '.env')
if not os.path.isfile(env_path) and os.path.isfile(os.path.join(notebook_dir, '.env')):
    project_root = notebook_dir
    env_path = os.path.join(project_root, '.env')

# override=True lets values from .env replace variables already set in the
# process. load_dotenv returns False when nothing was loaded, which would
# otherwise go unnoticed until a later cell receives None from os.getenv.
if not load_dotenv(env_path, override=True):
    print(f"Warning: no environment variables were loaded from {env_path}")

print(env_path)

/home/lakishadavid/computational_genetic_genealogy/.env


In [7]:
# Read the project directory layout from the environment.
# os.getenv returns None for every variable that is not set.
working_directory = os.getenv('PROJECT_WORKING_DIR', default=None)
data_directory = os.getenv('PROJECT_DATA_DIR', default=None)
references_directory = os.getenv('PROJECT_REFERENCES_DIR', default=None)
results_directory = os.getenv('PROJECT_RESULTS_DIR', default=None)
utils_directory = os.getenv('PROJECT_UTILS_DIR', default=None)

print(f"Working Directory: {working_directory}")
print(f"Data Directory: {data_directory}")
print(f"References Directory: {references_directory}")
print(f"Results Directory: {results_directory}")
print(f"Utils Directory: {utils_directory}")

# Fail early with an actionable message: os.chdir(None) would otherwise raise
# an opaque TypeError that does not mention the missing variable.
if not working_directory:
    raise RuntimeError(
        "PROJECT_WORKING_DIR is not set. Check that the .env file was found "
        "and defines PROJECT_WORKING_DIR before running this cell."
    )

# Every later relative path is resolved against the project working directory.
os.chdir(working_directory)
print(f"The current directory is {os.getcwd()}")

Working Directory: /home/lakishadavid/computational_genetic_genealogy
Data Directory: /home/lakishadavid/computational_genetic_genealogy/data
References Directory: /home/lakishadavid/computational_genetic_genealogy/references
Results Directory: /home/lakishadavid/computational_genetic_genealogy/results
Utils Directory: /home/lakishadavid/computational_genetic_genealogy/utils
The current directory is /home/lakishadavid/computational_genetic_genealogy


In [8]:
def _resolve_log_level(level, fallback=logging.INFO):
    """
    Translate a logging level given as a name or a number into its numeric value.

    Args:
        level (str | int): Level name such as "debug" (case-insensitive) or a
            numeric level such as logging.WARNING.
        fallback (int): Value returned when `level` is not a recognised name.

    Returns:
        int: The numeric logging level.
    """
    if isinstance(level, int):
        return level
    resolved = getattr(logging, str(level).upper(), fallback)
    # The logging module also exposes upper-case attributes that are not
    # levels (e.g. BASIC_FORMAT is a string); accept genuine numeric levels only.
    return resolved if isinstance(resolved, int) else fallback


def configure_logging(log_filename, log_file_debug_level="INFO", console_debug_level="INFO"):
    """
    Configure logging for both file and console handlers.

    A file handler and a stdout handler are appended to the root logger on
    every call; existing handlers are left in place, so call clear_logger()
    first when reconfiguring to avoid duplicated log lines.

    Args:
        log_filename (str): Path to the log file where logs will be written.
        log_file_debug_level (str | int): Logging level for the file handler,
            as a name ("INFO", "debug", ...) or a numeric level. Unrecognised
            names fall back to INFO.
        console_debug_level (str | int): Logging level for the console handler,
            with the same accepted values and fallback.

    Returns:
        None
    """
    # The root logger lets everything through; each handler filters on its own level.
    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)

    file_level = _resolve_log_level(log_file_debug_level)
    console_level = _resolve_log_level(console_debug_level)

    # File handler: logs messages at file_level and above to the file.
    file_handler = logging.FileHandler(log_filename)
    file_handler.setLevel(file_level)
    file_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    file_handler.setFormatter(file_formatter)

    # Console handler: logs messages at console_level and above to stdout.
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(console_level)
    console_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
    console_handler.setFormatter(console_formatter)

    logger.addHandler(file_handler)
    logger.addHandler(console_handler)
    
def clear_logger():
    """
    Remove and close all handlers attached to the root logger.

    Closing matters for file handlers: a handler that is only detached keeps
    its log file open, so every reconfiguration would leak one open file.
    Closing a StreamHandler does not close the stream it writes to.
    """
    logger = logging.getLogger()
    # Iterate over a copy because removeHandler mutates logger.handlers.
    for handler in logger.handlers[:]:
        logger.removeHandler(handler)
        handler.close()

In [5]:
# Path of the Lab 5 log file inside the results directory.
log_filename = os.path.join(results_directory, "lab5_log.txt")
print(f"The Lab 5 log file is located at {log_filename}.")

# Ensure the results directory exists. exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(results_directory, exist_ok=True)

# Make sure the log file exists. Append mode creates the file when it is
# missing and leaves any existing content untouched, so no existence check
# is needed.
with open(log_filename, 'a'):
    pass

The Lab 5 log file is located at /home/lakishadavid/computational_genetic_genealogy/results/lab5_log.txt.


In [6]:
# Start from a root logger without handlers so that re-running this cell does
# not stack duplicate handlers, then attach the file and console handlers.
clear_logger()
configure_logging(
    log_filename,
    log_file_debug_level="INFO",
    console_debug_level="INFO",
)

In [9]:
# Default location of the data directory, under the user's home.
default_data_directory = os.path.expanduser("~/computational_genetic_genealogy/data")

# Absolute, user-expanded form of the data directory read from the environment.
expanded_data_directory = os.path.abspath(os.path.expanduser(data_directory))

# Copying is only needed when a non-default data directory is configured.
if expanded_data_directory == os.path.abspath(default_data_directory):
    print("Data directory is the default; skipping data copy block.")
else:
    print(f"Data directory ({expanded_data_directory}) is not the default ({default_data_directory}). Running data copy block...")

    # The data to copy is expected in a 'Data' folder under the project root.
    data_source = os.path.join(project_root, 'Data')

    if os.path.exists(data_source):
        # Create the destination directory first when it is missing.
        if not os.path.exists(expanded_data_directory):
            os.makedirs(expanded_data_directory)
            print(f"Created data directory at: {expanded_data_directory}")

        # 'Data' is copied into the destination under its own folder name.
        target_dir = os.path.join(expanded_data_directory, os.path.basename(data_source))

        try:
            # dirs_exist_ok=True (Python 3.8+) merges into an existing target
            # instead of failing when it is already there.
            shutil.copytree(data_source, target_dir, dirs_exist_ok=True)
            print(f"Successfully copied data from '{data_source}' to '{target_dir}'.")
        except Exception as copy_error:
            # Best effort: report the failure and let the notebook continue.
            print(f"Error copying data directory: {copy_error}")
    else:
        print(f"Warning: The data source '{data_source}' does not exist.")


Data directory is the default; skipping data copy block.
