In [None]:
import os
import sys
import base64
from kaggle_secrets import UserSecretsClient

# --- 1. Retrieve Secrets ---
# Pull the GitHub token, the base64-encoded deploy key and the base64-encoded
# Google Drive credentials from the Kaggle secret store.
print("--- 1. Retrieving secrets from Kaggle Secret Manager ---")
user_secrets = UserSecretsClient()

try:
    git_pat = user_secrets.get_secret("GITHUB_PAT")
    git_key_b64 = user_secrets.get_secret("GIT_DEPLOY_KEY_B64")
    gdrive_creds_b64 = user_secrets.get_secret("GDRIVE_CREDENTIALS_DATA_B64")
    print("Successfully retrieved secrets.")
except Exception as fetch_error:
    # Report the failure in the cell output, then let it propagate so the
    # notebook stops here instead of continuing without credentials.
    print(f"ERROR: Could not retrieve secrets. Details: {fetch_error}")
    raise

# --- Sanity checks on the retrieved values ---
# The Drive credentials must be present and at least 10 characters long.
if not (gdrive_creds_b64 and len(gdrive_creds_b64) >= 10):
    print("ERROR: GDRIVE_CREDENTIALS_DATA_B64 secret appears to be empty or invalid!")
    raise ValueError("GDRIVE_CREDENTIALS_DATA_B64 secret is empty!")

# The token is required for the HTTPS clone performed later in this cell.
if not git_pat:
    raise ValueError("GITHUB_PAT secret is empty!")
# --- 2. Set up SSH for Pushing (Optional but good for the script later) ---
# Although we clone with PAT, the deploy key is more secure for pushing back.
print("\n--- Configuring SSH for Git push ---")
git_key_b64 = user_secrets.get_secret("GIT_DEPLOY_KEY_B64")
ssh_dir = "/root/.ssh"
os.makedirs(ssh_dir, exist_ok=True)
key_path = os.path.join(ssh_dir, "id_ed25519")
with open(key_path, "w") as f:
    f.write(base64.b64decode(git_key_b64).decode())
os.chmod(key_path, 0o600)
!ssh-keyscan github.com >> /root/.ssh/known_hosts

# --- 3. Clone the Private Repository using HTTPS & PAT ---
print("\n--- Cloning the private project repository ---")
GIT_USERNAME = "labyedh"
GIT_REPO = "Alzheimer_classification_mlops"
PROJECT_DIR = GIT_REPO

# This is the HTTPS URL format for cloning with a PAT
GIT_URL_HTTPS = f"https://{git_pat}@github.com/{GIT_USERNAME}/{GIT_REPO}.git"

# Clean up previous clone if it exists, then clone the latest version
!rm -rf {PROJECT_DIR}
!git clone {GIT_URL_HTTPS}

# --- 4. Write DVC Credentials into the CLONED project folder ---
print("\n--- Writing DVC Credentials ---")
creds_path = os.path.join(PROJECT_DIR, "gdrive-credentials.json")

# The file holds the decoded Google Drive credentials, so create it readable
# by the current user only, and write the decoded bytes as-is so the content
# does not depend on the locale's text encoding.
creds_fd = os.open(creds_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(creds_fd, "wb") as f:
    f.write(base64.b64decode(gdrive_creds_b64))
# The mode passed to os.open only applies on creation; enforce it in case the
# file was already present.
os.chmod(creds_path, 0o600)

print("\n--- Setup Cell Finished ---")

In [None]:
# ===================================================================
# CELL 2: EXECUTE PIPELINE FROM WITHIN PROJECT DIRECTORY
# ===================================================================
import os
import sys

PROJECT_DIR = "Alzheimer_classification_mlops"

# --- 5. Change into the project directory (CRUCIAL STEP) ---
%cd {PROJECT_DIR}

# Add the project's root directory to Python's path so 'src' imports work
sys.path.append(os.getcwd())

# --- 6. Make the pipeline script executable and run it ---
script_path = "run_pipeline.sh"
if os.path.exists(script_path):
    print(f"\n--- Found script at {os.getcwd()}/{script_path}. Executing... ---")
    !chmod +x {script_path}
    !./{script_path}
else:
    print(f"\n--- ERROR: {script_path} not found in the repository! ---")
    raise FileNotFoundError(f"{script_path} is missing from the Git repository.")