<a href="https://colab.research.google.com/github/pulipulichen/colab-File-Browser/blob/main/colab/colab-File-Browser.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **colab-File-Browser**

Instruction: https://sites.google.com/view/howtousecolab-colab-20250202-2


# Script

## Configurations for script

In [185]:
# GitHub account that owns the repository cloned by setup_git_project().
PROJECT_OWNER="pulipulichen"
# Repository name; also used as the per-project folder name under
# /content/.docker-app/ and /content/docker-app/.
PROJECT_NAME="colab-File-Browser"

# When True, run_udocker() always runs the container in the foreground.
FORCE_RUN_IN_FOREGROUND=True

### Testing Instruction

1. `Runtime` > `Run all` (Ctrl + F9)
2. Wait for the Cloudflare URL to be accessible.

## Core Tools

### Install packages that require a session restart

In [186]:
# Install packages that will trigger a session restart first.
# CherryPy is pinned to 18.8.0; it is imported by the cell that defines
# run_udocker_in_background().
!pip install CherryPy==18.8.0



### curl_local_url()

In [187]:
import os
import time
import subprocess

def curl_local_url(port):
  """Block until an HTTP request to the local web server succeeds.

  Runs `curl -s http://127.0.0.1:<port>/` every 5 seconds until curl exits
  with status 0, then prints a timestamped success message.

  Args:
    port: Port (str or int) of the local server to probe.
  """
  url = 'http://127.0.0.1:' + str(port) + '/'

  while True:
    try:
      # Only curl's exit status matters. The response body is discarded
      # without decoding, so a page that is not valid UTF-8 can no longer
      # raise UnicodeDecodeError and abort the wait.
      subprocess.check_output(['curl', '-s', url])
    except subprocess.CalledProcessError:
      # curl exited non-zero (e.g. nothing is listening yet): retry later.
      time.sleep(5)
      continue

    print("Success to connect http://127.0.0.1:" + str(port) + ' ' + get_current_datestring())
    break

### wait_for_cloudflare()

In [188]:
import os
import time

def wait_for_cloudflare():
  """Block until the project's `.cloudflare.url` marker file exists.

  The path is polled every 3 seconds. The file's content is not read.
  """
  url_marker = f'/content/docker-app/{PROJECT_NAME}/.cloudflare.url'

  while True:
    if os.path.exists(url_marker):
      break
    time.sleep(3)  # poll interval: 3 seconds


### wait_for_docker_web_ready()

In [189]:
import os
import time

def wait_for_docker_web_ready():
  """Block until the project's `.docker-web.ready` marker file exists.

  Prints a timestamped notice, polls for the marker every 3 seconds, and
  sleeps a further 10 seconds once the marker has appeared.
  """
  print('\nWaiting for docker web ready...' + get_current_datestring())
  ready_marker = f'/content/docker-app/{PROJECT_NAME}/.docker-web.ready'

  while True:
    if os.path.exists(ready_marker):
      break
    time.sleep(3)  # poll interval: 3 seconds

  # Fixed extra delay after the marker has been found.
  time.sleep(10)

### keep_waiting()

In [190]:
import time

def keep_waiting():
  """Sleep forever in 1-second steps; this function never returns."""
  while 1:
    time.sleep(1)


### mountGDrive()

In [191]:
import os
from subprocess import getoutput
from google.colab import drive
import subprocess

# Default Google Drive sub-path for this project. A GDRIVE_PATH that is
# already defined (for example by an earlier cell) is left untouched.
if "GDRIVE_PATH" not in globals():
    GDRIVE_PATH = "/colab/" + PROJECT_NAME

def mountGDrive():
  """Mount Google Drive and link the project's Drive folder into /content/docker-app/.

  If "/google-drive/MyDrive" already exists the function returns at once.
  Otherwise it mounts the Drive at "/google-drive", makes sure
  "/google-drive/MyDrive/docker-app<GDRIVE_PATH>/" and "/content/docker-app/"
  exist, and runs `ln -s` to link the former into the latter.

  Returns:
    True, both when the Drive was already mounted and after a fresh mount.

  Raises:
    subprocess.CalledProcessError: if `ln -s` exits with a non-zero status.
  """
  if os.path.exists("/google-drive/MyDrive"):
    return True

  # MyDrive is missing, so the Drive is not mounted. Mount even when a stale
  # "/google-drive" directory already exists: previously that case skipped the
  # mount and the folders below were created on the local disk instead.
  drive.mount('/google-drive')  # access drive

  # Normalise the configured sub-path to the form "/.../".
  drive_subpath = GDRIVE_PATH
  if not drive_subpath.startswith("/"):
    drive_subpath = "/" + drive_subpath
  if not drive_subpath.endswith("/"):
    drive_subpath = drive_subpath + "/"

  folder_path = "/google-drive/MyDrive/docker-app" + drive_subpath
  os.makedirs(folder_path, exist_ok=True)

  target_folder_path = "/content/docker-app/"
  os.makedirs(target_folder_path, exist_ok=True)

  # The target is an existing directory, so ln places the link inside it.
  subprocess.run(["ln", "-s", folder_path, target_folder_path], check=True)
  return True

### clearDockerData()

In [192]:
import os
from subprocess import getoutput
import subprocess

def clearDockerData():
  """Call stop_udocker(), then recursively delete /content/docker-app/<PROJECT_NAME>."""
  stop_udocker()
  project_data_dir = "/content/docker-app/" + PROJECT_NAME
  # check=True: raise CalledProcessError if rm exits with a non-zero status.
  subprocess.run(["rm", "-rf", project_data_dir], check=True)


### get_config_from_yaml()

In [193]:
import yaml
import time

def get_config_from_yaml():
  """Read the project's docker-compose template and extract the run settings.

  The file is ./.docker-app/<PROJECT_NAME>/app-build/docker-compose-template.yml
  (relative to the current working directory). Only the service named "app"
  is inspected.

  Returns:
    dict with the keys:
      "WEBAPP_PORT": host port (str) taken from the first string port mapping
        that contains ":", or None when there is none.
      "LOCAL_VOLUMN_PATH": container path taken from the first string volume
        mapping that contains ":", or '/data' when there is none.
      "IMAGE_NAME": value of the service's "image" entry.
      "RUN_COMMAND": the service's "command"; a list is joined with spaces,
        a string is returned unchanged, a missing entry gives "".
      "GPU_ENABLE": True when the service's "deploy" section has a
        "resources" key, otherwise False.

  Raises:
    KeyError: if the "app" service has no "image" entry.
    OSError: if the compose template cannot be opened.
  """
  yaml_file_path = f"./.docker-app/{PROJECT_NAME}/app-build/docker-compose-template.yml"

  with open(yaml_file_path, "r") as file:
    yaml_data = yaml.safe_load(file)

  # Assuming there is only one service named "app" in the YAML.
  app = yaml_data.get("services", {}).get("app", {})

  # ============
  WEBAPP_PORT = None
  for port_mapping in app.get("ports") or []:
    if isinstance(port_mapping, str) and ":" in port_mapping:
      # Short syntax is "[HOST_IP:]HOST_PORT:CONTAINER_PORT", so the host port
      # is always the second field from the end. Indexing (instead of
      # two-way unpacking) keeps the three-field form from raising.
      WEBAPP_PORT = port_mapping.split(":")[-2]
      break

  # ============
  RUN_COMMAND = app.get("command", [])
  if isinstance(RUN_COMMAND, (list, tuple)):
    RUN_COMMAND = " ".join(str(part) for part in RUN_COMMAND)
  # A string command is kept as-is: joining a string would insert a space
  # between every character.

  # ============
  LOCAL_VOLUMN_PATH = '/data'
  for volume_mapping in app.get("volumes") or []:
    # The guard must test the volume entry itself (it used to test the
    # leftover loop variable of the port loop).
    if isinstance(volume_mapping, str) and ":" in volume_mapping:
      # "HOST_PATH:CONTAINER_PATH[:MODE]" -> the container path is field 1.
      LOCAL_VOLUMN_PATH = volume_mapping.split(":")[1]
      break

  # ============
  deploy = app.get("deploy") or {}
  GPU_ENABLE = 'resources' in deploy

  # ============
  IMAGE_NAME = app['image']

  return {
    "WEBAPP_PORT": WEBAPP_PORT,
    "LOCAL_VOLUMN_PATH": LOCAL_VOLUMN_PATH,
    "IMAGE_NAME": IMAGE_NAME,
    "RUN_COMMAND": RUN_COMMAND,
    "GPU_ENABLE": GPU_ENABLE
  }


#### get_environments_from_yaml()

In [194]:
def get_environments_from_yaml(environments, key, default_value = None):
  """Look up `key` in a list of "KEY=VALUE" environment entries.

  Args:
    environments: Iterable of entries such as "PORT=8080". Entries that are
      not strings are skipped; None is treated as an empty list.
    key: Exact variable name to look for.
    default_value: Value returned when no entry defines `key`.

  Returns:
    The value (str) of the first entry whose name equals `key`, otherwise
    `default_value`.
  """
  for env_mapping in environments or []:
    if not isinstance(env_mapping, str):
      continue

    # Split on the first "=" only, so values may themselves contain "=".
    name, separator, value = env_mapping.partition("=")

    # Compare the whole name: a substring test would let "MY_PORT=..." match
    # the key "PORT".
    if separator and name == key:
      return value

  return default_value

### get_current_datestring()

In [195]:
from datetime import datetime

def get_current_datestring():
  """Return the current time from datetime.now() formatted as "YYYY-MM-DD HH:MM:SS"."""
  now = datetime.now()
  return now.strftime("%Y-%m-%d %H:%M:%S")

### show_interval_time()

In [196]:
from datetime import datetime

def show_interval_time():
  """Print the whole minutes (via round()) elapsed since the module-level START_TIME."""
  elapsed = datetime.now() - START_TIME
  interval_minutes = round(elapsed.total_seconds() / 60)
  print(f"Minutes spent in the startup process: {interval_minutes}")


## udocker

### stop_udocker()

In [197]:
def stop_udocker():
  """Remove the containers listed by `udocker ps`, then kill running Java processes.

  Uses IPython `!` shell escapes, so it only works inside an IPython/Colab
  kernel.
  """
  #!udocker --allow-root ps
  print('Stop udocker...' + get_current_datestring())
  # Skip the first (header) line of `udocker ps`, take the first column of
  # every remaining line and pass each value to `udocker rm`.
  !udocker --allow-root ps | awk 'NR > 1 {print $1}' | xargs -I {} udocker --allow-root rm {}

  print('Kill Java...' + get_current_datestring())
  # pkill and the 5-second pause only run when pgrep finds a java process.
  !pgrep java && pkill java && sleep 5


### run_udocker()

In [198]:
def run_udocker():
  # WEBAPP_PORT, LOCAL_VOLUMN_PATH, IMAGE_NAME, RUN_COMMAND = get_config_from_yaml()
  YAML_CONFIG = get_config_from_yaml()

  # !rm -f /content/docker-app/$PROJECT_NAME/.docker-web.ready || true
  # !rm -f /content/docker-app/$PROJECT_NAME/.cloudflare.url || true
  ![ -e /content/docker-app/$PROJECT_NAME/.cloudflare.url ] && rm /content/docker-app/$PROJECT_NAME/.cloudflare.url
  ![ -e /content/docker-app/$PROJECT_NAME/.docker-web.ready ] && rm /content/docker-app/$PROJECT_NAME/.docker-web.ready

  RUN_IN_BACKGROUND = True
  if YAML_CONFIG["WEBAPP_PORT"] is None:
    RUN_IN_BACKGROUND = False

  # For testing purposes
  if FORCE_RUN_IN_FOREGROUND is True:
    RUN_IN_BACKGROUND = False

  if RUN_IN_BACKGROUND is False:
    run_udocker_in_foreground()
  else:
    run_udocker_in_background()


### run_udocker_in_foreground()

In [199]:
import asyncio

def run_udocker_in_foreground():
  """Recreate the project's data folder and run the udocker container from this cell.

  The command line comes from get_udocker_run_command() and is executed with
  an IPython `!` shell escape, so this only works inside an IPython/Colab
  kernel.
  """

  # WEBAPP_PORT, LOCAL_VOLUMN_PATH, IMAGE_NAME, RUN_COMMAND = get_config_from_yaml()
  # NOTE(review): YAML_CONFIG is not used anywhere else in this function.
  YAML_CONFIG = get_config_from_yaml()

  print('\nRun container in foreground...' + get_current_datestring())

  # print(LOCAL_VOLUMN_PATH)
  # print(RUN_COMMAND)

  # Start from an empty /content/docker-app/<PROJECT_NAME>: anything already
  # stored there is deleted.
  !rm -rf /content/docker-app/$PROJECT_NAME
  !mkdir -p /content/docker-app/$PROJECT_NAME

  # asyncio.run(setup_cloudflared())

  # =======

  # show_interval_time()

  # =======

  udocker_command = get_udocker_run_command()
  # IPython expands $udocker_command from the Python variable above.
  !$udocker_command

### run_udocker_in_background()

In [200]:
# Installing this package will trigger a session restart.
!pip install CherryPy==18.8.0

import cherrypy

def run_udocker_in_background():
  """Launch the udocker container with nohup, wait for the web app, then idle forever.

  Steps: stop existing containers, write and run a launcher script that starts
  the container in the background, wait until the local port answers and the
  ready marker exists, and print the startup duration. The function does not
  return because it ends in keep_waiting().
  """


  stop_udocker()

  !mkdir -p /content/docker-app/$PROJECT_NAME

  # WEBAPP_PORT, LOCAL_VOLUMN_PATH, IMAGE_NAME, RUN_COMMAND = get_config_from_yaml()
  YAML_CONFIG = get_config_from_yaml()

  # NOTE(review): both paths are relative to the current directory. The log
  # written below is /content/.docker-app/.nohup.out and other cells use
  # /content/docker-app/... for the marker file, so confirm these two lines
  # still remove what was intended.
  !rm -rf ./*nohup.out
  !rm -rf ./docker-app/$PROJECT_NAME/.cloudflare.url

  print('\nRun container in background...' + get_current_datestring())

  # Rebuild the launcher script from scratch: remove the old one, append a
  # single nohup line, make it executable and run it.
  ![ -e /content/.docker-app/udocker.sh ] && rm /content/.docker-app/udocker.sh
  udocker_command = get_udocker_run_command()
  !echo "nohup $udocker_command > /content/.docker-app/.nohup.out 2>&1 &" >> /content/.docker-app/udocker.sh
  !chmod +x /content/.docker-app/udocker.sh
  !bash /content/.docker-app/udocker.sh

  # Only updates CherryPy's configuration; no CherryPy server is started in
  # this function. int() raises if WEBAPP_PORT is None.
  cherrypy.config.update({'server.socket_host': '0.0.0.0','server.socket_port' : int(YAML_CONFIG["WEBAPP_PORT"])})
  # wait_for_cloudflare()
  curl_local_url(YAML_CONFIG["WEBAPP_PORT"])
  wait_for_docker_web_ready()

  # =======

  show_interval_time()

  # =======

  # setup_cloudflared()

  # Never returns: loops on time.sleep(1).
  keep_waiting()



### get_udocker_run_command()

In [201]:
# Install the pinned udocker release, then run udocker's own install step.
# NOTE(review): --allow-root is presumably required because the Colab runtime
# runs as root — confirm.
!pip install udocker==1.3.10
!udocker --allow-root install
import re

def get_udocker_run_command():
  YAML_CONFIG = get_config_from_yaml()

  command = ''

  IMAGE_NAME = YAML_CONFIG["IMAGE_NAME"]
  NAME_SPACE = re.sub(r'[/\-:.]', '_', IMAGE_NAME)

  print('\nPulling the image ' + IMAGE_NAME + '...' + get_current_datestring())
  !udocker --allow-root pull $IMAGE_NAME

  print('\nCreate the name space for ' + IMAGE_NAME + ' ...' + get_current_datestring())
  !udocker --allow-root create --name=$NAME_SPACE $IMAGE_NAME

  if YAML_CONFIG["GPU_ENABLE"] == True:

    print('\nSetup nvidia for the name space sdw...' + get_current_datestring())
    !udocker --allow-root setup --nvidia --force $NAME_SPACE

  command = "udocker --allow-root run " + \
    "-p " + YAML_CONFIG["WEBAPP_PORT"] + ":" + YAML_CONFIG["WEBAPP_PORT"] + \
    " " + \
    "--volume=/content/docker-app/" + PROJECT_NAME + ":" + YAML_CONFIG["LOCAL_VOLUMN_PATH"] + \
    " " + \
    NAME_SPACE

  print(command)

  return command



### setup_cloudflared()

In [202]:
import time
import re

def setup_cloudflared():
  YAML_CONFIG = get_config_from_yaml()

  print('\nWaiting for server ready... ' + get_current_datestring())
  curl_local_url(YAML_CONFIG["WEBAPP_PORT"])
  wait_for_docker_web_ready()

  if os.path.isfile('/content/.docker-app/.cloudflared') is False:
    !wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 -O /content/.docker-app/.cloudflared
    !chmod +x /content/.docker-app/.cloudflared

  #!cat /content/docker-app/docker-web-Apache-Solr/solrconfig.xml | grep dc.publisher
  cloudflared_command = '/content/.docker-app/.cloudflared --url "http://127.0.0.1:' + YAML_CONFIG["WEBAPP_PORT"] + '"'


  !echo "nohup $cloudflared_command > /content/.docker-app/.cloudflared.nohup.out 2>&1 &" >> /content/.docker-app/cloudflared.sh
  !chmod +x /content/.docker-app/cloudflared.sh
  !bash /content/.docker-app/cloudflared.sh

  time.sleep(10)

  file_path = "/content/.docker-app/.cloudflared.nohup.out"
  cloudflare_url = None

  while cloudflare_url is None:
      try:
          with open(file_path, "r") as file:
              content = file.read()
              match = re.search(r"https?://[\w.-]+\.trycloudflare\.com", content)
              if match:
                  cloudflare_url = match.group(0)
                  # print(f"Found Cloudflare URL: {cloudflare_url} " + get_current_datestring())
                  break
      except FileNotFoundError:
          print("File not found. Retrying in 10 seconds...")
      except Exception as e:
          print(f"Error reading file: {e}")

      time.sleep(10)

  print('\n============================================================================')
  show_interval_time()
  print('Public URL: ' + cloudflare_url)
  # print('')
  # print('You still need to wait for the subsequent execution to succeed before the URL displays the correct content.')
  print('============================================================================')

#### run_setup_cloudflared()

In [203]:
import asyncio

async def run_setup_cloudflared():
    """Run the blocking setup_cloudflared() in the loop's default executor and await it."""
    print("[INFO] Starting Cloudflared setup...")
    running_loop = asyncio.get_running_loop()
    print("[INFO] Starting Cloudflared setup... ok?")
    pending_setup = running_loop.run_in_executor(None, setup_cloudflared)
    await pending_setup

### setup_git_project()

In [204]:
def setup_git_project():
  """Clone the project repository into /content/.docker-app/, or update an existing clone.

  Uses IPython `%cd` and `!` magics, so it only works inside an IPython/Colab
  kernel. The working directory is /content/ when the function finishes.
  """
  # NOTE(review): the check uses an absolute path but mkdir uses a path
  # relative to the current directory; they match only when the current
  # directory is /content.
  if os.path.isdir('/content/.docker-app/') is False:
    !mkdir -p ./.docker-app

  if os.path.isdir('/content/.docker-app/' + PROJECT_NAME) is False:
    print('Try to initialize project...' + get_current_datestring())

    # First run: clone the repository and set global git options.
    %cd /content/.docker-app
    !git clone "https://github.com/{PROJECT_OWNER}/{PROJECT_NAME}.git"
    !git config --global pull.rebase false
    !git config --global user.email "blog@pulipuli.info"
    !git config --global user.name "Pulipuli Chen"
  else:
    print('Try to update project...' + get_current_datestring())

    # Existing clone: `git reset --hard` discards local changes to tracked
    # files before pulling.
    %cd /content/.docker-app/$PROJECT_NAME
    !git reset --hard
    !git pull --force

  # Return to /content/ on both paths.
  %cd /content/

## main() (must be the last)

In [205]:
%cd /content
# Remove the sample_data folder if it is present.
if os.path.isdir('/content/sample_data'):
  !rm -rf ./sample_data*

# ==================

from datetime import datetime
# Module-level start timestamp; show_interval_time() measures from it.
START_TIME = datetime.now()

def main():
  """Update the project checkout, start the Cloudflare tunnel setup and run the container.

  Side effects:
    - Rebinds the module-level START_TIME so show_interval_time() measures
      from this call.
    - Starts setup_cloudflared() in a daemon thread.
    - Calls run_udocker(), which blocks while the container runs.
  """
  import threading

  # Without this declaration the assignment below would create a local
  # variable and show_interval_time() would keep reading the older
  # module-level value.
  global START_TIME

  setup_git_project()

  START_TIME = datetime.now()
  print('\nPreparing environment... ' + START_TIME.strftime("%Y-%m-%d %H:%M:%S"))

  # setup_cloudflared() blocks while it waits for the web app, and
  # run_udocker() below blocks this thread as well. A task created with
  # asyncio.create_task() is only scheduled and cannot run while this
  # synchronous function holds the event loop, so the tunnel setup is run in
  # its own thread instead.
  cloudflared_thread = threading.Thread(
    target=setup_cloudflared,
    name="setup_cloudflared",
    daemon=True,
  )
  cloudflared_thread.start()

  # mountGDrive() # Enable Google Drive mounting
  run_udocker()

/content


In [206]:
# For Testing purposes

#clearDockerData()

# **Runtime**

In [207]:
# mountGDrive() # Enable Google Drive mounting
main()

Try to update project...2025-02-03 17:50:35
/content/.docker-app/colab-File-Browser
HEAD is now at 4ca50f8 Created using Colab
Already up to date.
/content

Preparing environment... 2025-02-03 17:50:36

Run container in foreground...2025-02-03 17:50:36

Pulling the image pudding/docker-web:colab-file-browser-20250203.210626...2025-02-03 17:50:37
Info: downloading layer sha256:81463de3d3e40def8a8deebfdb5df4131a0c386dcfc1f5b2ba35ac29528ab486
Info: downloading layer sha256:4f4fb700ef54461cfa02571ae0db9a0dc1e0cdb5577484a6d75e68dc38e8acc1
Info: downloading layer sha256:22a000724d336b2928d092eb5c6706338a1ec497f1c65457557fefb5d8a3a79b
Info: downloading layer sha256:765ac7573f365d1e418b43cdb31f052323c85faa2a0dd0ddc4d5c1fbcfe726bd
Info: downloading layer sha256:54f006d8f037975ea822f6863e64d2002efd6482498ff3bd5e9c536dde024ae0
Info: downloading layer sha256:118df6eacbe30c327bac9be3b223aaccbcfe10a43285692411a4742295cbfa8a
Info: downloading layer sha256:4c4be17cfc167ba19a8e2e30ea1ea3ef3af27f923ae7f