From fcfb56c32ef34629d459535f3db6607b540a9fc2 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 28 May 2024 07:00:19 +0000 Subject: [PATCH 1/8] init setup for devcon to match python version --- .devcontainer/devcontainer.json | 144 +++++++++++++++++++++++++++++ .devcontainer/postCreateCommand.sh | 75 +++++++++++++++ .github/dependabot.yml | 12 +++ env_init.sh | 7 ++ source_env.sh | 3 + 5 files changed, 241 insertions(+) create mode 100644 .devcontainer/devcontainer.json create mode 100755 .devcontainer/postCreateCommand.sh create mode 100644 .github/dependabot.yml create mode 100755 env_init.sh create mode 100755 source_env.sh diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..1a2ddcd --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,144 @@ +// For format details, see https://aka.ms/devcontainer.json. For config options, see the +// README at: https://github.com/devcontainers/templates/tree/main/src/python +{ + "name": "Python 3", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:1-3.9-bookworm", + "runArgs": ["--name","spark-Learn"], + "remoteUser": "root", + "postCreateCommand": "bash -i .devcontainer/postCreateCommand.sh", + "features": { + "ghcr.io/devcontainers/features/docker-in-docker:2": { + "moby": "false", + "azureDnsAutoDetection": "false", + "installDockerBuildx": "false", + "installDockerComposeSwitch": "false", + "version": "latest", + "dockerDashComposeVersion": "latest" + }, + "customizations": { + "vscode": { + "extensions": [ + "bastienboutonnet.vscode-dbt", + "chrisdias.vscode-opennewinstance", + "cweijan.vscode-office", + "dvirtz.parquet-viewer", + "henriblancke.vscode-dbt-formatter", + "innoverio.vscode-dbt-power-user", + "mechatroner.rainbow-csv", + "mhutchie.git-graph", + "ms-python.debugpy", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.vscode-jupyter-slideshow", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "mtxr.sqltools-driver-sqlite", + "mutantdino.resourcemonitor", + "randomfractalsinc.duckdb-sql-tools", + "samuelcolvin.jinjahtml", + "sourcegraph.cody-ai", + "taoklerks.poor-mans-t-sql-formatter-vscode", + "uloco.theme-bluloco-dark", + "visualstudioexptteam.intellicode-api-usage-examples", + "visualstudioexptteam.vscodeintellicode", + "ms-azuretools.vscode-docker", + "redhat.vscode-yaml", + "esbenp.prettier-vscode" + + ], + "settings": { + "terminal.integrated.env.osx": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + }, + "terminal.integrated.env.linux": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + }, + "terminal.integrated.env.windows": { + "PYTHONPATH": "${env:PYTHONPATH};${workspaceFolder}/data-processing-spark" + }, + "python.defaultInterpreterPath": ".venv/bin/python", + "python.venvFolders": [ + "${workspaceFolder}/.venv" + ], + "python.envFile": "${workspaceFolder}/.env", + "python.terminal.activateEnvironment": true, + "findrelated.workspaceRulesets": [ + { + "name": "sql", + "rules": [ + { + "pattern": "^(.*/)?models/(.*/)?(.+\\.sql)$", + "locators": [ + "**/compiled/**/$3" + ] + }, + { + "pattern": "^(.*/)?compiled/(.*/)?(.+\\.sql)$", + "locators": [ + "**/run/**/$3" + ] + }, + { + "pattern": "^(.*/)?run/(.*/)?(.+\\.sql)$", + "locators": [ + "**/models/**/$3" + ] + } + ] + } + ], + "findrelated.applyRulesets": [ + "sql" + ], + "findrelated.applyWorkspaceRulesets": [ + "sql" + ], + "workbench.editor.highlightModifiedTabs": true, + "workbench.editor.labelFormat": "medium", + "workbench.editor.revealIfOpen": true, + "editor.rulers": [ + 99 + ], + "yaml.schemas": { + "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [ + "/**/*.yml", + "!profiles.yml", + "!dbt_project.yml", + "!packages.yml", + "!selectors.yml", + "!profile_template.yml" + ], + "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [ + "dbt_project.yml" + ], + "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [ + "selectors.yml" + ], + "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [ + "packages.yml" + ] + } + } + } + } + } + // Features to add to the dev container. More info: https://containers.dev/features. + // "features": {}, + + // Use 'forwardPorts' to make a list of ports inside the container available locally. + // "forwardPorts": [], + + // Use 'postCreateCommand' to run commands after the container is created. + // "postCreateCommand": "pip3 install --user -r requirements.txt", + + // Configure tool-specific properties. + // "customizations": {}, + + // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. + // "remoteUser": "root" +} diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh new file mode 100755 index 0000000..a2b5341 --- /dev/null +++ b/.devcontainer/postCreateCommand.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Function to print a section header +print_section() { + local section_title=$1 + echo + echo "=================================" + echo "=================================" + echo "=================================" + echo " $section_title" + echo "=================================" + echo "=================================" + echo "=================================" + echo +} + + + + +##### install npm +print_section "INSTALL NPM" + +# installs nvm (Node Version Manager) +curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash + +# download and install Node.js +nvm install 20 + +# verifies the right Node.js version is in the environment +node -v # should print `v20.13.1` + +# verifies the right NPM version is in the environment +npm -v # should print `10.5.2` + + +# deps for sqltools - duckdb driver +npm install duckdb-async@0.9.2 + + + +### sets up zsh terminal +print_section "SETUP ZSH TERMINAL" +curl -o- https://gist.githubusercontent.com/luutuankiet/fbb70fca0f7f948c4e102442d76c363e/raw/boilerplate-dev-env | bash + + +#### sets up python +print_section "SETUP PYTHON" +apt-get update && apt-get install -y python3-venv + +# init then source env vars +print_section "INIT & SOURCE ENV VARS" +chmod +x ./env_init.sh +chmod +x ./source_env.sh + +. ./env_init.sh +source source_env.sh + + + +##### TODO: uncomment this for a true rebuild from scratch. currenlty broken due to packages deps in requirements file. +# create env +print_section "CREATE VENV & INSTALL REQUIREMENTS" +python3 -m venv --clear $VIRTUAL_ENV + +# # add virt env to PATH which allows the next part of script to install packages directly to venv +# export PATH="$VIRTUAL_ENV/bin:$PATH" + +# # install reqs. each lines is a separate process hence neeeds a source .venv in front +source .venv/bin/activate && \ +pip install -r data-processing-spark/1-lab-setup/containers/spark/requirements.txt && \ + + +# fix for deactivate script : https://github.com/microsoft/vscode-python/wiki/Fixing-%22deactivate%22-command-for-Virtual-Environments +ENV_WORK_DIR=$(pwd) +curl -o $ENV_WORK_DIR/deactivate https://gist.githubusercontent.com/karrtikr/963469ba74c9b7632d2c43224ffa2f25/raw/deactivate +echo "source $ENV_WORK_DIR/deactivate" >> ~/.zshrc \ No newline at end of file diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..f33a02c --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,12 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for more information: +# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates +# https://containers.dev/guide/dependabot + +version: 2 +updates: + - package-ecosystem: "devcontainers" + directory: "/" + schedule: + interval: weekly diff --git a/env_init.sh b/env_init.sh new file mode 100755 index 0000000..10a990d --- /dev/null +++ b/env_init.sh @@ -0,0 +1,7 @@ +# parse dir +ENV_WORK_DIR=$(pwd) + +cat < .env +VIRTUAL_ENV="$ENV_WORK_DIR/.venv" +PYTHONPATH="$ENV_WORK_DIR/data-processing-spark" +EOF \ No newline at end of file diff --git a/source_env.sh b/source_env.sh new file mode 100755 index 0000000..2253d5b --- /dev/null +++ b/source_env.sh @@ -0,0 +1,3 @@ +set -a # Automatically export all variables +source .env +set +a # Stop automatically exporting variables \ No newline at end of file From adc9c95e7eeff13a3920ac19f1358fa2fb9f1a51 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 28 May 2024 07:20:34 +0000 Subject: [PATCH 2/8] working setup that the host can correctly parse python code --- .devcontainer/devcontainer.json | 180 +++++++++-------------------- .devcontainer/postCreateCommand.sh | 11 +- .env | 2 + .gitignore | 4 + deactivate | 37 ++++++ 5 files changed, 109 insertions(+), 125 deletions(-) create mode 100644 .env create mode 100644 deactivate diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1a2ddcd..e9b63f8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -4,7 +4,10 @@ "name": "Python 3", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "mcr.microsoft.com/devcontainers/python:1-3.9-bookworm", - "runArgs": ["--name","spark-Learn"], + "runArgs": [ + "--name", + "spark-Learn" + ], "remoteUser": "root", "postCreateCommand": "bash -i .devcontainer/postCreateCommand.sh", "features": { @@ -15,130 +18,59 @@ "installDockerComposeSwitch": "false", "version": "latest", "dockerDashComposeVersion": "latest" - }, - "customizations": { - "vscode": { - "extensions": [ - "bastienboutonnet.vscode-dbt", - "chrisdias.vscode-opennewinstance", - "cweijan.vscode-office", - "dvirtz.parquet-viewer", - "henriblancke.vscode-dbt-formatter", - "innoverio.vscode-dbt-power-user", - "mechatroner.rainbow-csv", - "mhutchie.git-graph", - "ms-python.debugpy", - "ms-python.python", - "ms-python.vscode-pylance", - "ms-toolsai.jupyter", - "ms-toolsai.jupyter-keymap", - "ms-toolsai.jupyter-renderers", - "ms-toolsai.vscode-jupyter-cell-tags", - "ms-toolsai.vscode-jupyter-slideshow", - "mtxr.sqltools", - "mtxr.sqltools-driver-pg", - "mtxr.sqltools-driver-sqlite", - "mutantdino.resourcemonitor", - "randomfractalsinc.duckdb-sql-tools", - "samuelcolvin.jinjahtml", - "sourcegraph.cody-ai", - "taoklerks.poor-mans-t-sql-formatter-vscode", - "uloco.theme-bluloco-dark", - "visualstudioexptteam.intellicode-api-usage-examples", - "visualstudioexptteam.vscodeintellicode", - "ms-azuretools.vscode-docker", - "redhat.vscode-yaml", - "esbenp.prettier-vscode" - + } + }, + "customizations": { + "vscode": { + "extensions": [ + "bastienboutonnet.vscode-dbt", + "chrisdias.vscode-opennewinstance", + "cweijan.vscode-office", + "dvirtz.parquet-viewer", + "henriblancke.vscode-dbt-formatter", + "innoverio.vscode-dbt-power-user", + "mechatroner.rainbow-csv", + "mhutchie.git-graph", + "ms-python.debugpy", + "ms-python.python", + "ms-python.vscode-pylance", + "ms-toolsai.jupyter", + "ms-toolsai.jupyter-keymap", + "ms-toolsai.jupyter-renderers", + "ms-toolsai.vscode-jupyter-cell-tags", + "ms-toolsai.vscode-jupyter-slideshow", + "mtxr.sqltools", + "mtxr.sqltools-driver-pg", + "mtxr.sqltools-driver-sqlite", + "mutantdino.resourcemonitor", + "randomfractalsinc.duckdb-sql-tools", + "samuelcolvin.jinjahtml", + "sourcegraph.cody-ai", + "taoklerks.poor-mans-t-sql-formatter-vscode", + "uloco.theme-bluloco-dark", + "visualstudioexptteam.intellicode-api-usage-examples", + "visualstudioexptteam.vscodeintellicode", + "ms-azuretools.vscode-docker", + "redhat.vscode-yaml", + "esbenp.prettier-vscode" + ], + "settings": { + "terminal.integrated.env.osx": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + }, + "terminal.integrated.env.linux": { + "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + }, + "terminal.integrated.env.windows": { + "PYTHONPATH": "${env:PYTHONPATH};${workspaceFolder}/data-processing-spark" + }, + "python.defaultInterpreterPath": ".venv/bin/python", + "python.venvFolders": [ + "${workspaceFolder}/.venv" ], - "settings": { - "terminal.integrated.env.osx": { - "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" - }, - "terminal.integrated.env.linux": { - "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" - }, - "terminal.integrated.env.windows": { - "PYTHONPATH": "${env:PYTHONPATH};${workspaceFolder}/data-processing-spark" - }, - "python.defaultInterpreterPath": ".venv/bin/python", - "python.venvFolders": [ - "${workspaceFolder}/.venv" - ], - "python.envFile": "${workspaceFolder}/.env", - "python.terminal.activateEnvironment": true, - "findrelated.workspaceRulesets": [ - { - "name": "sql", - "rules": [ - { - "pattern": "^(.*/)?models/(.*/)?(.+\\.sql)$", - "locators": [ - "**/compiled/**/$3" - ] - }, - { - "pattern": "^(.*/)?compiled/(.*/)?(.+\\.sql)$", - "locators": [ - "**/run/**/$3" - ] - }, - { - "pattern": "^(.*/)?run/(.*/)?(.+\\.sql)$", - "locators": [ - "**/models/**/$3" - ] - } - ] - } - ], - "findrelated.applyRulesets": [ - "sql" - ], - "findrelated.applyWorkspaceRulesets": [ - "sql" - ], - "workbench.editor.highlightModifiedTabs": true, - "workbench.editor.labelFormat": "medium", - "workbench.editor.revealIfOpen": true, - "editor.rulers": [ - 99 - ], - "yaml.schemas": { - "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_yml_files-latest.json": [ - "/**/*.yml", - "!profiles.yml", - "!dbt_project.yml", - "!packages.yml", - "!selectors.yml", - "!profile_template.yml" - ], - "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/dbt_project-latest.json": [ - "dbt_project.yml" - ], - "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/selectors-latest.json": [ - "selectors.yml" - ], - "https://raw.githubusercontent.com/dbt-labs/dbt-jsonschema/main/schemas/latest/packages-latest.json": [ - "packages.yml" - ] - } - } + "python.envFile": "${workspaceFolder}/.env", + "python.terminal.activateEnvironment": true } } } - // Features to add to the dev container. More info: https://containers.dev/features. - // "features": {}, - - // Use 'forwardPorts' to make a list of ports inside the container available locally. - // "forwardPorts": [], - - // Use 'postCreateCommand' to run commands after the container is created. - // "postCreateCommand": "pip3 install --user -r requirements.txt", - - // Configure tool-specific properties. - // "customizations": {}, - - // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. - // "remoteUser": "root" -} +} \ No newline at end of file diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh index a2b5341..8628ada 100755 --- a/.devcontainer/postCreateCommand.sh +++ b/.devcontainer/postCreateCommand.sh @@ -61,6 +61,9 @@ source source_env.sh print_section "CREATE VENV & INSTALL REQUIREMENTS" python3 -m venv --clear $VIRTUAL_ENV + + + # # add virt env to PATH which allows the next part of script to install packages directly to venv # export PATH="$VIRTUAL_ENV/bin:$PATH" @@ -72,4 +75,10 @@ pip install -r data-processing-spark/1-lab-setup/containers/spark/requirements.t # fix for deactivate script : https://github.com/microsoft/vscode-python/wiki/Fixing-%22deactivate%22-command-for-Virtual-Environments ENV_WORK_DIR=$(pwd) curl -o $ENV_WORK_DIR/deactivate https://gist.githubusercontent.com/karrtikr/963469ba74c9b7632d2c43224ffa2f25/raw/deactivate -echo "source $ENV_WORK_DIR/deactivate" >> ~/.zshrc \ No newline at end of file +echo "source $ENV_WORK_DIR/deactivate" >> ~/.zshrc + +# remember to add the following to gitignore +# .venv +# node_modules +# package-lock.json +# package.json \ No newline at end of file diff --git a/.env b/.env new file mode 100644 index 0000000..e912702 --- /dev/null +++ b/.env @@ -0,0 +1,2 @@ +VIRTUAL_ENV="/workspaces/efficient_data_processing_spark/.venv" +PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark" diff --git a/.gitignore b/.gitignore index 155eade..716355c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,7 @@ derby.log *.pyc __pycache__/ +.venv +node_modules +package-lock.json +package.json diff --git a/deactivate b/deactivate new file mode 100644 index 0000000..7c7e8a3 --- /dev/null +++ b/deactivate @@ -0,0 +1,37 @@ +# >>> Virtual env deactivate hook >>> + +# Same as deactivate in "/bin/activate" +deactivate () { + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + unset -f deactivate + fi +} + +# Initialize the variables required by deactivate function +_OLD_VIRTUAL_PS1="${PS1:-}" +_OLD_VIRTUAL_PATH="$PATH" +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" +fi + +# <<< Virtual env deactivate hook <<< \ No newline at end of file From 2783da59f6f467c8aa9fed774cea559beac53061 Mon Sep 17 00:00:00 2001 From: luutuankiet <56199834+luutuankiet@users.noreply.github.com> Date: Tue, 28 May 2024 08:22:52 +0000 Subject: [PATCH 3/8] add shebang to scripts --- env_init.sh | 1 + source_env.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/env_init.sh b/env_init.sh index 10a990d..37a3a7e 100755 --- a/env_init.sh +++ b/env_init.sh @@ -1,3 +1,4 @@ +#!/bin/bash # parse dir ENV_WORK_DIR=$(pwd) diff --git a/source_env.sh b/source_env.sh index 2253d5b..2f27c40 100755 --- a/source_env.sh +++ b/source_env.sh @@ -1,3 +1,4 @@ +#!/bin/bash set -a # Automatically export all variables source .env set +a # Stop automatically exporting variables \ No newline at end of file From ab5074b413dad142bf353b1f5320c04551100d4f Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 28 May 2024 09:28:33 +0000 Subject: [PATCH 4/8] add util for code tunnel --- .devcontainer/postCreateCommand.sh | 9 ++++++++- .gitignore | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh index 8628ada..83ce65a 100755 --- a/.devcontainer/postCreateCommand.sh +++ b/.devcontainer/postCreateCommand.sh @@ -81,4 +81,11 @@ echo "source $ENV_WORK_DIR/deactivate" >> ~/.zshrc # .venv # node_modules # package-lock.json -# package.json \ No newline at end of file +# package.json + + +# optional : download code cli to then access dev container from a browser +curl -Lk 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64' --output vscode_cli.tar.gz +tar -xf vscode_cli.tar.gz + +# usage: ./code tunnel \ No newline at end of file diff --git a/.gitignore b/.gitignore index 716355c..589eaac 100644 --- a/.gitignore +++ b/.gitignore @@ -10,3 +10,5 @@ __pycache__/ node_modules package-lock.json package.json +vscode_cli.tar.gz +code From a9efbb3e1ebb2d12a435a38b07b018ae911da94b Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Tue, 28 May 2024 09:40:31 +0000 Subject: [PATCH 5/8] devcon: add iconpack vs code --- .devcontainer/devcontainer.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e9b63f8..1e9f348 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -52,7 +52,8 @@ "visualstudioexptteam.vscodeintellicode", "ms-azuretools.vscode-docker", "redhat.vscode-yaml", - "esbenp.prettier-vscode" + "esbenp.prettier-vscode", + "vscode-icons-team.vscode-icons" ], "settings": { "terminal.integrated.env.osx": { From d6b14098c21a1709cb36ece33d2b93acdaa99631 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Wed, 29 May 2024 08:19:28 +0000 Subject: [PATCH 6/8] declutter only specify container's pythonpath once in env_init --- .devcontainer/devcontainer.json | 6 +++--- .env | 2 +- env_init.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 1e9f348..12d6f7f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -57,13 +57,13 @@ ], "settings": { "terminal.integrated.env.osx": { - "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + "PYTHONPATH": "${env:PYTHONPATH}" }, "terminal.integrated.env.linux": { - "PYTHONPATH": "${env:PYTHONPATH}:${workspaceFolder}/data-processing-spark" + "PYTHONPATH": "${env:PYTHONPATH}" }, "terminal.integrated.env.windows": { - "PYTHONPATH": "${env:PYTHONPATH};${workspaceFolder}/data-processing-spark" + "PYTHONPATH": "${env:PYTHONPATH}" }, "python.defaultInterpreterPath": ".venv/bin/python", "python.venvFolders": [ diff --git a/.env b/.env index e912702..7de6510 100644 --- a/.env +++ b/.env @@ -1,2 +1,2 @@ VIRTUAL_ENV="/workspaces/efficient_data_processing_spark/.venv" -PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark" +PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark:/workspaces/efficient_data_processing_spark/data-processing-spark" diff --git a/env_init.sh b/env_init.sh index 37a3a7e..a1785a4 100755 --- a/env_init.sh +++ b/env_init.sh @@ -4,5 +4,5 @@ ENV_WORK_DIR=$(pwd) cat < .env VIRTUAL_ENV="$ENV_WORK_DIR/.venv" -PYTHONPATH="$ENV_WORK_DIR/data-processing-spark" +PYTHONPATH="$PYTHONPATH:$ENV_WORK_DIR/data-processing-spark" EOF \ No newline at end of file From 87dc257439d1904052245db44d352d11437f4f35 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Wed, 29 May 2024 08:28:45 +0000 Subject: [PATCH 7/8] no need duplicate pythonpath --- env_init.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/env_init.sh b/env_init.sh index a1785a4..37a3a7e 100755 --- a/env_init.sh +++ b/env_init.sh @@ -4,5 +4,5 @@ ENV_WORK_DIR=$(pwd) cat < .env VIRTUAL_ENV="$ENV_WORK_DIR/.venv" -PYTHONPATH="$PYTHONPATH:$ENV_WORK_DIR/data-processing-spark" +PYTHONPATH="$ENV_WORK_DIR/data-processing-spark" EOF \ No newline at end of file From c0769b0910d791b8590a691b28fc44b9727369d9 Mon Sep 17 00:00:00 2001 From: luutuankiet Date: Wed, 29 May 2024 09:03:58 +0000 Subject: [PATCH 8/8] typo in postCreate script --- .devcontainer/postCreateCommand.sh | 2 +- .env | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.devcontainer/postCreateCommand.sh b/.devcontainer/postCreateCommand.sh index 83ce65a..a5c0c0c 100755 --- a/.devcontainer/postCreateCommand.sh +++ b/.devcontainer/postCreateCommand.sh @@ -69,7 +69,7 @@ python3 -m venv --clear $VIRTUAL_ENV # # install reqs. each lines is a separate process hence neeeds a source .venv in front source .venv/bin/activate && \ -pip install -r data-processing-spark/1-lab-setup/containers/spark/requirements.txt && \ +pip install -r data-processing-spark/1-lab-setup/containers/spark/requirements.txt # fix for deactivate script : https://github.com/microsoft/vscode-python/wiki/Fixing-%22deactivate%22-command-for-Virtual-Environments diff --git a/.env b/.env index 7de6510..e912702 100644 --- a/.env +++ b/.env @@ -1,2 +1,2 @@ VIRTUAL_ENV="/workspaces/efficient_data_processing_spark/.venv" -PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark:/workspaces/efficient_data_processing_spark/data-processing-spark" +PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark"