Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// For format details, see https://aka.ms/devcontainer.json. For config options, see the
// README at: https://github.com/devcontainers/templates/tree/main/src/python
{
"name": "Python 3",
// Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
"image": "mcr.microsoft.com/devcontainers/python:1-3.9-bookworm",
"runArgs": [
"--name",
"spark-Learn"
],
"remoteUser": "root",
"postCreateCommand": "bash -i .devcontainer/postCreateCommand.sh",
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {
"moby": "false",
"azureDnsAutoDetection": "false",
"installDockerBuildx": "false",
"installDockerComposeSwitch": "false",
"version": "latest",
"dockerDashComposeVersion": "latest"
}
},
"customizations": {
"vscode": {
"extensions": [
"bastienboutonnet.vscode-dbt",
"chrisdias.vscode-opennewinstance",
"cweijan.vscode-office",
"dvirtz.parquet-viewer",
"henriblancke.vscode-dbt-formatter",
"innoverio.vscode-dbt-power-user",
"mechatroner.rainbow-csv",
"mhutchie.git-graph",
"ms-python.debugpy",
"ms-python.python",
"ms-python.vscode-pylance",
"ms-toolsai.jupyter",
"ms-toolsai.jupyter-keymap",
"ms-toolsai.jupyter-renderers",
"ms-toolsai.vscode-jupyter-cell-tags",
"ms-toolsai.vscode-jupyter-slideshow",
"mtxr.sqltools",
"mtxr.sqltools-driver-pg",
"mtxr.sqltools-driver-sqlite",
"mutantdino.resourcemonitor",
"randomfractalsinc.duckdb-sql-tools",
"samuelcolvin.jinjahtml",
"sourcegraph.cody-ai",
"taoklerks.poor-mans-t-sql-formatter-vscode",
"uloco.theme-bluloco-dark",
"visualstudioexptteam.intellicode-api-usage-examples",
"visualstudioexptteam.vscodeintellicode",
"ms-azuretools.vscode-docker",
"redhat.vscode-yaml",
"esbenp.prettier-vscode",
"vscode-icons-team.vscode-icons"
],
"settings": {
"terminal.integrated.env.osx": {
"PYTHONPATH": "${env:PYTHONPATH}"
},
"terminal.integrated.env.linux": {
"PYTHONPATH": "${env:PYTHONPATH}"
},
"terminal.integrated.env.windows": {
"PYTHONPATH": "${env:PYTHONPATH}"
},
"python.defaultInterpreterPath": ".venv/bin/python",
"python.venvFolders": [
"${workspaceFolder}/.venv"
],
"python.envFile": "${workspaceFolder}/.env",
"python.terminal.activateEnvironment": true
}
}
}
}
91 changes: 91 additions & 0 deletions .devcontainer/postCreateCommand.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
#!/bin/bash
# Print a banner announcing a setup stage: a blank line, three rule lines,
# the title indented by one space, three more rule lines, and a blank line.
# Arguments: $1 - title text shown in the middle of the banner
# Outputs:   the banner on stdout
print_section() {
  local title=$1
  local rule="================================="

  printf '\n'
  printf '%s\n' "$rule" "$rule" "$rule"
  printf ' %s\n' "$title"
  printf '%s\n' "$rule" "$rule" "$rule"
  printf '\n'
}




##### install npm
print_section "INSTALL NPM"

# Install nvm (Node Version Manager).
# -f makes curl fail on an HTTP error instead of piping an error page into bash.
curl -fsSL -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.7/install.sh | bash

# The installer only appends its loader lines to the shell profile; this
# already-running shell has not read them, so `nvm` may not be defined yet.
# Load nvm into the current shell explicitly, using the same default
# directory the installer picks.
if [ -z "${NVM_DIR:-}" ]; then
  if [ -n "${XDG_CONFIG_HOME:-}" ]; then
    NVM_DIR="$XDG_CONFIG_HOME/nvm"
  else
    NVM_DIR="$HOME/.nvm"
  fi
fi
export NVM_DIR
if [ -s "$NVM_DIR/nvm.sh" ]; then
  # shellcheck source=/dev/null
  . "$NVM_DIR/nvm.sh"
else
  echo "nvm.sh not found in $NVM_DIR; the Node.js steps below will fail" >&2
fi

# download and install Node.js (latest available 20.x release)
nvm install 20

# print the Node.js version now active in this shell (expected: v20.x)
node -v

# print the npm version that came with it
npm -v


# deps for sqltools - duckdb driver
npm install duckdb-async@0.9.2



### sets up zsh terminal
print_section "SETUP ZSH TERMINAL"
# NOTE(review): this runs an unpinned script from a personal gist; consider
# pinning it to a specific revision.
# -f: fail on HTTP errors rather than piping an error page into bash.
# -L: follow redirects.
curl -fsSL -o- https://gist.githubusercontent.com/luutuankiet/fbb70fca0f7f948c4e102442d76c363e/raw/boilerplate-dev-env | bash


#### sets up python
print_section "SETUP PYTHON"
apt-get update && apt-get install -y python3-venv

# init then source env vars
print_section "INIT & SOURCE ENV VARS"
chmod +x ./env_init.sh
chmod +x ./source_env.sh

# Both helpers are sourced (not executed) so the variables they set land in
# this shell. The explicit ./ stops bash from searching PATH for a file of
# the same name before looking in the current directory.
. ./env_init.sh
. ./source_env.sh



# create env
# TODO(review): an earlier note here said a from-scratch rebuild was broken by
# package dependency problems in the requirements file, yet this step is
# enabled -- confirm the install below succeeds on a clean container.
print_section "CREATE VENV & INSTALL REQUIREMENTS"

# VIRTUAL_ENV normally comes from .env (env_init.sh writes it as
# "<cwd>/.venv"). Fall back to that same location when it is unset or empty,
# so an empty path is never handed to `venv --clear`.
venv_dir="${VIRTUAL_ENV:-$(pwd)/.venv}"
python3 -m venv --clear "$venv_dir"

# Activate the venv that was just created (rather than a hard-coded ./.venv)
# so that pip installs the requirements into it.
source "$venv_dir/bin/activate" && \
  pip install -r data-processing-spark/1-lab-setup/containers/spark/requirements.txt


# fix for deactivate script : https://github.com/microsoft/vscode-python/wiki/Fixing-%22deactivate%22-command-for-Virtual-Environments
ENV_WORK_DIR=$(pwd)
deactivate_hook="$ENV_WORK_DIR/deactivate"

# -f: fail on HTTP errors instead of saving an error page that every new zsh
#     session would then source.
# -L: follow redirects.
curl -fL -o "$deactivate_hook" https://gist.githubusercontent.com/karrtikr/963469ba74c9b7632d2c43224ffa2f25/raw/deactivate

# Register the hook in ~/.zshrc only once, so re-running this script does not
# stack up duplicate "source" lines.
zshrc_line="source $deactivate_hook"
if ! grep -qxF -- "$zshrc_line" ~/.zshrc 2>/dev/null; then
  echo "$zshrc_line" >> ~/.zshrc
fi

# the following are created by this setup and should stay listed in .gitignore
# .venv
# node_modules
# package-lock.json
# package.json


# optional : download code cli to then access dev container from a browser
# TLS verification stays enabled (no -k): the archive holds an executable that
# is run afterwards, so it must not be accepted from an unverified server.
# -f: fail on HTTP errors so tar never unpacks an error page.
# NOTE(review): the URL is fixed to the x64 build -- confirm this matches the
# container architecture.
curl -fL 'https://code.visualstudio.com/sha/download?build=stable&os=cli-alpine-x64' --output vscode_cli.tar.gz \
  && tar -xf vscode_cli.tar.gz

# usage: ./code tunnel
2 changes: 2 additions & 0 deletions .env
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
VIRTUAL_ENV="/workspaces/efficient_data_processing_spark/.venv"
PYTHONPATH="/workspaces/efficient_data_processing_spark/data-processing-spark"
12 changes: 12 additions & 0 deletions .github/dependabot.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for more information:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
# https://containers.dev/guide/dependabot

version: 2
updates:
- package-ecosystem: "devcontainers"
directory: "/"
schedule:
interval: weekly
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,9 @@ derby.log
*.pyc
__pycache__/

.venv
node_modules
package-lock.json
package.json
vscode_cli.tar.gz
code
37 changes: 37 additions & 0 deletions deactivate
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# >>> Virtual env deactivate hook >>>

# Same as deactivate in "<venv>/bin/activate"
#
# Restores PATH, PYTHONHOME and PS1 from their _OLD_VIRTUAL_* backups (each
# only when the backup is non-empty), clears the command hash table under
# bash/zsh, and unsets VIRTUAL_ENV and VIRTUAL_ENV_PROMPT.
# Arguments: $1 - pass "nondestructive" to keep this function defined;
#                 with any other value (or none) the function removes itself.
deactivate () {
    # Restore the PATH recorded before activation.
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        export PATH="${_OLD_VIRTUAL_PATH:-}"
        unset _OLD_VIRTUAL_PATH
    fi

    # Restore the PYTHONHOME recorded before activation.
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        export PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # Forget cached command locations so the restored PATH takes effect.
    if [ -n "${BASH:-}" ] || [ -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    # Restore the prompt recorded before activation.
    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        export PS1="${_OLD_VIRTUAL_PS1:-}"
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV VIRTUAL_ENV_PROMPT

    # Self-destruct unless the caller asked to keep the function around.
    if [ "${1:-}" != "nondestructive" ] ; then
        unset -f deactivate
    fi
}

# Record the current PATH, prompt and (when non-empty) PYTHONHOME in the
# _OLD_VIRTUAL_* variables that the deactivate function reads back.
_OLD_VIRTUAL_PATH="$PATH"
_OLD_VIRTUAL_PS1="${PS1:-}"
[ -z "${PYTHONHOME:-}" ] || _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"

# <<< Virtual env deactivate hook <<<
8 changes: 8 additions & 0 deletions env_init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Remember the directory this script is run (or sourced) from.
ENV_WORK_DIR="$(pwd)"

# (Re)write .env in that directory with the venv location and the
# PYTHONPATH, both derived from the working directory.
{
  printf 'VIRTUAL_ENV="%s/.venv"\n' "$ENV_WORK_DIR"
  printf 'PYTHONPATH="%s/data-processing-spark"\n' "$ENV_WORK_DIR"
} > .env
4 changes: 4 additions & 0 deletions source_env.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash
# Load the variables defined in ./.env and export them to the environment of
# the shell that sources this file.
set -a # Automatically export all variables
# Explicit ./ so bash reads the .env in the current directory instead of
# first searching PATH for a file named ".env".
source ./.env
set +a # Stop automatically exporting variables