Skip to content

Commit

Permalink
PR changes requested by @henryre
Browse files Browse the repository at this point in the history
  • Loading branch information
ajratner committed Aug 13, 2019
1 parent 92dc67b commit be87aa3
Show file tree
Hide file tree
Showing 7 changed files with 125 additions and 316 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ exclude =
.mypy_cache,
.tox,
.env**,
.venv,
.venv**,
_build,
build,
dist
25 changes: 21 additions & 4 deletions intro/download_data.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
# Execute from snorkel-tutorials/spam/
# Download data,
#!/bin/bash
set -euxo pipefail

# Guard: the relative paths used below only make sense when the current
# directory's final path component is "intro"; bail out otherwise.
case "${PWD##*/}" in
  intro)
    ;;
  *)
    echo "Script must be run from intro directory" >&2
    exit 1
    ;;
esac

# Archive to fetch, and the CSV files expected to exist under data/.
DATA_URL="https://archive.ics.uci.edu/ml/machine-learning-databases/00380/YouTube-Spam-Collection-v1.zip"
FILES=(
  "Youtube01-Psy.csv"
  "Youtube02-KatyPerry.csv"
  "Youtube03-LMFAO.csv"
  "Youtube04-Eminem.csv"
  "Youtube05-Shakira.csv"
)

# Start by assuming nothing needs fetching. If any single expected file is
# absent from data/, flag the whole data set for re-download.
RELOAD=false
for filename in "${FILES[@]}"; do
  [[ -e "data/$filename" ]] || RELOAD=true
done

if [ ! -d "data" ]; then
if [ "$RELOAD" = true ]; then
if [ -d "data/" ]; then rm -Rf "data/"; fi
mkdir -p data
wget $DATA_URL -O data.zip
mv data.zip data/
Expand All @@ -12,4 +29,4 @@ if [ ! -d "data" ]; then
rm data.zip
rm -rf __MACOSX
cd ..
fi
fi
Loading

0 comments on commit be87aa3

Please sign in to comment.