Add readme, autodownload data, gitignore

lukas · Jul 25, 2018 · e5c2182 · e5c2182
1 parent ecc3ca4
commit e5c2182
Show file tree

Hide file tree

Showing 4 changed files with 20 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,4 @@
 wandb-debug.log
+**/wandb/*
+!**/wandb/settings
+keras-sign/sign-language
diff --git a/keras-sign/README.md b/keras-sign/README.md
@@ -0,0 +1,12 @@
+# Sign Language Classifier
+
+In this problem the source data is 28x28 pixel grayscale images of hands making sign language letters (there are only 24 categories, as j and z require movement).  The training and test data are stored as CSV, with pixel values between 0 and 255.  The challenge is to create an ML classifier that performs best on the test dataset.
+
+`perceptron.py` is very simple and lacks normalization.  Your first step should likely be to normalize the input data to be between 0 and 1, then create a Convolutional Neural Net, being careful not to overfit.  Transfer learning, data augmentation, and increasing the size of your dataset are more advanced approaches to achieve higher accuracy.
+
+## Resources
+
+* https://google.com
+* https://keras.io
+* https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
+* http://empslocal.ex.ac.uk/people/staff/np331/index.php?section=FingerSpellingDataset
diff --git a/keras-sign/perceptron.py b/keras-sign/perceptron.py
@@ -1,12 +1,4 @@
 # A very simple perceptron for classifying american sign language letters
-# Possible improvements
-# - Normalize data
-# - Different neural net architecture (maybe CNN?)
-#   - It's a small amount of training data!  Beware of overfitting & use dropout
-# - Data augmentation
-# - Find more data online (please do not use the validation data for training)
-# - Transfer learning
-
 import signdata
 import numpy as np
 from keras.models import Sequential
@@ -26,7 +18,6 @@
 if (config.team_name == 'default'):
     raise ValueError("Please set config.team_name to be your team name")
 
-
 # load data
 (X_test, y_test) = signdata.load_test_data()
 (X_train, y_train) = signdata.load_train_data()
@@ -51,4 +42,4 @@
 
 # Fit the model
 model.fit(X_train, y_train, epochs=config.epochs, validation_data=(X_test, y_test),
-                    callbacks=[WandbCallback(data_type="image", labels=signdata.letters)])
+                    callbacks=[WandbCallback(data_type="image", labels=signdata.letters)])
diff --git a/keras-sign/signdata.py b/keras-sign/signdata.py
@@ -4,13 +4,13 @@
 import numpy as np
 import pandas as pd
 import wandb
+import subprocess
 
 if not os.path.isfile('sign-language/sign_mnist_train.csv'):
-    print("""Can't find data file, please run the following command from this directory:
-  curl https://storage.googleapis.com/wandb-production.appspot.com/mlclass/sign-language-data.tar.gz | tar xvz""")
-    exit()
+    print("Downloading signlanguage dataset...")
+    subprocess.check_output("curl https://storage.googleapis.com/wandb-production.appspot.com/mlclass/sign-language-data.tar.gz | tar xvz", shell=True)
 
-letters = "abcdefghijklmnopqrstuvwxyz"
+letters = "abcdefghiklmnopqrstuvwxy"
 
 def load_train_data():
     df=pd.read_csv('sign-language/sign_mnist_train.csv')