From 0de58d2b924ddee8b3e2e6e6eaf808b785ef2916 Mon Sep 17 00:00:00 2001 From: Rowan Zellers Date: Fri, 24 Aug 2018 11:08:05 -0700 Subject: [PATCH] added 2 more baselines and cleared up requirements :tada: :ok: --- README.md | 2 +- requirements-full.txt | 263 ----------------- requirements.txt | 2 + spec-file.txt | 257 ----------------- swag_baselines/README.md | 3 + .../decomposable_attention/README.md | 5 + .../decomposable_attention/__init__.py | 0 .../decomposable_attention/dataset_reader.py | 96 +++++++ .../decomposable_attention_swag.py | 264 ++++++++++++++++++ .../decomposable_attention/run_experiments.sh | 21 ++ .../train-elmo-goldonly.json | 96 +++++++ .../decomposable_attention/train-elmo.json | 96 +++++++ .../train-glove-840.json | 73 +++++ .../train-glove-goldonly-840.json | 73 +++++ .../train-glove-goldonly.json | 73 +++++ .../decomposable_attention/train-glove.json | 73 +++++ .../train-numberbatch-goldonly.json | 73 +++++ .../train-numberbatch.json | 74 +++++ swag_baselines/esim/dataset_reader.py | 6 +- swag_baselines/unarylstm/__init__.py | 0 swag_baselines/unarylstm/dataset_reader.py | 99 +++++++ swag_baselines/unarylstm/lstm_swag.py | 143 ++++++++++ swag_baselines/unarylstm/run_experiments.sh | 21 ++ .../unarylstm/run_experiments_ending.sh | 22 ++ swag_baselines/unarylstm/train-cnn.json | 59 ++++ .../train-lstmbasic-elmo-endingonly.json | 58 ++++ ...in-lstmbasic-elmo-goldonly-endingonly.json | 58 ++++ .../train-lstmbasic-elmo-goldonly.json | 58 ++++ .../unarylstm/train-lstmbasic-elmo.json | 58 ++++ .../train-lstmbasic-glove-endingonly.json | 58 ++++ ...n-lstmbasic-glove-goldonly-endingonly.json | 58 ++++ .../train-lstmbasic-glove-goldonly.json | 58 ++++ .../unarylstm/train-lstmbasic-glove.json | 58 ++++ ...rain-lstmbasic-numberbatch-endingonly.json | 58 ++++ ...basic-numberbatch-goldonly-endingonly.json | 58 ++++ .../train-lstmbasic-numberbatch-goldonly.json | 58 ++++ .../train-lstmbasic-numberbatch.json | 58 ++++ swag_baselines/unarylstm/train.json | 58 ++++ 38 files changed, 2124 insertions(+), 524 deletions(-) delete mode 100644 requirements-full.txt delete mode 100644 spec-file.txt create mode 100644 swag_baselines/README.md create mode 100644 swag_baselines/decomposable_attention/README.md create mode 100644 swag_baselines/decomposable_attention/__init__.py create mode 100644 swag_baselines/decomposable_attention/dataset_reader.py create mode 100644 swag_baselines/decomposable_attention/decomposable_attention_swag.py create mode 100644 swag_baselines/decomposable_attention/run_experiments.sh create mode 100644 swag_baselines/decomposable_attention/train-elmo-goldonly.json create mode 100644 swag_baselines/decomposable_attention/train-elmo.json create mode 100644 swag_baselines/decomposable_attention/train-glove-840.json create mode 100644 swag_baselines/decomposable_attention/train-glove-goldonly-840.json create mode 100644 swag_baselines/decomposable_attention/train-glove-goldonly.json create mode 100644 swag_baselines/decomposable_attention/train-glove.json create mode 100644 swag_baselines/decomposable_attention/train-numberbatch-goldonly.json create mode 100644 swag_baselines/decomposable_attention/train-numberbatch.json create mode 100644 swag_baselines/unarylstm/__init__.py create mode 100644 swag_baselines/unarylstm/dataset_reader.py create mode 100644 swag_baselines/unarylstm/lstm_swag.py create mode 100644 swag_baselines/unarylstm/run_experiments.sh create mode 100644 swag_baselines/unarylstm/run_experiments_ending.sh create mode 100644 
swag_baselines/unarylstm/train-cnn.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-elmo-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-elmo.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-glove-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-glove-goldonly-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-glove-goldonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-glove.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-numberbatch-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly-endingonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly.json create mode 100644 swag_baselines/unarylstm/train-lstmbasic-numberbatch.json create mode 100644 swag_baselines/unarylstm/train.json diff --git a/README.md b/README.md index f54e9e3..adf5cfd 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ SWAG dataset. More info is at [rowanzellers.com/swag](https://rowanzellers.com/swag). ## Setting up your environment -To create an environment you will need to intall Python 3.6, PyTorch 3.1, and AllenNLP. Additional +To create an environment you will need to install Python 3.6, PyTorch 0.3.1, and AllenNLP. These requirements are listed in `requirements.txt`. You will also need to set PYTHONPATH to the `swagaf` directory. You can do this by running the diff --git a/requirements-full.txt b/requirements-full.txt deleted file mode 100644 index 1f14e87..0000000 --- a/requirements-full.txt +++ /dev/null @@ -1,263 +0,0 @@ -ad3==2.2.dev0 -aiohttp==3.0.9 -alabaster==0.7.10 -allennlp===0.5.0-unreleased -anaconda-client==1.6.5 -anaconda-navigator==1.6.9 -anaconda-project==0.8.0 -argh==0.26.2 -asn1crypto==0.22.0 -astroid==1.5.3 -astropy==2.0.2 -async-timeout==2.0.1 -attrs==17.4.0 -autocorrect==0.3.0 -awscli==1.14.51 -Babel==2.5.0 -backports.csv==1.0.5 -backports.shutil-get-terminal-size==1.0.0 -beautifulsoup4==4.6.0 -bitarray==0.8.1 -bkcharts==0.2 -bleach==2.0.0 -bokeh==0.12.10 -boto==2.48.0 -boto3==1.5.36 -botocore==1.9.4 -Bottleneck==1.2.1 -bz2file==0.98 -certifi==2018.1.18 -cffi==1.11.2 -chardet==3.0.4 -cheroot==6.1.2 -CherryPy==14.0.1 -click==6.7 -cloudpickle==0.4.0 -clyent==1.2.2 -codecov==2.0.15 -colorama==0.3.7 -conda==4.4.7 -conda-build==3.0.27 -conda-verify==2.0.0 -contextlib2==0.5.5 -cookies==2.2.1 -coverage==4.5.1 -cryptography==2.0.3 -cycler==0.10.0 -cymem==1.31.2 -Cython==0.26.1 -cytoolz==0.8.2 -dask==0.15.3 -datashape==0.5.4 -decorator==4.1.2 -dill==0.2.7.1 -distributed==1.19.1 -docutils==0.14 -editdistance==0.4 -en-core-web-sm==2.0.0 -entrypoints==0.2.3 -et-xmlfile==1.0.1 -fastcache==1.0.2 -feedparser==5.2.1 -filelock==2.0.12 -flaky==3.4.0 -Flask==0.12.1 -Flask-Cors==3.0.3 -ftfy==4.4.2 -future==0.16.0 -gender-guesser==0.4.0 -gensim==3.3.0 -gevent==1.2.2 -glob2==0.5 -gmpy2==2.0.8 -greenlet==0.4.12 -gspread==0.6.2 -h5py==2.7.0 -heapdict==1.0.0 -html5lib==0.999999999 -httplib2==0.10.3 -hunspell==0.5.4 -idna==2.6 -idna-ssl==1.0.1 -imageio==2.2.0 -imagesize==0.7.1 -ipdb==0.10.3 -ipykernel==4.6.1 -ipython==6.1.0 -ipython-genutils==0.2.0 -ipywidgets==7.0.0 -isort==4.2.15 -itsdangerous==0.24 -jdcal==1.3 -jedi==0.10.2 -Jinja2==2.9.6 -jmespath==0.9.3 -jsonschema==2.6.0 -jupyter==1.0.0 -jupyter-client==5.1.0
-jupyter-console==5.2.0 -jupyter-core==4.3.0 -jupyterlab==0.27.0 -jupyterlab-launcher==0.4.0 -lazy-object-proxy==1.3.1 -line-profiler==2.1.2 -livereload==2.5.1 -llvmlite==0.20.0 -locket==0.2.0 -lxml==4.1.0 -MarkupSafe==1.0 -matplotlib==2.1.0 -mccabe==0.6.1 -mistune==0.7.4 -more-itertools==4.1.0 -mpmath==0.19 -msgpack-numpy==0.4.1 -msgpack-python==0.4.8 -multidict==4.1.0 -multipledispatch==0.4.9 -murmurhash==0.28.0 -mwclient==0.8.7 -mwparserfromhell==0.5 -mypy==0.521 -mysqlclient==1.3.12 -navigator-updater==0.1.0 -nbconvert==5.3.1 -nbformat==4.4.0 -networkx==2.0 -nltk==3.2.4 -nose==1.3.7 -notebook==5.0.0 -num2words==0.5.6 -numba==0.35.0+10.g143f70e90 -numexpr==2.6.2 -numpy==1.13.3 -numpydoc==0.7.0 -oauth2client==4.1.2 -oauthlib==2.0.6 -odo==0.5.1 -olefile==0.44 -openpyxl==2.4.8 -overrides==1.9 -packaging==16.8 -pandas==0.20.3 -pandocfilters==1.4.2 -partd==0.3.8 -path.py==10.3.1 -pathlib==1.0.1 -pathlib2==2.3.0 -pathtools==0.1.2 -patsy==0.4.1 -Pattern==2.6 -pdfminer.six==20170720 -pep8==1.7.0 -pexpect==4.2.1 -pickleshare==0.7.4 -Pillow==4.2.1 -pkginfo==1.4.2 -plac==0.9.6 -ply==3.10 -port-for==0.3.1 -portend==2.2 -preshed==1.0.0 -prompt-toolkit==1.0.15 -protobuf==3.5.2 -psutil==5.4.0 -psycopg2==2.7.4 -ptyprocess==0.5.2 -py==1.4.34 -pyasn1==0.4.2 -pyasn1-modules==0.2.1 -pycocotools==2.0 -pycodestyle==2.3.1 -pycosat==0.6.3 -pycparser==2.18 -pycrypto==2.6.1 -pycryptodome==3.6.0 -pycurl==7.43.0 -pyenchant==2.0.0 -pyflakes==1.6.0 -Pygments==2.2.0 -pyhocon==0.3.35 -pylint==1.8.1 -pyodbc==4.0.17 -pyOpenSSL==17.2.0 -pypandoc==1.4 -pyparsing==2.2.0 -PySocks==1.6.7 -pytest==3.2.1 -pytest-cov==2.5.1 -pytest-pythonpath==0.7.2 -python-dateutil==2.6.1 -python-docx==0.8.6 -pytz==2017.3 -PyWavelets==0.5.2 -pywikibot==3.0.dev0 -PyYAML==3.12 -pyzmq==16.0.2 -QtAwesome==0.4.4 -qtconsole==4.3.1 -QtPy==1.3.1 -regex==2017.4.5 -requests==2.18.4 -requests-oauthlib==0.8.0 -requests-toolbelt==0.8.0 -responses==0.8.1 -rope==0.10.5 -rsa==3.4.2 -ruamel-yaml==0.11.14 -s3transfer==0.1.13 -scikit-image==0.13.1 -scikit-learn==0.19.1 -scipy==0.19.1 -seaborn==0.8 -simplegeneric==0.8.1 -simplejson==3.13.2 -singledispatch==3.4.0.3 -six==1.11.0 -smart-open==1.5.6 -snowballstemmer==1.2.1 -sortedcontainers==1.5.9 -spacy==2.0.6 -Sphinx==1.5.3 -sphinx-autobuild==0.7.1 -sphinx-rtd-theme==0.2.4 -sphinxcontrib-websupport==1.0.1 -spyder==3.2.4 -SQLAlchemy==1.1.13 -statsmodels==0.8.0 -sympy==1.1.1 -tables==3.4.2 -tblib==1.3.2 -tempora==1.11 -tensorboardX==1.0 -termcolor==1.1.0 -terminado==0.6 -testpath==0.3.1 -thinc==6.10.2 -toolz==0.8.2 -torch==0.3.1 -torchvision==0.2.0 -tornado==4.5.2 -tqdm==4.19.4 -traitlets==4.3.2 -twine==1.11.0 -typed-ast==1.0.4 -typing==3.6.2 -ujson==1.35 -unicodecsv==0.14.1 -Unidecode==1.0.22 -urllib3==1.22 -watchdog==0.8.3 -wcwidth==0.1.7 -webencodings==0.5.1 -Werkzeug==0.12.2 -widgetsnbextension==3.0.2 -wikitextparser==0.20.0 -wrapt==1.10.11 -xarray==0.10.2 -xgboost==0.71 -xlrd==1.1.0 -XlsxWriter==1.0.2 -xlwt==1.3.0 -yarl==1.1.1 -zict==0.1.3 diff --git a/requirements.txt b/requirements.txt index 712d1e5..a264c22 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,3 @@ pandas==0.20.3 +torch==0.3.1 +git+git://github.com/allenai/allennlp.git@7142962d330ca5a95cade114c26a361c78f2042e diff --git a/spec-file.txt b/spec-file.txt deleted file mode 100644 index 8dee586..0000000 --- a/spec-file.txt +++ /dev/null @@ -1,257 +0,0 @@ -# This file may be used to create an environment using: -# $ conda create --name --file -# platform: linux-64 -@EXPLICIT 
-https://repo.continuum.io/pkgs/main/linux-64/ca-certificates-2018.03.07-0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/conda-env-2.6.0-h36134e3_1.tar.bz2 -https://conda.anaconda.org/pytorch/linux-64/cuda90-1.0-h6433d27_0.tar.bz2 -https://conda.anaconda.org/anaconda/linux-64/gdbm-1.12-0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/intel-openmp-2018.0.0-h15fc484_7.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libgcc-ng-7.2.0-h7cc24e2_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libgfortran-ng-7.2.0-h9f7466a_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libstdcxx-ng-7.2.0-h7a57d05_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bzip2-1.0.6-h0376d23_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/expat-2.2.4-hc00ebd1_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/gmp-6.1.2-hb3b607b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/graphite2-1.3.10-hc526e54_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/icu-58.2-h211956c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jbig-2.1-hdba287a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jpeg-9b-habf39ab_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libffi-3.2.1-h4deb6c0_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libgcc-7.2.0-h69d50b8_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libsodium-1.0.13-h31c71d8_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libssh2-1.8.0-h8c220ad_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libtool-2.4.6-hd50d1a6_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libxcb-1.12-h84ff03f_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/lzo-2.10-h1bfc0ba_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mkl-2018.0.0-hb491cac_4.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ncurses-6.0-h06874d7_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/openssl-1.0.2o-h20670df_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/patchelf-0.9-hf79760b_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pcre-8.41-hc71a17e_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pixman-0.34.0-h83dc358_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/tk-8.6.7-h5979e9b_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/unixodbc-2.3.4-hc36303a_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/xz-5.2.3-h2bcbf08_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/yaml-0.1.7-h96e3832_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/zlib-1.2.11-hfbfcf68_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/curl-7.55.1-hcb0b314_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/glib-2.53.6-hc861d11_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/hdf5-1.10.1-hb0523eb_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libedit-3.1-heed3624_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libpng-1.6.32-hda9c8bc_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libtiff-4.0.8-h90200ff_9.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libxml2-2.9.4-h6b072ca_5.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mpfr-3.1.5-h12ff648_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pandoc-1.19.2.1-hea2e7c5_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/readline-7.0-hac23ff0_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/zeromq-4.2.2-hb0b69da_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/dbus-1.10.22-h3b5a359_0.tar.bz2 
-https://repo.continuum.io/pkgs/main/linux-64/freetype-2.8-h52ed37b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/gstreamer-1.12.2-h4f93127_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/libxslt-1.1.29-hcf9102b_5.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mpc-1.0.3-hf803216_4.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sqlite-3.20.1-h6d8b0f3_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/fontconfig-2.12.4-h88586e7_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/gst-plugins-base-1.12.2-he3457e5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/python-3.6.3-hc9025b9_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/alabaster-0.7.10-py36h306e16b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/anaconda-custom-py36hbbc8b67_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/asn1crypto-0.22.0-py36h265ca7c_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/backports-1.0-py36hfa02d7e_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/beautifulsoup4-4.6.0-py36h49b8c8c_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bitarray-0.8.1-py36h5834eb8_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/boto-2.48.0-py36h6e4cd66_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cairo-1.14.10-haa5651f_5.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/certifi-2018.1.18-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/chardet-3.0.4-py36h0f667ec_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/click-6.7-py36h5253387_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cloudpickle-0.4.0-py36h30f8c20_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/colorama-0.3.9-py36h489cec4_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/contextlib2-0.5.5-py36h6c84a62_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/cymem-1.31.2-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/dask-core-0.15.3-py36h10e6167_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/decorator-4.1.2-py36hd076ac8_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/dill-0.2.7.1-py36h644ae93_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/docutils-0.14-py36hb0f60f5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/entrypoints-0.2.3-py36h1aec115_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/et_xmlfile-1.0.1-py36hd6bccc3_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/fastcache-1.0.2-py36h5b0c431_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/filelock-2.0.12-py36hacfa1f5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/glob2-0.5-py36h2c1b292_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/gmpy2-2.0.8-py36h55090d7_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/greenlet-0.4.12-py36h2d503a6_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/heapdict-1.0.0-py36h79797d7_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/idna-2.6-py36h82fb2a8_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/imagesize-0.7.1-py36h52d8127_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ipython_genutils-0.2.0-py36hb52b0d5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/itsdangerous-0.24-py36h93cc618_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jdcal-1.3-py36h4c697fb_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jedi-0.10.2-py36h552def0_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/lazy-object-proxy-1.3.1-py36h10fcdad_0.tar.bz2 
-https://repo.continuum.io/pkgs/main/linux-64/llvmlite-0.20.0-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/locket-0.2.0-py36h787c0ad_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/lxml-4.1.0-py36h5b66e50_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/markupsafe-1.0-py36hd9260cd_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mccabe-0.6.1-py36h5ad9710_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mistune-0.7.4-py36hbab8784_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mkl-service-1.1.2-py36h17a0993_4.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/mpmath-0.19-py36h8cc018b_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/msgpack-python-0.4.8-py36hec4c5d1_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/multipledispatch-0.4.9-py36h41da3fb_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/murmurhash-0.28.0-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/numpy-1.13.3-py36ha12f23b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/olefile-0.44-py36h79f9f78_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pandocfilters-1.4.2-py36ha6701b7_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/path.py-10.3.1-py36he0c6f6d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pep8-1.7.0-py36h26ade29_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pickleshare-0.7.4-py36h63277f8_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pkginfo-1.4.1-py36h215d178_1.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/plac-0.9.6-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ply-3.10-py36hed35086_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/psutil-5.4.0-py36h84c53db_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ptyprocess-0.5.2-py36h69acd42_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/py-1.4.34-py36h0712aa3_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pycodestyle-2.3.1-py36hf609f19_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pycosat-0.6.3-py36h0a5515d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pycparser-2.18-py36hf9f622e_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pycrypto-2.6.1-py36h6998063_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pycurl-7.43.0-py36h5e72054_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyodbc-4.0.17-py36h999153c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyparsing-2.2.0-py36hee85983_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pysocks-1.6.7-py36hd97a5b1_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pytz-2017.2-py36hc2ccc2a_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyyaml-3.12-py36hafb9ca4_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyzmq-16.0.2-py36h3b0cf96_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/qt-5.6.2-h974d657_12.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/qtpy-1.3.1-py36h3691cc8_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/regex-2017.11.09-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/rope-0.10.5-py36h1f8c17e_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ruamel_yaml-0.11.14-py36ha2fb22d_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/simplegeneric-0.8.1-py36h2cb9092_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sip-4.18.1-py36h51ed4ed_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/six-1.11.0-py36h372c433_1.tar.bz2 
-https://repo.continuum.io/pkgs/main/linux-64/snowballstemmer-1.2.1-py36h6febd40_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sortedcontainers-1.5.9-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sphinxcontrib-1.0-py36h6d0f590_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sqlalchemy-1.1.13-py36hfb5efd7_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/tblib-1.3.2-py36h34cf8b6_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/termcolor-1.1.0-py36_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/testpath-0.3.1-py36h8cadb63_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/toolz-0.8.2-py36h81f2dff_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/tornado-4.5.2-py36h1283b2a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/tqdm-4.19.4-py36ha5a5176_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/typing-3.6.2-py36h7da032a_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/ujson-1.35-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/unicodecsv-0.14.1-py36ha668878_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/wcwidth-0.1.7-py36hdf4376a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/webencodings-0.5.1-py36h800622e_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/werkzeug-0.12.2-py36hc703753_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/wrapt-1.10.11-py36h28b7045_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/xlrd-1.1.0-py36h1db9f0c_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/xlsxwriter-1.0.2-py36h3de1aca_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/xlwt-1.3.0-py36h7b00a1f_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/babel-2.5.0-py36h7d14adf_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/backports.shutil_get_terminal_size-1.0.0-py36hfea85ff_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bottleneck-1.2.1-py36haac1ea0_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cffi-1.10.0-py36had8d393_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/conda-verify-2.0.0-py36h98955d8_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cycler-0.10.0-py36h93f1223_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cytoolz-0.8.2-py36h708bfd4_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/h5py-2.7.0-py36he81ebca_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/harfbuzz-1.5.0-h2545bd6_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/html5lib-0.999999999-py36h2cfc398_0.tar.bz2 -https://conda.anaconda.org/conda-forge/noarch/msgpack-numpy-0.4.1-py_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/networkx-2.0-py36h7e96fb8_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/nltk-3.2.4-py36h1a0979f_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/numba-0.35.0-np113py36_10.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/numexpr-2.6.2-py36hdd3393f_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/openpyxl-2.4.8-py36h41dd2a8_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/packaging-16.8-py36ha668100_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/partd-0.3.8-py36h36fd896_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pathlib2-2.3.0-py36h49efa8e_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pexpect-4.2.1-py36h3b9d41b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pillow-4.2.1-py36h9119f52_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/preshed-1.0.0-py36_0.tar.bz2 
-https://repo.continuum.io/pkgs/main/linux-64/pyqt-5.6.0-py36h0386399_5.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/python-dateutil-2.6.1-py36h88d3b88_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pywavelets-0.5.2-py36he602eb0_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/qtawesome-0.4.4-py36h609ed8c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/scipy-0.19.1-py36h9976243_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/setuptools-36.5.0-py36he42e2e1_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/singledispatch-3.4.0.3-py36h7a266c3_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sphinxcontrib-websupport-1.0.1-py36hb5cb234_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sympy-1.1.1-py36hc6d1c1c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/terminado-0.6-py36ha25a19f_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/traitlets-4.3.2-py36h674d592_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/zict-0.1.3-py36h3a3bf81_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/astroid-1.5.3-py36hbdb9df2_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bleach-2.0.0-py36h688b259_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/clyent-1.2.2-py36h7e57e65_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cryptography-2.0.3-py36ha225213_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/cython-0.26.1-py36h21c49d0_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/datashape-0.5.4-py36h3ad6b5c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/distributed-1.19.1-py36h25f3894_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/ftfy-4.4.2-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/get_terminal_size-1.0.0-haa9412d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/gevent-1.2.2-py36h2fe25dc_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/imageio-2.2.0-py36he555465_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/isort-4.2.15-py36had401c0_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jinja2-2.9.6-py36h489bce4_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jsonschema-2.6.0-py36h006f8b5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyter_core-4.3.0-py36h357a921_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/matplotlib-2.1.0-py36hba5de38_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/navigator-updater-0.1.0-py36h14770f7_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/nose-1.3.7-py36hcdf7029_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pandas-0.20.3-py36h842e28d_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pango-1.40.11-h8191d47_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/patsy-0.4.1-py36ha3be15e_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyflakes-1.6.0-py36h7bd6a15_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pygments-2.2.0-py36h0d3125c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pytables-3.4.2-py36h3b5282a_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pytest-3.2.1-py36h11ad3bb_1.tar.bz2 -https://conda.anaconda.org/pytorch/linux-64/pytorch-0.3.0-py36_cuda9.0.176_cudnn7.0.3hdc18817_4.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/scikit-learn-0.19.1-py36h7aa7ec6_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/thinc-6.10.2-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/wheel-0.29.0-py36he7f4e38_1.tar.bz2 
-https://repo.continuum.io/pkgs/main/linux-64/astropy-2.0.2-py36ha51211e_4.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bkcharts-0.2-py36h735825a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/bokeh-0.12.10-py36hbb0e44a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/flask-0.12.2-py36hb24657c_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyter_client-5.1.0-py36h614e9ea_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/nbformat-4.4.0-py36h31c9010_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pip-9.0.1-py36h8ec8b28_3.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/prompt_toolkit-1.0.15-py36h17d85b1_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pylint-1.7.4-py36hb9d4533_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/pyopenssl-17.2.0-py36h5cc804b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/statsmodels-0.8.0-py36h8533d0b_0.tar.bz2 -https://conda.anaconda.org/pytorch/linux-64/torchvision-0.2.0-py36h17b6947_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/xarray-0.10.2-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/dask-0.15.3-py36hdc2c8aa_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/flask-cors-3.0.3-py36h2d857d3_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ipython-6.1.0-py36hc72a948_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/nbconvert-5.3.1-py36hb41ffb7_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/seaborn-0.8.0-py36h197244f_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/urllib3-1.22-py36hbe7ace6_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ipykernel-4.6.1-py36hbf841aa_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/odo-0.5.1-py36h90ed295_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/requests-2.18.4-py36he2e5f8d_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/scikit-image-0.13.1-py36h14c3975_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/anaconda-client-1.6.5-py36h19c0dcd_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/conda-4.4.7-py36_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyter_console-5.2.0-py36he59e554_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/notebook-5.0.0-py36h0b20546_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/qtconsole-4.3.1-py36h8f73b5b_0.tar.bz2 -https://conda.anaconda.org/conda-forge/linux-64/spacy-2.0.6-py36_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/sphinx-1.6.3-py36he5f0bdb_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/anaconda-project-0.8.0-py36h29abdf5_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/conda-build-3.0.27-py36h940a66d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyterlab_launcher-0.4.0-py36h4d8058d_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/numpydoc-0.7.0-py36h18f165f_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/widgetsnbextension-3.0.2-py36hd01bb71_1.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/anaconda-navigator-1.6.9-py36h11ddaaa_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/ipywidgets-7.0.0-py36h7b55c3a_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyterlab-0.27.0-py36h86377d0_2.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/spyder-3.2.4-py36hbe6152b_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/_ipyw_jlab_nb_ext_conf-0.1.0-py36he11e457_0.tar.bz2 -https://repo.continuum.io/pkgs/main/linux-64/jupyter-1.0.0-py36h9896ce5_0.tar.bz2 diff --git a/swag_baselines/README.md 
b/swag_baselines/README.md new file mode 100644 index 0000000..0742a2c --- /dev/null +++ b/swag_baselines/README.md @@ -0,0 +1,3 @@ +# swag_baselines + +Currently there are 3 baselines here, [Decomposable Attention](https://arxiv.org/abs/1606.01933) (Parikh et al., 2016), [ESIM](https://arxiv.org/abs/1609.06038) (Chen et al., 2017), and a unary LSTM model. Ping me if you'd like checkpoints. \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/README.md b/swag_baselines/decomposable_attention/README.md new file mode 100644 index 0000000..161b910 --- /dev/null +++ b/swag_baselines/decomposable_attention/README.md @@ -0,0 +1,5 @@ +# To run + +python -m allennlp.run train train.json -s tmp/output0 --include-package swag_baselines.decomposable_attention + +python -m allennlp.run evaluate tmp/output0/best.th --evaluation-data-file ../../data/test.csv \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/__init__.py b/swag_baselines/decomposable_attention/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/swag_baselines/decomposable_attention/dataset_reader.py b/swag_baselines/decomposable_attention/dataset_reader.py new file mode 100644 index 0000000..f5875d8 --- /dev/null +++ b/swag_baselines/decomposable_attention/dataset_reader.py @@ -0,0 +1,96 @@ +# Exactly the same as the other dataset reader + +from typing import Dict, List +import json +import logging + +from overrides import overrides + +from allennlp.common import Params +from allennlp.common.file_utils import cached_path +from allennlp.data.dataset_readers.dataset_reader import DatasetReader +from allennlp.data.fields import Field, TextField, LabelField +from allennlp.data.instance import Instance +from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer +from allennlp.data.tokenizers import Tokenizer, WordTokenizer +import pandas as pd +import numpy as np + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + +USE_S1 = True + + +@DatasetReader.register("swag") +class SwagReader(DatasetReader): + """ + Reads the SWAG dataset from a CSV file. Each row holds a context sentence ("sent1"), the start of + a follow-up sentence ("sent2"), four candidate endings ("ending0" through "ending3"), and, for + labeled splits, a "label" column with the index of the gold ending. We convert each row into a + "premise" field, four "hypothesis{i}" fields, and (when present) a "label" field. + + Parameters + ---------- + tokenizer : ``Tokenizer``, optional (default=``WordTokenizer()``) + We use this ``Tokenizer`` for both the premise and the hypotheses. See :class:`Tokenizer`. + token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``) + We similarly use this for both the premise and the hypotheses. See :class:`TokenIndexer`. + """ + + def __init__(self, + tokenizer: Tokenizer = None, + token_indexers: Dict[str, TokenIndexer] = None, + use_only_gold_examples: bool = False) -> None: + super().__init__(lazy=False) + self._tokenizer = tokenizer or WordTokenizer() + self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} + self.use_only_gold_examples = use_only_gold_examples + + @overrides + def _read(self, file_path: str): + if not USE_S1: + logger.warning("We're splitting the sentences up here!!!! 
WATCH OUT\n\n\n") + swag = pd.read_csv(file_path) + + if self.use_only_gold_examples and file_path.endswith('train.csv'): + swag = swag[swag['gold-source'].str.startswith('gold')] + + for _, row in swag.iterrows(): + if USE_S1: + premise = row['sent1'] + endings = [row['ending{}'.format(i)] for i in range(4)] + hypos = ['{} {}'.format(row['sent2'], end) for end in endings] + else: + premise = row['sent2'] + hypos = [row['ending{}'.format(i)] for i in range(4)] + + yield self.text_to_instance(premise, hypos, label=row['label'] if hasattr(row, 'label') else None) + + @overrides + def text_to_instance(self, # type: ignore + premise: str, + hypotheses: List[str], + label: int = None) -> Instance: + # pylint: disable=arguments-differ + fields: Dict[str, Field] = {} + premise_tokens = self._tokenizer.tokenize(premise) + fields['premise'] = TextField(premise_tokens, self._token_indexers) + + # This could be another way to get randomness + for i, hyp in enumerate(hypotheses): + hypothesis_tokens = self._tokenizer.tokenize(hyp) + fields['hypothesis{}'.format(i)] = TextField(hypothesis_tokens, self._token_indexers) + + if label is not None: + fields['label'] = LabelField(label, skip_indexing=True) + return Instance(fields) + + @classmethod + def from_params(cls, params: Params) -> 'SwagReader': + tokenizer = Tokenizer.from_params(params.pop('tokenizer', {})) + token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {})) + use_only_gold_examples = params.pop('use_only_gold_examples', False) + params.assert_empty(cls.__name__) + return cls(tokenizer=tokenizer, + token_indexers=token_indexers, + use_only_gold_examples=use_only_gold_examples) diff --git a/swag_baselines/decomposable_attention/decomposable_attention_swag.py b/swag_baselines/decomposable_attention/decomposable_attention_swag.py new file mode 100644 index 0000000..fadd3e4 --- /dev/null +++ b/swag_baselines/decomposable_attention/decomposable_attention_swag.py @@ -0,0 +1,264 @@ +from typing import Dict, Optional + +import torch + +from allennlp.common import Params +from allennlp.common.checks import check_dimensions_match +from allennlp.data import Vocabulary +from allennlp.models.model import Model +from allennlp.modules import FeedForward, MatrixAttention +from allennlp.modules import Seq2SeqEncoder, SimilarityFunction, TimeDistributed, TextFieldEmbedder +from allennlp.nn import InitializerApplicator, RegularizerApplicator +from allennlp.nn.util import get_text_field_mask, last_dim_softmax, weighted_sum +from allennlp.training.metrics import CategoricalAccuracy +from allennlp.modules.token_embedders import Embedding, ElmoTokenEmbedder + +import logging + +logger = logging.getLogger(__name__) # pylint: disable=invalid-name + + +@Model.register("decomposable_attention_swag") +class DecomposableAttention(Model): + """ + This ``Model`` implements the Decomposable Attention model described in `"A Decomposable + Attention Model for Natural Language Inference" + `_ + by Parikh et al., 2016, with some optional enhancements before the decomposable attention + actually happens. Parikh's original model allowed for computing an "intra-sentence" attention + before doing the decomposable entailment step. We generalize this to any + :class:`Seq2SeqEncoder` that can be applied to the premise and/or the hypothesis before + computing entailment. 
+ + The basic outline of this model is to get an embedded representation of each word in the + premise and hypothesis, align words between the two, compare the aligned phrases, and make a + final entailment decision based on this aggregated comparison. Each step in this process uses + a feedforward network to modify the representation. + + Parameters + ---------- + vocab : ``Vocabulary`` + text_field_embedder : ``TextFieldEmbedder`` + Used to embed the ``premise`` and ``hypothesis`` ``TextFields`` we get as input to the + model. + attend_feedforward : ``FeedForward`` + This feedforward network is applied to the encoded sentence representations before the + similarity matrix is computed between words in the premise and words in the hypothesis. + similarity_function : ``SimilarityFunction`` + This is the similarity function used when computing the similarity matrix between words in + the premise and words in the hypothesis. + compare_feedforward : ``FeedForward`` + This feedforward network is applied to the aligned premise and hypothesis representations, + individually. + aggregate_feedforward : ``FeedForward`` + This final feedforward network is applied to the concatenated, summed result of the + ``compare_feedforward`` network, and its output is used as the entailment class logits. + premise_encoder : ``Seq2SeqEncoder``, optional (default=``None``) + After embedding the premise, we can optionally apply an encoder. If this is ``None``, we + will do nothing. + hypothesis_encoder : ``Seq2SeqEncoder``, optional (default=``None``) + After embedding the hypothesis, we can optionally apply an encoder. If this is ``None``, + we will use the ``premise_encoder`` for the encoding (doing nothing if ``premise_encoder`` + is also ``None``). + initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``) + Used to initialize the model parameters. + regularizer : ``RegularizerApplicator``, optional (default=``None``) + If provided, will be used to calculate the regularization penalty during training. 
+ """ + def __init__(self, vocab: Vocabulary, + text_field_embedder: TextFieldEmbedder, + attend_feedforward: FeedForward, + similarity_function: SimilarityFunction, + compare_feedforward: FeedForward, + aggregate_feedforward: FeedForward, + premise_encoder: Optional[Seq2SeqEncoder] = None, + hypothesis_encoder: Optional[Seq2SeqEncoder] = None, + initializer: InitializerApplicator = InitializerApplicator(), + regularizer: Optional[RegularizerApplicator] = None, + preload_path: Optional[str] = None) -> None: + super(DecomposableAttention, self).__init__(vocab, regularizer) + + self._text_field_embedder = text_field_embedder + self._attend_feedforward = TimeDistributed(attend_feedforward) + self._matrix_attention = MatrixAttention(similarity_function) + self._compare_feedforward = TimeDistributed(compare_feedforward) + self._aggregate_feedforward = aggregate_feedforward + self._premise_encoder = premise_encoder + self._hypothesis_encoder = hypothesis_encoder or premise_encoder + + # self._num_labels = vocab.get_vocab_size(namespace="labels") + + check_dimensions_match(text_field_embedder.get_output_dim(), attend_feedforward.get_input_dim(), + "text field embedding dim", "attend feedforward input dim") + # check_dimensions_match(aggregate_feedforward.get_output_dim(), self._num_labels, + # "final output dimension", "number of labels") + + self._accuracy = CategoricalAccuracy() + self._loss = torch.nn.CrossEntropyLoss() + + initializer(self) + + # Do we want to initialize with the SNLI stuff? let's say yes. + # 'snli-decomposable-attention/weights.th' + if preload_path is not None: + logger.info("Preloading!") + preload = torch.load(preload_path) + own_state = self.state_dict() + for name, param in preload.items(): + if name not in own_state: + logger.info("Unexpected key {} in state_dict with size {}".format(name, param.size())) + elif param.size() == own_state[name].size(): + own_state[name].copy_(param) + else: + logger.info("Network has {} with size {}, ckpt has {}".format(name, + own_state[name].size(), + param.size())) + + missing = set(own_state.keys()) - set(preload.keys()) + if len(missing) > 0: + logger.info("We couldn't find {}".format(','.join(missing))) + + + def forward(self, # type: ignore + premise: Dict[str, torch.LongTensor], + hypothesis0: Dict[str, torch.LongTensor], + hypothesis1: Dict[str, torch.LongTensor], + hypothesis2: Dict[str, torch.LongTensor], + hypothesis3: Dict[str, torch.LongTensor], + label: torch.IntTensor = None) -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + """ + Parameters + ---------- + premise : Dict[str, torch.LongTensor] + From a ``TextField`` + hypothesis : Dict[str, torch.LongTensor] + From a ``TextField`` + label : torch.IntTensor, optional (default = None) + From a ``LabelField`` + + Returns + ------- + An output dictionary consisting of: + + label_logits : torch.FloatTensor + A tensor of shape ``(batch_size, num_labels)`` representing unnormalised log + probabilities of the entailment label. + label_probs : torch.FloatTensor + A tensor of shape ``(batch_size, num_labels)`` representing probabilities of the + entailment label. + loss : torch.FloatTensor, optional + A scalar loss to be optimised. 
+ """ + if isinstance(self._text_field_embedder, ElmoTokenEmbedder): + self._text_field_embedder._elmo._elmo_lstm._elmo_lstm.reset_states() + + hyps = [hypothesis0, hypothesis1, hypothesis2, hypothesis3] + embedded_premise = self._text_field_embedder(premise) + if isinstance(self._text_field_embedder, ElmoTokenEmbedder): + self._text_field_embedder._elmo._elmo_lstm._elmo_lstm.reset_states() + + embedded_hypotheses = [] + for hypothesis in hyps: + if isinstance(self._text_field_embedder, ElmoTokenEmbedder): + self.text_field_embedder._elmo._elmo_lstm._elmo_lstm.reset_states() + embedded_hypotheses.append(self._text_field_embedder(hypothesis)) + + premise_mask = get_text_field_mask(premise).float() + hypothesis_masks = [get_text_field_mask(hypothesis).float() for hypothesis in hyps] + + if self._premise_encoder: + embedded_premise = self._premise_encoder(embedded_premise, premise_mask) + if self._hypothesis_encoder: + embedded_hypotheses = [self._hypothesis_encoder(emb, mask) for emb, mask in zip(embedded_hypotheses, hypothesis_masks)] + + projected_premise = self._attend_feedforward(embedded_premise) + + label_logits = [] + for i, (embedded_hypothesis, hypothesis_mask) in enumerate(zip(embedded_hypotheses, hypothesis_masks)): + projected_hypothesis = self._attend_feedforward(embedded_hypothesis) + # Shape: (batch_size, premise_length, hypothesis_length) + similarity_matrix = self._matrix_attention(projected_premise, projected_hypothesis) + + # Shape: (batch_size, premise_length, hypothesis_length) + p2h_attention = last_dim_softmax(similarity_matrix, hypothesis_mask) + # Shape: (batch_size, premise_length, embedding_dim) + attended_hypothesis = weighted_sum(embedded_hypothesis, p2h_attention) + + # Shape: (batch_size, hypothesis_length, premise_length) + h2p_attention = last_dim_softmax(similarity_matrix.transpose(1, 2).contiguous(), premise_mask) + # Shape: (batch_size, hypothesis_length, embedding_dim) + attended_premise = weighted_sum(embedded_premise, h2p_attention) + + premise_compare_input = torch.cat([embedded_premise, attended_hypothesis], dim=-1) + hypothesis_compare_input = torch.cat([embedded_hypothesis, attended_premise], dim=-1) + + compared_premise = self._compare_feedforward(premise_compare_input) + compared_premise = compared_premise * premise_mask.unsqueeze(-1) + # Shape: (batch_size, compare_dim) + compared_premise = compared_premise.sum(dim=1) + + compared_hypothesis = self._compare_feedforward(hypothesis_compare_input) + compared_hypothesis = compared_hypothesis * hypothesis_mask.unsqueeze(-1) + # Shape: (batch_size, compare_dim) + compared_hypothesis = compared_hypothesis.sum(dim=1) + + aggregate_input = torch.cat([compared_premise, compared_hypothesis], dim=-1) + logit = self._aggregate_feedforward(aggregate_input) + assert logit.size(-1) == 1 + label_logits.append(logit) + + label_logits = torch.cat(label_logits, -1) + label_probs = torch.nn.functional.softmax(label_logits, dim=-1) + + output_dict = {"label_logits": label_logits, "label_probs": label_probs} + + if label is not None: + loss = self._loss(label_logits, label.long().view(-1)) + self._accuracy(label_logits, label.squeeze(-1)) + output_dict["loss"] = loss + + return output_dict + + def get_metrics(self, reset: bool = False) -> Dict[str, float]: + return { + 'accuracy': self._accuracy.get_metric(reset), + } + + @classmethod + def from_params(cls, vocab: Vocabulary, params: Params) -> 'DecomposableAttention': + embedder_params = params.pop("text_field_embedder") + text_field_embedder = 
TextFieldEmbedder.from_params(vocab, embedder_params) + + premise_encoder_params = params.pop("premise_encoder", None) + if premise_encoder_params is not None: + premise_encoder = Seq2SeqEncoder.from_params(premise_encoder_params) + else: + premise_encoder = None + + hypothesis_encoder_params = params.pop("hypothesis_encoder", None) + if hypothesis_encoder_params is not None: + hypothesis_encoder = Seq2SeqEncoder.from_params(hypothesis_encoder_params) + else: + hypothesis_encoder = None + + attend_feedforward = FeedForward.from_params(params.pop('attend_feedforward')) + similarity_function = SimilarityFunction.from_params(params.pop("similarity_function")) + compare_feedforward = FeedForward.from_params(params.pop('compare_feedforward')) + aggregate_feedforward = FeedForward.from_params(params.pop('aggregate_feedforward')) + initializer = InitializerApplicator.from_params(params.pop('initializer', [])) + regularizer = RegularizerApplicator.from_params(params.pop('regularizer', [])) + + preload_path = params.pop('preload_path', None) + params.assert_empty(cls.__name__) + return cls(vocab=vocab, + text_field_embedder=text_field_embedder, + attend_feedforward=attend_feedforward, + similarity_function=similarity_function, + compare_feedforward=compare_feedforward, + aggregate_feedforward=aggregate_feedforward, + premise_encoder=premise_encoder, + hypothesis_encoder=hypothesis_encoder, + initializer=initializer, + regularizer=regularizer, + preload_path=preload_path) diff --git a/swag_baselines/decomposable_attention/run_experiments.sh b/swag_baselines/decomposable_attention/run_experiments.sh new file mode 100644 index 0000000..f005cca --- /dev/null +++ b/swag_baselines/decomposable_attention/run_experiments.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# Run the decomposable attention baseline with a bunch of different embedding modes +export CUDA_VISIBLE_DEVICES=$1 +if [ $1 == "0" ]; then + echo "fuck! Numberbatch" + python -m allennlp.run train train-numberbatch.json -s tmp/numberbatchJUSTS2 --include-package swag_baselines.decomposable_attention + echo "fuck! NUMBERBATCH GOLD ONLY!!!!" + python -m allennlp.run train train-numberbatch-goldonly.json -s tmp/numberbatchallJUSTS2 --include-package swag_baselines.decomposable_attention +elif [ $1 == "1" ]; then + echo "fuck! Glove" + python -m allennlp.run train train-glove-840.json -s tmp/glove840JUSTS2 --include-package swag_baselines.decomposable_attention + echo "fuck! ELMO GOLD ONLY!!!!" + python -m allennlp.run train train-elmo-goldonly.json -s tmp/elmo2allJUSTS2 --include-package swag_baselines.decomposable_attention +elif [ $1 == "2" ]; then + echo "fuck! Elmo" + python -m allennlp.run train train-elmo.json -s tmp/elmo2JUSTS2 --include-package swag_baselines.decomposable_attention + echo "fuck! GLOVE GOLD ONLY!!!!" 
+ python -m allennlp.run train train-glove-goldonly-840.json -s tmp/gloveall840JUSTS2 --include-package swag_baselines.decomposable_attention +fi + diff --git a/swag_baselines/decomposable_attention/train-elmo-goldonly.json b/swag_baselines/decomposable_attention/train-elmo-goldonly.json new file mode 100644 index 0000000..a1b813f --- /dev/null +++ b/swag_baselines/decomposable_attention/train-elmo-goldonly.json @@ -0,0 +1,96 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "tokenizer": { + "end_tokens": [ + "@@NULL@@" + ] + }, + "use_only_gold_examples": true, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "elmo": { + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "attend_feedforward": { + "input_dim": 1024, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": { + "type": "dot_product" + }, + "compare_feedforward": { + "input_dim": 2048, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [ + 200, + 1 + ], + "activations": [ + "relu", + "linear" + ], + "dropout": [ + 0.2, + 0.0 + ] + }, +// "preload_path": "snli-decomposable-attention/weights.th", + "initializer": [ + [ + ".*linear_layers.*weight", + { + "type": "xavier_normal" + } + ], + [ + ".*token_embedder_tokens\\._projection.*weight", + { + "type": "xavier_normal" + } + ] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 1.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adam", + "lr": 0.001 + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-elmo.json b/swag_baselines/decomposable_attention/train-elmo.json new file mode 100644 index 0000000..55b8bf7 --- /dev/null +++ b/swag_baselines/decomposable_attention/train-elmo.json @@ -0,0 +1,96 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "tokenizer": { + "end_tokens": [ + "@@NULL@@" + ] + }, + "use_only_gold_examples": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "elmo": { + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "attend_feedforward": { + "input_dim": 1024, + 
"num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": { + "type": "dot_product" + }, + "compare_feedforward": { + "input_dim": 2048, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [ + 200, + 1 + ], + "activations": [ + "relu", + "linear" + ], + "dropout": [ + 0.2, + 0.0 + ] + }, +// "preload_path": "snli-decomposable-attention/weights.th", + "initializer": [ + [ + ".*linear_layers.*weight", + { + "type": "xavier_normal" + } + ], + [ + ".*token_embedder_tokens\\._projection.*weight", + { + "type": "xavier_normal" + } + ] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 1.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adam", + "lr": 0.001 + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-glove-840.json b/swag_baselines/decomposable_attention/train-glove-840.json new file mode 100644 index 0000000..acd20f8 --- /dev/null +++ b/swag_baselines/decomposable_attention/train-glove-840.json @@ -0,0 +1,73 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 5.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adagrad" + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-glove-goldonly-840.json b/swag_baselines/decomposable_attention/train-glove-goldonly-840.json new file mode 100644 index 0000000..47f6288 --- /dev/null +++ b/swag_baselines/decomposable_attention/train-glove-goldonly-840.json @@ -0,0 +1,73 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": true, + }, + "train_data_path": "../../data/train.csv", + 
"validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 5.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adagrad" + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-glove-goldonly.json b/swag_baselines/decomposable_attention/train-glove-goldonly.json new file mode 100644 index 0000000..558e0c6 --- /dev/null +++ b/swag_baselines/decomposable_attention/train-glove-goldonly.json @@ -0,0 +1,73 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": true, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 5.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adagrad" + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-glove.json b/swag_baselines/decomposable_attention/train-glove.json new file mode 100644 index 0000000..19b663c --- /dev/null +++ 
b/swag_baselines/decomposable_attention/train-glove.json @@ -0,0 +1,73 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.6B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 5.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adagrad" + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-numberbatch-goldonly.json b/swag_baselines/decomposable_attention/train-numberbatch-goldonly.json new file mode 100644 index 0000000..0bf859f --- /dev/null +++ b/swag_baselines/decomposable_attention/train-numberbatch-goldonly.json @@ -0,0 +1,73 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": true, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + 
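+    // The paired train-numberbatch.json run instead uses "grad_clipping": 1.0
+    // with "optimizer": {"type": "adam", "lr": 0.001} in place of adagrad below.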
"cuda_device": 0, + "grad_clipping": 5.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adagrad" + } + } +} \ No newline at end of file diff --git a/swag_baselines/decomposable_attention/train-numberbatch.json b/swag_baselines/decomposable_attention/train-numberbatch.json new file mode 100644 index 0000000..8ec0f83 --- /dev/null +++ b/swag_baselines/decomposable_attention/train-numberbatch.json @@ -0,0 +1,74 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "tokenizer": { + "end_tokens": ["@@NULL@@"] + }, + "use_only_gold_examples": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "decomposable_attention_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "projection_dim": 200, + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "attend_feedforward": { + "input_dim": 200, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "similarity_function": {"type": "dot_product"}, + "compare_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": 200, + "activations": "relu", + "dropout": 0.2 + }, + "aggregate_feedforward": { + "input_dim": 400, + "num_layers": 2, + "hidden_dims": [200, 1], + "activations": ["relu", "linear"], + "dropout": [0.2, 0.0] + }, + "initializer": [ + [".*linear_layers.*weight", {"type": "xavier_normal"}], + [".*token_embedder_tokens\._projection.*weight", {"type": "xavier_normal"}] + ] + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["premise", "num_tokens"]], + "batch_size": 64 + }, + + "trainer": { + "num_epochs": 140, + "patience": 20, + "cuda_device": 0, + "grad_clipping": 1.0, + "validation_metric": "+accuracy", + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + } +} \ No newline at end of file diff --git a/swag_baselines/esim/dataset_reader.py b/swag_baselines/esim/dataset_reader.py index 602f190..f5875d8 100644 --- a/swag_baselines/esim/dataset_reader.py +++ b/swag_baselines/esim/dataset_reader.py @@ -50,12 +50,12 @@ def __init__(self, def _read(self, file_path: str): if not USE_S1: logger.warning("We're splitting the sentences up here!!!! 
WATCH OUT\n\n\n")
-        cnli = pd.read_csv(file_path)
+        swag = pd.read_csv(file_path)
 
         if self.use_only_gold_examples and file_path.endswith('train.csv'):
-            cnli = cnli[cnli['gold-source'].str.startswith('gold')]
+            swag = swag[swag['gold-source'].str.startswith('gold')]
 
-        for _, row in cnli.iterrows():
+        for _, row in swag.iterrows():
             if USE_S1:
                 premise = row['sent1']
                 endings = [row['ending{}'.format(i)] for i in range(4)]
diff --git a/swag_baselines/unarylstm/__init__.py b/swag_baselines/unarylstm/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/swag_baselines/unarylstm/dataset_reader.py b/swag_baselines/unarylstm/dataset_reader.py
new file mode 100644
index 0000000..ce043e8
--- /dev/null
+++ b/swag_baselines/unarylstm/dataset_reader.py
@@ -0,0 +1,99 @@
+# slightly different from the other dataset reader
+
+from typing import Dict, List
+import logging
+
+from overrides import overrides
+
+from allennlp.common import Params
+from allennlp.data.dataset_readers.dataset_reader import DatasetReader
+from allennlp.data.fields import Field, TextField, LabelField
+from allennlp.data.instance import Instance
+from allennlp.data.token_indexers import SingleIdTokenIndexer, TokenIndexer
+from allennlp.data.tokenizers import Tokenizer, WordTokenizer
+import pandas as pd
+
+logger = logging.getLogger(__name__)  # pylint: disable=invalid-name
+
+
+@DatasetReader.register("swag")
+class SwagReader(DatasetReader):
+    """
+    Reads the SWAG dataset from its CSV release. Each row holds a context
+    (``sent1`` and ``sent2``), four candidate endings (``ending0``..``ending3``),
+    and, on labeled splits, the index of the gold ending. Each row becomes one
+    ``Instance`` with fields ``hypothesis0``..``hypothesis3`` and an optional
+    ``label``.
+
+    Parameters
+    ----------
+    tokenizer : ``Tokenizer``, optional (default=``WordTokenizer()``)
+        We use this ``Tokenizer`` for all four hypotheses. See :class:`Tokenizer`.
+    token_indexers : ``Dict[str, TokenIndexer]``, optional (default=``{"tokens": SingleIdTokenIndexer()}``)
+        We similarly use this for all four hypotheses. See :class:`TokenIndexer`.
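+
+    Example
+    -------
+    A minimal, illustrative sketch (the csv path is just an example)::
+
+        reader = SwagReader(use_only_gold_examples=True)
+        for instance in reader.read('../../data/train.csv'):
+            instance.fields['hypothesis0']  # one TextField per candidate ending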
+ """ + + def __init__(self, + tokenizer: Tokenizer = None, + token_indexers: Dict[str, TokenIndexer] = None, + use_only_gold_examples: bool = False, + only_end: bool = False) -> None: + super().__init__(lazy=False) + self._tokenizer = tokenizer or WordTokenizer() + self._token_indexers = token_indexers or {'tokens': SingleIdTokenIndexer()} + self.use_only_gold_examples = use_only_gold_examples + self.only_end = only_end + + @overrides + def _read(self, file_path: str): + + swag = pd.read_csv(file_path) + + if self.use_only_gold_examples and file_path.endswith('train.csv'): + swag = swag[swag['gold-source'].str.startswith('gold')] + + for _, row in swag.iterrows(): + premise = row['sent1'] + endings = [row['ending{}'.format(i)] for i in range(4)] + + if self.only_end: + # NOTE: we're JUST USING THE ENDING HERE, so hope that's what you intend + hypos = endings + else: + hypos = ['{} {} {}'.format(row['sent1'], row['sent2'], end) for end in endings] + + yield self.text_to_instance(premise, hypos, label=row['label'] if hasattr(row, 'label') else None) + + @overrides + def text_to_instance(self, # type: ignore + premise: str, + hypotheses: List[str], + label: int = None) -> Instance: + # pylint: disable=arguments-differ + fields: Dict[str, Field] = {} + # premise_tokens = self._tokenizer.tokenize(premise) + # fields['premise'] = TextField(premise_tokens, self._token_indexers) + + # This could be another way to get randomness + for i, hyp in enumerate(hypotheses): + hypothesis_tokens = self._tokenizer.tokenize(hyp) + fields['hypothesis{}'.format(i)] = TextField(hypothesis_tokens, self._token_indexers) + + if label is not None: + fields['label'] = LabelField(label, skip_indexing=True) + return Instance(fields) + + @classmethod + def from_params(cls, params: Params) -> 'SwagReader': + tokenizer = Tokenizer.from_params(params.pop('tokenizer', {})) + token_indexers = TokenIndexer.dict_from_params(params.pop('token_indexers', {})) + use_only_gold_examples = params.pop('use_only_gold_examples', False) + only_end = params.pop('only_end', False) + params.assert_empty(cls.__name__) + return cls(tokenizer=tokenizer, + token_indexers=token_indexers, + use_only_gold_examples=use_only_gold_examples, + only_end=only_end) diff --git a/swag_baselines/unarylstm/lstm_swag.py b/swag_baselines/unarylstm/lstm_swag.py new file mode 100644 index 0000000..30222fe --- /dev/null +++ b/swag_baselines/unarylstm/lstm_swag.py @@ -0,0 +1,143 @@ +from typing import Dict, List, TextIO, Optional + +from overrides import overrides +import torch +from torch.nn.modules import Linear, Dropout +import torch.nn.functional as F + +from allennlp.common import Params +from allennlp.common.checks import check_dimensions_match +from allennlp.data import Vocabulary +from allennlp.modules import Seq2SeqEncoder, TimeDistributed, TextFieldEmbedder +from allennlp.modules.token_embedders import Embedding, ElmoTokenEmbedder +from allennlp.models.model import Model +from allennlp.nn import InitializerApplicator, RegularizerApplicator +from allennlp.nn.util import get_text_field_mask, sequence_cross_entropy_with_logits +from allennlp.nn.util import get_lengths_from_binary_sequence_mask, viterbi_decode +from allennlp.training.metrics import SpanBasedF1Measure +from allennlp.training.metrics import CategoricalAccuracy + +@Model.register("lstm_swag") +class LstmSwag(Model): + """ + This model performs semantic role labeling using BIO tags using Propbank semantic roles. 
+ Specifically, it is an implmentation of `Deep Semantic Role Labeling - What works + and what's next `_ . + + This implementation is effectively a series of stacked interleaved LSTMs with highway + connections, applied to embedded sequences of words concatenated with a binary indicator + containing whether or not a word is the verbal predicate to generate predictions for in + the sentence. Additionally, during inference, Viterbi decoding is applied to constrain + the predictions to contain valid BIO sequences. + + Parameters + ---------- + vocab : ``Vocabulary``, required + A Vocabulary, required in order to compute sizes for input/output projections. + text_field_embedder : ``TextFieldEmbedder``, required + Used to embed the ``tokens`` ``TextField`` we get as input to the model. + encoder : ``Seq2SeqEncoder`` + The encoder (with its own internal stacking) that we will use in between embedding tokens + and predicting output tags. + binary_feature_dim : int, required. + The dimensionality of the embedding of the binary verb predicate features. + initializer : ``InitializerApplicator``, optional (default=``InitializerApplicator()``) + Used to initialize the model parameters. + regularizer : ``RegularizerApplicator``, optional (default=``None``) + If provided, will be used to calculate the regularization penalty during training. + label_smoothing : ``float``, optional (default = 0.0) + Whether or not to use label smoothing on the labels when computing cross entropy loss. + """ + def __init__(self, vocab: Vocabulary, + text_field_embedder: TextFieldEmbedder, + encoder: Seq2SeqEncoder, + # binary_feature_dim: int, + embedding_dropout: float = 0.0, + initializer: InitializerApplicator = InitializerApplicator(), + regularizer: Optional[RegularizerApplicator] = None) -> None: + super(LstmSwag, self).__init__(vocab, regularizer) + + self.text_field_embedder = text_field_embedder + + # For the span based evaluation, we don't want to consider labels + # for verb, because the verb index is provided to the model. + self.encoder = encoder + self.embedding_dropout = Dropout(p=embedding_dropout) + self.output_prediction = Linear(self.encoder.get_output_dim(), 1, bias=False) + + check_dimensions_match(text_field_embedder.get_output_dim(), + encoder.get_input_dim(), + "text embedding dim", "eq encoder input dim") + self._accuracy = CategoricalAccuracy() + self._loss = torch.nn.CrossEntropyLoss() + + initializer(self) + + def forward(self, # type: ignore + hypothesis0: Dict[str, torch.LongTensor], + hypothesis1: Dict[str, torch.LongTensor], + hypothesis2: Dict[str, torch.LongTensor], + hypothesis3: Dict[str, torch.LongTensor], + label: torch.IntTensor = None, + ) -> Dict[str, torch.Tensor]: + # pylint: disable=arguments-differ + """ + Parameters + ---------- + Returns + ------- + An output dictionary consisting of: + logits : torch.FloatTensor + A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing + unnormalised log probabilities of the tag classes. + class_probabilities : torch.FloatTensor + A tensor of shape ``(batch_size, num_tokens, tag_vocab_size)`` representing + a distribution of the tag classes per word. + loss : torch.FloatTensor, optional + A scalar loss to be optimised. 
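+
+        For example, with a batch of 32 instances, ``label_logits`` and
+        ``label_probs`` both come out with shape ``(32, 4)``, and
+        ``label_probs.argmax(-1)`` gives each instance's predicted ending.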
+ + """ + logits = [] + for tokens in [hypothesis0, hypothesis1, hypothesis2, hypothesis3]: + if isinstance(self.text_field_embedder, ElmoTokenEmbedder): + self.text_field_embedder._elmo._elmo_lstm._elmo_lstm.reset_states() + + embedded_text_input = self.embedding_dropout(self.text_field_embedder(tokens)) + mask = get_text_field_mask(tokens) + + batch_size, sequence_length, _ = embedded_text_input.size() + + encoded_text = self.encoder(embedded_text_input, mask) + + logits.append(self.output_prediction(encoded_text.max(1)[0])) + + logits = torch.cat(logits, -1) + class_probabilities = F.softmax(logits, dim=-1).view([batch_size, 4]) + output_dict = {"label_logits": logits, "label_probs": class_probabilities} + + if label is not None: + loss = self._loss(logits, label.long().view(-1)) + self._accuracy(logits, label.squeeze(-1)) + output_dict["loss"] = loss + + return output_dict + + def get_metrics(self, reset: bool = False) -> Dict[str, float]: + return { + 'accuracy': self._accuracy.get_metric(reset), + } + + @classmethod + def from_params(cls, vocab: Vocabulary, params: Params) -> 'LstmSwag': + embedder_params = params.pop("text_field_embedder") + text_field_embedder = TextFieldEmbedder.from_params(vocab, embedder_params) + encoder = Seq2SeqEncoder.from_params(params.pop("encoder")) + + initializer = InitializerApplicator.from_params(params.pop('initializer', [])) + regularizer = RegularizerApplicator.from_params(params.pop('regularizer', [])) + params.assert_empty(cls.__name__) + return cls(vocab=vocab, + text_field_embedder=text_field_embedder, + encoder=encoder, + initializer=initializer, + regularizer=regularizer) diff --git a/swag_baselines/unarylstm/run_experiments.sh b/swag_baselines/unarylstm/run_experiments.sh new file mode 100644 index 0000000..424ed50 --- /dev/null +++ b/swag_baselines/unarylstm/run_experiments.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +export CUDA_VISIBLE_DEVICES=$1 + +if [ $1 == "0" ]; then + echo "fuck! LSTM Numberbatch" + python -m allennlp.run train train-lstmbasic-numberbatch.json -s tmp/lstmbasicnumberbatch --include-package swag_baselines.unarylstm + echo "fuck! LSTM NUMBERBATCH GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-numberbatch-goldonly.json -s tmp/lstmbasicnumberbatchall --include-package swag_baselines.unarylstm +elif [ $1 == "1" ]; then + echo "fuck! LSTM GloVe" + python -m allennlp.run train train-lstmbasic-glove.json -s tmp/lstmbasicglove --include-package swag_baselines.unarylstm + echo "fuck! LSTM GLOVE GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-glove-goldonly.json -s tmp/lstmbasicgloveall --include-package swag_baselines.unarylstm +elif [ $1 == "2" ]; then + echo "fuck! LSTM Elmo" + python -m allennlp.run train train-lstmbasic-elmo.json -s tmp/lstmbasicelmo --include-package swag_baselines.unarylstm + echo "fuck! LSTM ELMO GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-elmo-goldonly.json -s tmp/lstmbasicelmoall --include-package swag_baselines.unarylstm +fi + diff --git a/swag_baselines/unarylstm/run_experiments_ending.sh b/swag_baselines/unarylstm/run_experiments_ending.sh new file mode 100644 index 0000000..6b0a083 --- /dev/null +++ b/swag_baselines/unarylstm/run_experiments_ending.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash + +export CUDA_VISIBLE_DEVICES=$1 + +echo "ONLY ENDING!!!!" +if [ $1 == "0" ]; then + echo "fuck! 
LSTM Numberbatch" + python -m allennlp.run train train-lstmbasic-numberbatch-endingonly.json -s tmp/lstmbasicnumberbatch2 --include-package swag_baselines.unarylstm + echo "fuck! LSTM NUMBERBATCH GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-numberbatch-goldonly-endingonly.json -s tmp/lstmbasicnumberbatchgold2 --include-package swag_baselines.unarylstm +elif [ $1 == "1" ]; then + echo "fuck! LSTM GloVe" + python -m allennlp.run train train-lstmbasic-glove-endingonly.json -s tmp/lstmbasicglove2 --include-package swag_baselines.unarylstm + echo "fuck! LSTM GLOVE GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-glove-goldonly-endingonly.json -s tmp/lstmbasicglovegold2 --include-package swag_baselines.unarylstm +elif [ $1 == "2" ]; then + echo "fuck! LSTM Elmo" + python -m allennlp.run train train-lstmbasic-elmo-endingonly.json -s tmp/lstmbasicelmo2 --include-package swag_baselines.unarylstm + echo "fuck! LSTM ELMO GOLD ONLY!!!!" + python -m allennlp.run train train-lstmbasic-elmo-goldonly-endingonly.json -s tmp/lstmbasicelmogold2 --include-package swag_baselines.unarylstm +fi + diff --git a/swag_baselines/unarylstm/train-cnn.json b/swag_baselines/unarylstm/train-cnn.json new file mode 100644 index 0000000..42b2af9 --- /dev/null +++ b/swag_baselines/unarylstm/train-cnn.json @@ -0,0 +1,59 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": false + } + }, + "use_only_gold_examples": false, + "only_end": false + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "cnn", + "conv_layer_activation": "relu", + "embedding_dim": 300, +// "output_dim": 1024, + "num_filters": 128, + "ngram_filter_sizes": [2,3,4,5], + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} \ No newline at end of file diff --git a/swag_baselines/unarylstm/train-lstmbasic-elmo-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-elmo-endingonly.json new file mode 100644 index 0000000..7b614b1 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-elmo-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "use_only_gold_examples": false, + "only_end": true + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "elmo":{ + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": 
"https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "encoder": { + "type": "lstm", + "input_size": 1024, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly-endingonly.json new file mode 100644 index 0000000..f9b9471 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "use_only_gold_examples": true, + "only_end": true, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "elmo":{ + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "encoder": { + "type": "lstm", + "input_size": 1024, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly.json b/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly.json new file mode 100644 index 0000000..1877367 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-elmo-goldonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "use_only_gold_examples": true, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "elmo":{ + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "encoder": 
{ + "type": "lstm", + "input_size": 1024, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-elmo.json b/swag_baselines/unarylstm/train-lstmbasic-elmo.json new file mode 100644 index 0000000..c247a55 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-elmo.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "elmo": { + "type": "elmo_characters" + } + }, + "use_only_gold_examples": false, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "elmo":{ + "type": "elmo_token_embedder", + "options_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json", + "weight_file": "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5", + "do_layer_norm": false, + "dropout": 0.5 + } + }, + "encoder": { + "type": "lstm", + "input_size": 1024, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-glove-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-glove-endingonly.json new file mode 100644 index 0000000..2b79bdc --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-glove-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": false, + "only_end": true + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + 
"cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly-endingonly.json new file mode 100644 index 0000000..1bc02b2 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": true, + "only_end": true + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly.json b/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly.json new file mode 100644 index 0000000..716d6f4 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-glove-goldonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": true, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-glove.json b/swag_baselines/unarylstm/train-lstmbasic-glove.json new file mode 100644 index 0000000..0c8ca96 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-glove.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + 
"use_only_gold_examples": false, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-numberbatch-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-endingonly.json new file mode 100644 index 0000000..fdd8b44 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": false, + "only_end": true + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly-endingonly.json b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly-endingonly.json new file mode 100644 index 0000000..6b9faf7 --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly-endingonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": true, + "only_end": true + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { 
+ "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly.json b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly.json new file mode 100644 index 0000000..aa682df --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-numberbatch-goldonly.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": true, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +} diff --git a/swag_baselines/unarylstm/train-lstmbasic-numberbatch.json b/swag_baselines/unarylstm/train-lstmbasic-numberbatch.json new file mode 100644 index 0000000..c056acb --- /dev/null +++ b/swag_baselines/unarylstm/train-lstmbasic-numberbatch.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": true + } + }, + "use_only_gold_examples": false, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://conceptnet.s3.amazonaws.com/downloads/2017/numberbatch/numberbatch-en-17.06.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } 
+ } +} diff --git a/swag_baselines/unarylstm/train.json b/swag_baselines/unarylstm/train.json new file mode 100644 index 0000000..b606b4a --- /dev/null +++ b/swag_baselines/unarylstm/train.json @@ -0,0 +1,58 @@ +{ + "dataset_reader": { + "type": "swag", + "token_indexers": { + "tokens": { + "type": "single_id", + "lowercase_tokens": false + } + }, + "use_only_gold_examples": false, + "only_end": false, + }, + "train_data_path": "../../data/train.csv", + "validation_data_path": "../../data/val.csv", + "test_data_path": "../../data/test.csv", + "evaluate_on_test": true, + "model": { + "type": "lstm_swag", + "text_field_embedder": { + "tokens": { + "type": "embedding", + "pretrained_file": "https://s3-us-west-2.amazonaws.com/allennlp/datasets/glove/glove.840B.300d.txt.gz", + "embedding_dim": 300, + "trainable": false + } + }, + "encoder": { + "type": "lstm", + "input_size": 300, + "hidden_size": 300, + "num_layers": 2, + "bidirectional": true, + } + }, + "iterator": { + "type": "bucket", + "sorting_keys": [["hypothesis1", "num_tokens"]], + "batch_size": 32 + }, + "trainer": { + "optimizer": { + "type": "adam", + "lr": 0.001 + }, + "validation_metric": "+accuracy", + "num_serialized_models_to_keep": 2, + "num_epochs": 75, + "grad_norm": 1.0, + "patience": 5, + "cuda_device": 0, + "learning_rate_scheduler": { + "type": "reduce_on_plateau", + "factor": 0.5, + "mode": "max", + "patience": 0 + } + } +}
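
For readers skimming the configs above: this is a minimal, self-contained PyTorch sketch of the scoring scheme the unary LSTM baseline implements (four hypotheses encoded by a shared BiLSTM, max-pooled over time, scored, then softmaxed). All names and sizes here are illustrative, not the repo's API; the real model is LstmSwag in swag_baselines/unarylstm/lstm_swag.py.

    import torch
    import torch.nn as nn
    import torch.nn.functional as F

    class UnaryLstmScorer(nn.Module):
        # Mirrors lstm_swag.py: encode each hypothesis with a shared BiLSTM,
        # max-pool over time, and score with a bias-free linear layer.
        def __init__(self, vocab_size=1000, embed_dim=300, hidden=300):
            super().__init__()
            self.embed = nn.Embedding(vocab_size, embed_dim)
            self.lstm = nn.LSTM(embed_dim, hidden, num_layers=2,
                                bidirectional=True, batch_first=True)
            self.score = nn.Linear(2 * hidden, 1, bias=False)

        def forward(self, hypotheses):
            # hypotheses: (batch, 4, seq_len) token ids
            logits = []
            for i in range(4):
                emb = self.embed(hypotheses[:, i])   # (batch, seq, embed_dim)
                enc, _ = self.lstm(emb)              # (batch, seq, 2 * hidden)
                pooled = enc.max(dim=1)[0]           # max-pool over the time axis
                logits.append(self.score(pooled))    # (batch, 1)
            logits = torch.cat(logits, dim=-1)       # (batch, 4)
            return logits, F.softmax(logits, dim=-1)

    # toy usage with random token ids
    model = UnaryLstmScorer()
    logits, probs = model(torch.randint(0, 1000, (2, 4, 12)))
    print(probs.shape)  # torch.Size([2, 4])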