diff --git a/examples/GooglesWord2Vec.ipynb b/examples/GooglesWord2Vec.ipynb deleted file mode 100644 index 115c3b2..0000000 --- a/examples/GooglesWord2Vec.ipynb +++ /dev/null @@ -1,567 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import collections\n", - "import numpy as np\n", - "import pandas as pd\n", - "import gensim\n", - "from keras.models import Sequential\n", - "from keras.layers import Dense\n", - "from keras.layers.convolutional import Convolution1D\n", - "from keras.layers.convolutional import MaxPooling1D\n", - "from keras.layers import Flatten\n", - "from keras.utils import np_utils\n", - "\n", - "from sklearn.preprocessing import LabelEncoder" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# Set random seed (for reproducibility)\n", - "np.random.seed(1000)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Set includes 3000000 words\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:14: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n" - ] - } - ], - "source": [ - "#Get word vectors using googles pretrained word2vec \n", - "#takes a minute \n", - "google = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',binary=True)\n", - "\n", - "#includes some stop words (i.e. the, also, should, but not a, and, of)\n", - "#includes misspellings \n", - "#includes commony paired words (i.e. 
New_York)\n", - "\n", - "vocab = google.vocab.keys()\n", - "total_vocab = len(vocab)\n", - "print (\"Set includes\", total_vocab, \"words\")\n", - "\n", - "# Copy word vectors and delete Word2Vec model and original corpus to save memory\n", - "X_vecs = google.wv\n", - "#del google #wait to explore model first " - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "//anaconda/lib/python3.5/site-packages/ipykernel/__main__.py:2: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", - " from ipykernel import kernelapp as app\n" - ] - }, - { - "data": { - "text/plain": [ - "[('global_warming', 0.889603853225708),\n", - " ('Climate_Change', 0.7147639393806458),\n", - " ('Climate', 0.6953692436218262),\n", - " ('Global_warming', 0.661054253578186),\n", - " ('climate', 0.6569506525993347),\n", - " ('greenhouse_gas_emissions', 0.6449477076530457),\n", - " ('greenhouse_gases', 0.6432511806488037),\n", - " ('carbon_emissions', 0.6395047307014465),\n", - " ('Global_Warming', 0.6281516551971436),\n", - " ('reducing_carbon_emissions', 0.6227284669876099)]" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from gensim.models import KeyedVectors\n", - "google.wv.most_similar('climate_change')" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "del google " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([-0.05419922, 0.01708984, -0.00527954, 0.33203125, -0.25 ,\n", - " -0.01397705, -0.15039062, -0.265625 , 0.01647949, 0.3828125 ,\n", - " -0.03295898, -0.09716797, -0.16308594, -0.04443359, 0.00946045,\n", - " 0.18457031, 0.03637695, 0.16601562, 0.36328125, -0.25585938,\n", - " 0.375 , 0.171875 , 0.21386719, -0.19921875, 0.13085938,\n", - " -0.07275391, -0.02819824, 0.11621094, 0.15332031, 0.09082031,\n", - " 0.06787109, -0.0300293 , -0.16894531, -0.20800781, -0.03710938,\n", - " -0.22753906, 0.26367188, 0.012146 , 0.18359375, 0.31054688,\n", - " -0.10791016, -0.19140625, 0.21582031, 0.13183594, -0.03515625,\n", - " 0.18554688, -0.30859375, 0.04785156, -0.10986328, 0.14355469,\n", - " -0.43554688, -0.0378418 , 0.10839844, 0.140625 , -0.10595703,\n", - " 0.26171875, -0.17089844, 0.39453125, 0.12597656, -0.27734375,\n", - " -0.28125 , 0.14746094, -0.20996094, 0.02355957, 0.18457031,\n", - " 0.00445557, -0.27929688, -0.03637695, -0.29296875, 0.19628906,\n", - " 0.20703125, 0.2890625 , -0.20507812, 0.06787109, -0.43164062,\n", - " -0.10986328, -0.2578125 , -0.02331543, 0.11328125, 0.23144531,\n", - " -0.04418945, 0.10839844, -0.2890625 , -0.09521484, -0.10351562,\n", - " -0.0324707 , 0.07763672, -0.13378906, 0.22949219, 0.06298828,\n", - " 0.08349609, 0.02929688, -0.11474609, 0.00534058, -0.12988281,\n", - " 0.02514648, 0.08789062, 0.24511719, -0.11474609, -0.296875 ,\n", - " -0.59375 , -0.29492188, -0.13378906, 0.27734375, -0.04174805,\n", - " 0.11621094, 0.28320312, 0.00241089, 0.13867188, -0.00683594,\n", - " -0.30078125, 0.16210938, 0.01171875, -0.13867188, 0.48828125,\n", - " 0.02880859, 0.02416992, 0.04736328, 0.05859375, -0.23828125,\n", - " 0.02758789, 0.05981445, -0.03857422, 0.06933594, 0.14941406,\n", - " -0.10888672, -0.07324219, 0.08789062, 0.27148438, 0.06591797,\n", - " -0.37890625, -0.26171875, -0.13183594, 0.09570312, -0.3125 ,\n", - " 
0.10205078, 0.03063965, 0.23632812, 0.00582886, 0.27734375,\n", - " 0.20507812, -0.17871094, -0.31445312, -0.01586914, 0.13964844,\n", - " 0.13574219, 0.0390625 , -0.29296875, 0.234375 , -0.33984375,\n", - " -0.11816406, 0.10644531, -0.18457031, -0.02099609, 0.02563477,\n", - " 0.25390625, 0.07275391, 0.13574219, -0.00138092, -0.2578125 ,\n", - " -0.2890625 , 0.10107422, 0.19238281, -0.04882812, 0.27929688,\n", - " -0.3359375 , -0.07373047, 0.01879883, -0.10986328, -0.04614258,\n", - " 0.15722656, 0.06689453, -0.03417969, 0.16308594, 0.08642578,\n", - " 0.44726562, 0.02026367, -0.01977539, 0.07958984, 0.17773438,\n", - " -0.04370117, -0.00952148, 0.16503906, 0.17285156, 0.23144531,\n", - " -0.04272461, 0.02355957, 0.18359375, -0.41601562, -0.01745605,\n", - " 0.16796875, 0.04736328, 0.14257812, 0.08496094, 0.33984375,\n", - " 0.1484375 , -0.34375 , -0.14160156, -0.06835938, -0.14648438,\n", - " -0.02844238, 0.07421875, -0.07666016, 0.12695312, 0.05859375,\n", - " -0.07568359, -0.03344727, 0.23632812, -0.16308594, 0.16503906,\n", - " 0.1484375 , -0.2421875 , -0.3515625 , -0.30664062, 0.00491333,\n", - " 0.17675781, 0.46289062, 0.14257812, -0.25 , -0.25976562,\n", - " 0.04370117, 0.34960938, 0.05957031, 0.07617188, -0.02868652,\n", - " -0.09667969, -0.01281738, 0.05859375, -0.22949219, -0.1953125 ,\n", - " -0.12207031, 0.20117188, -0.42382812, 0.06005859, 0.50390625,\n", - " 0.20898438, 0.11230469, -0.06054688, 0.33203125, 0.07421875,\n", - " -0.05786133, 0.11083984, -0.06494141, 0.05639648, 0.01757812,\n", - " 0.08398438, 0.13769531, 0.2578125 , 0.16796875, -0.16894531,\n", - " 0.01794434, 0.16015625, 0.26171875, 0.31640625, -0.24804688,\n", - " 0.05371094, -0.0859375 , 0.17089844, -0.39453125, -0.00156403,\n", - " -0.07324219, -0.04614258, -0.16210938, -0.15722656, 0.21289062,\n", - " -0.15820312, 0.04394531, 0.28515625, 0.01196289, -0.26953125,\n", - " -0.04370117, 0.37109375, 0.04663086, -0.19726562, 0.3046875 ,\n", - " -0.36523438, -0.23632812, 0.08056641, -0.04248047, -0.14648438,\n", - " -0.06225586, -0.0534668 , -0.05664062, 0.18945312, 0.37109375,\n", - " -0.22070312, 0.04638672, 0.02612305, -0.11474609, 0.265625 ,\n", - " -0.02453613, 0.11083984, -0.02514648, -0.12060547, 0.05297852,\n", - " 0.07128906, 0.00063705, -0.36523438, -0.13769531, -0.12890625], dtype=float32)" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#Explore the vectors \n", - "X_vecs['hello'] #check vectors \n", - "#X_vecs['global warming'.split()] #check vectors \n", - "#X_vecs['global_warming'] # Includes common phrases " - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Full dataset: 6090\n", - "dataset without NaN: 6087\n" - ] - } - ], - "source": [ - "# Load in data\n", - "# One hot encode sentiment \n", - "\n", - "data = pd.read_csv(\"../core/data/tweet_global_warming.csv\", encoding=\"latin\")\n", - "print(\"Full dataset: {}\".format(data.shape[0]))\n", - "data['existence'].fillna(value='ambiguous', inplace = True) #replace NA's in existence with \"ambiguous\"\n", - "data['existence'].replace(('Y', 'N'), ('Yes', 'No'), inplace=True) #rename so encoder doesnt get confused\n", - "data = data.dropna() #now drop NA values\n", - "print(\"dataset without NaN: {}\".format(data.shape[0]))\n", - "X = data.iloc[:,0]\n", - "Y = data.iloc[:,1]\n", - "#print(\"Number of unique words: {}\".format(len(np.unique(X)))) ##why is this wrong?? 
##\n", - "\n", - "#one hot encoding = dummy vars from categorical var \n", - "#Create a one-hot encoded binary matrix \n", - "#N, Y, Ambig\n", - "#1, 0, 0 \n", - "#0, 1, 0\n", - "#0, 0, 1\n", - "\n", - "#encode class as integers \n", - "encoder = LabelEncoder()\n", - "encoder.fit(Y)\n", - "encoded_Y = encoder.transform(Y) \n", - "\n", - "#convert integers to one hot encoded\n", - "Y = np_utils.to_categorical(encoded_Y)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], - "source": [ - "########I think we want to look at gensim over nltk for this data########\n", - "###################### See 3 cells below! #######\n", - "from nltk.stem.lancaster import LancasterStemmer\n", - "from nltk.tokenize import RegexpTokenizer\n", - "\n", - "corpus = X\n", - "# Tokenize and stem\n", - "tkr = RegexpTokenizer('[a-zA-Z0-9@]+')\n", - "stemmer = LancasterStemmer()\n", - "\n", - "tokenized_corpus = []\n", - "\n", - "for i, tweet in enumerate(corpus):\n", - " tokens = [stemmer.stem(t) for t in tkr.tokenize(tweet) if not t.startswith('@')]\n", - " tokenized_corpus.append(tokens)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "def read_data(data_file):\n", - " for i, line in enumerate (data_file): \n", - " # do some pre-processing and return a list of words for each review text\n", - " yield gensim.utils.simple_preprocess (line)\n", - "\n", - "def build_dataset(vocab, n_words):\n", - " \"\"\"Process the top n_words from raw inputs (vocab from read_data) into a dataset.\"\"\"\n", - " count = [['UNK', -1]] #stores when word is found --> UNK = unknown \n", - " count.extend(collections.Counter(vocab).most_common(n_words - 1))\n", - " dictionary = dict()\n", - " for word, _ in count:\n", - " dictionary[word] = len(dictionary)\n", - " token = list() \n", - " unk_count = 0\n", - " for word in vocab: #\n", - " if word in dictionary:\n", - " index = dictionary[word]\n", - " else:\n", - " index = 0 # dictionary['UNK'] assigned to 0 \n", - " unk_count += 1\n", - " token.append(index) #outputs a list of integers that represent words\n", - " count[0][1] = unk_count\n", - " reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys())) #allows for word lookup by integer\n", - " return token, count, dictionary, reversed_dictionary" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Number of unique words: 12117\n" - ] - } - ], - "source": [ - "top_words = 20000 #use number higher than expected unique words\n", - "\n", - "tweet_vocab = list(read_data(data['tweet']))\n", - "flat_tweet_vocab = [item for sublist in tweet_vocab for item in sublist]\n", - "token, count, dictionary, reversed_dictionary = build_dataset(flat_tweet_vocab, top_words)\n", - "\n", - "print(\"Number of unique words: {}\".format(len(count))) #correct num of unique words " - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "using ntlk to preprocess: ['govern', 'report', 'say', 'glob', 'warm', 'may', 'caus', 'cant', 'ment', 'il', 'cnsnews', 'com', 'link']\n", - "using gensim to preprocess: ['government', 'report', 'says', 'global', 'warming', 'may', 'cause', 'cancer', 'mental', 'illness', 'cnsnews', 'com', 'link']\n" - ] - } - ], - "source": [ - "#Compare preprocessing methods \n", - "\n", - "#using ntlk \n", - "print ('using 
ntlk to preprocess:', tokenized_corpus[15])\n", - "\n", - "#using gensim simple preprocesser \n", - "print ('using gensim to preprocess:', tweet_vocab[15])" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "# Create train and test sets\n", - "# Generate random indexes\n", - "\n", - "test_split = 0.8\n", - "train_size = int(len(X)*test_split)\n", - "test_size = len(X) - train_size\n", - "vector_size = 300\n", - "window_size = 10\n", - "max_tweet_length=512\n", - "\n", - "indexes = set(np.random.choice(len(tweet_vocab), train_size + test_size, replace=False))\n", - "\n", - "X_train = np.zeros((train_size, max_tweet_length, vector_size))\n", - "Y_train = np.zeros((train_size, 3), dtype=np.int32)\n", - "X_test = np.zeros((test_size, max_tweet_length, vector_size))\n", - "Y_test = np.zeros((test_size, 3), dtype=np.int32)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "for i, index in enumerate(indexes):\n", - " for t, token in enumerate(tweet_vocab[index]):\n", - " if t >= max_tweet_length:\n", - " break\n", - " \n", - " if token not in X_vecs:\n", - " continue\n", - " \n", - " if i < train_size:\n", - " X_train[i, t, :] = X_vecs[token]\n", - " else:\n", - " X_test[i - train_size, t, :] = X_vecs[token]\n", - " \n", - " \n", - " if i < train_size:\n", - " Y_train[i, :] = Y[index]\n", - " else:\n", - " Y_test[i - train_size, :] = Y[index]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[ 0.18652344 -0.14941406 0.05883789 0.12011719 -0.0279541 -0.1328125\n", - " 0.08837891 -0.203125 0.20410156 0.140625 -0.11328125 -0.08105469\n", - " -0.11328125 -0.07910156 -0.07519531 0.15625 0.10693359 0.20996094\n", - " 0.18554688 -0.06982422 -0.11230469 -0.06933594 -0.05932617 -0.11621094\n", - " 0.05859375 -0.02294922 -0.03417969 0.12597656 0.09570312 0.13378906\n", - " -0.10009766 -0.1328125 -0.12255859 0.09375 -0.11035156 0.00282288\n", - " -0.16113281 -0.08691406 -0.13671875 0.11230469 0.21972656 -0.16503906\n", - " -0.04711914 -0.06835938 0.06835938 -0.07128906 -0.00334167 0.05371094\n", - " 0.10644531 0.03637695 0.07177734 -0.14453125 -0.05883789 -0.02539062\n", - " -0.01708984 0.04443359 0.03833008 -0.05957031 -0.04736328 0.0088501\n", - " -0.01098633 0.11035156 0.0135498 -0.18359375 0.11181641 0.02648926\n", - " -0.06933594 0.06176758 -0.13964844 -0.05273438 -0.04248047 -0.07519531\n", - " 0.0703125 0.15332031 -0.125 -0.01489258 -0.02099609 -0.15820312\n", - " -0.0255127 0.02783203 -0.01403809 -0.22070312 -0.03295898 0.13378906\n", - " -0.22558594 -0.08154297 -0.08544922 0.05078125 -0.0559082 -0.15429688\n", - " -0.02563477 -0.04248047 -0.11035156 -0.05493164 -0.16503906 -0.04882812\n", - " -0.20703125 0.02966309 -0.0177002 0.08398438 -0.12011719 -0.09960938\n", - " -0.07714844 0.08056641 -0.13476562 -0.03149414 -0.13671875 -0.0859375\n", - " 0.02111816 -0.10839844 0.20117188 0.06347656 -0.01574707 0.10009766\n", - " 0.14648438 0.04370117 -0.05541992 0.15429688 -0.00848389 0.08984375\n", - " -0.13183594 -0.00836182 -0.11328125 -0.07226562 -0.20117188 0.06591797\n", - " 0.05078125 -0.0859375 0.14257812 0.10253906 -0.11181641 0.02905273\n", - " -0.11425781 -0.16601562 -0.05786133 -0.0612793 -0.14355469 0.00325012\n", - " -0.07275391 -0.20703125 0.08349609 -0.19433594 0.09423828 0.25390625\n", - " -0.00793457 0.15917969 -0.09375 0.07958984 -0.26757812 0.03955078\n", - 
" 0.01208496 0.11962891 -0.10595703 0.15429688 -0.00952148 0.10644531\n", - " -0.19433594 -0.05102539 -0.13085938 -0.10644531 -0.078125 0.10058594\n", - " 0.16894531 0.03540039 -0.17773438 0.02087402 -0.08056641 -0.13476562\n", - " 0.02880859 0.09619141 -0.14550781 0.01287842 -0.20019531 0.078125\n", - " -0.06884766 -0.11962891 -0.23339844 -0.20898438 -0.01733398 0.01068115\n", - " -0.08447266 -0.23242188 -0.04614258 0.1328125 0.07080078 -0.04614258\n", - " -0.078125 0.03344727 0.10693359 0.02355957 0.09814453 -0.13964844\n", - " -0.11816406 0.04345703 -0.203125 -0.02258301 -0.05004883 0.23242188\n", - " -0.22851562 0.01306152 -0.00982666 0.13476562 -0.08789062 -0.06103516\n", - " 0.11083984 0.22363281 -0.09814453 -0.11132812 0.09179688 -0.05859375\n", - " 0.04467773 0.0300293 0.06982422 -0.0035553 -0.08398438 0.125\n", - " 0.0612793 0.06689453 -0.09326172 -0.11767578 0.00221252 0.13769531\n", - " 0.04760742 -0.10058594 0.24902344 0.04589844 0.17285156 0.00723267\n", - " 0.05541992 -0.06103516 -0.03588867 -0.0859375 0.12109375 0.02758789\n", - " -0.00683594 -0.06176758 -0.16796875 -0.02160645 0.07226562 0.07226562\n", - " 0.02307129 0.10888672 -0.02661133 -0.11474609 0.09082031 0.11572266\n", - " 0.16015625 0.11523438 -0.00744629 0.02832031 -0.01635742 -0.09765625\n", - " 0.14648438 0.11181641 0.17089844 0.00653076 0.07568359 0.01055908\n", - " -0.0625 -0.14550781 0.07324219 0.01416016 0.05639648 0.05102539\n", - " -0.06103516 0.10449219 0.078125 -0.02893066 0.00488281 0.0255127\n", - " 0.18066406 0.12597656 0.07910156 -0.11669922 0.04760742 -0.12255859\n", - " -0.23046875 0.02282715 0.11523438 -0.10498047 -0.09716797 0.12988281\n", - " 0.0201416 0.21582031 0.08740234 -0.01708984 -0.12695312 -0.06787109\n", - " -0.04296875 0.07910156 -0.15625 0.04150391 -0.01409912 0.07470703\n", - " -0.03759766 -0.12304688 -0.1171875 0.05957031 0.06884766 0.04516602]\n" - ] - } - ], - "source": [ - "print(X_test[1][2])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "conv1d_1 (Conv1D) (None, 512, 32) 28832 \n", - "_________________________________________________________________\n", - "conv1d_2 (Conv1D) (None, 512, 32) 3104 \n", - "_________________________________________________________________\n", - "max_pooling1d_1 (MaxPooling1 (None, 256, 32) 0 \n", - "_________________________________________________________________\n", - "flatten_1 (Flatten) (None, 8192) 0 \n", - "_________________________________________________________________\n", - "dense_1 (Dense) (None, 250) 2048250 \n", - "_________________________________________________________________\n", - "dense_2 (Dense) (None, 3) 753 \n", - "=================================================================\n", - "Total params: 2,080,939\n", - "Trainable params: 2,080,939\n", - "Non-trainable params: 0\n", - "_________________________________________________________________\n", - "None\n", - "Train on 4869 samples, validate on 1218 samples\n", - "Epoch 1/2\n", - "4869/4869 [==============================] - 52s 11ms/step - loss: 0.5755 - acc: 0.6901 - val_loss: 0.5582 - val_acc: 0.7094\n", - "Epoch 2/2\n", - "4869/4869 [==============================] - 41s 8ms/step - loss: 0.4659 - acc: 0.7754 - val_loss: 0.5275 - val_acc: 0.7356\n", - "Accuracy: 73.56%\n" - 
] - } - ], - "source": [ - "#Some variables \n", - "\n", - "top_words = 1000\n", - "max_words = 150\n", - "filters = 32 #filter = 1 x KERNEL \n", - "\n", - "# create the model \n", - "model = Sequential()\n", - "\n", - "model.add(Convolution1D(32, kernel_size=3, activation='elu', padding='same',\n", - " input_shape=(max_tweet_length, vector_size)))\n", - "\n", - "model.add(Convolution1D(filters=filters, kernel_size=3, padding='same', activation='relu'))\n", - "model.add(MaxPooling1D(pool_size=2))\n", - "model.add(Flatten())\n", - "model.add(Dense(250, activation='relu'))\n", - "model.add(Dense(3, activation='sigmoid')) \n", - "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) \n", - "print(model.summary())\n", - "\n", - "# Fit the model\n", - "model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=2, batch_size=128,\n", - " verbose=1)\n", - "\n", - "# Final evaluation of the model\n", - "scores = model.evaluate(X_test, Y_test, verbose=0)\n", - "print(\"Accuracy: %.2f%%\" % (scores[1]*100))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [default]", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.5.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/Sarah_Google_Word2Vec.ipynb b/examples/Sarah_Google_Word2Vec.ipynb new file mode 100644 index 0000000..faa1fb9 --- /dev/null +++ b/examples/Sarah_Google_Word2Vec.ipynb @@ -0,0 +1,409 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import collections\n", + "import numpy as np\n", + "import pandas as pd\n", + "import gensim\n", + "from keras.models import Sequential\n", + "from keras.layers import Dense\n", + "from keras.layers.convolutional import Convolution1D\n", + "from keras.layers.convolutional import MaxPooling1D\n", + "from keras.layers import Flatten\n", + "from keras.utils import np_utils\n", + "\n", + "from sklearn.preprocessing import LabelEncoder" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Googles Word2Vec \n", + "\n", + "Shows how to use googles pretrained model as inputs to a CNN \n", + "\n", + "### First lets load in the pretrained model and do some data exploration" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Set random seed (for reproducibility)\n", + "np.random.seed(1000)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Set includes 3000000 words\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "//anaconda/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:14: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", + " \n" + ] + } + ], + "source": [ + "#Get word vectors using googles pretrained word2vec \n", + "#takes a minute \n", + "google = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin',binary=True)\n", + "\n", + "#includes some stop words (i.e. 
the, also, should, but not a, and, of)\n", + "#includes misspellings \n", + "#includes commony paired words (i.e. New_York)\n", + "\n", + "vocab = google.vocab.keys()\n", + "total_vocab = len(vocab)\n", + "print (\"Set includes\", total_vocab, \"words\")\n", + "\n", + "# Copy word vectors and delete Word2Vec model and original corpus to save memory\n", + "X_vecs = google.wv\n", + "#del google #wait to explore model first " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "//anaconda/envs/py36/lib/python3.6/site-packages/ipykernel_launcher.py:2: DeprecationWarning: Call to deprecated `wv` (Attribute will be removed in 4.0.0, use self instead).\n", + " \n" + ] + }, + { + "data": { + "text/plain": [ + "[('global_warming', 0.889603853225708),\n", + " ('Climate_Change', 0.7147639393806458),\n", + " ('Climate', 0.6953692436218262),\n", + " ('Global_warming', 0.661054253578186),\n", + " ('climate', 0.6569506525993347),\n", + " ('greenhouse_gas_emissions', 0.6449477076530457),\n", + " ('greenhouse_gases', 0.6432511806488037),\n", + " ('carbon_emissions', 0.6395047307014465),\n", + " ('Global_Warming', 0.6281516551971436),\n", + " ('reducing_carbon_emissions', 0.6227284669876099)]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from gensim.models import KeyedVectors\n", + "google.wv.most_similar('climate_change')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "del google #save mem " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "300" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#Explore the vectors \n", + "X_vecs['hello'].size #check vectors " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now see how using pretrained vectors improves the model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Full dataset: 6090\n", + "dataset without NaN: 6087\n" + ] + } + ], + "source": [ + "# Load in data\n", + "# One hot encode sentiment \n", + "\n", + "data = pd.read_csv(\"../wyns/data/tweet_global_warming.csv\", encoding=\"latin\")\n", + "print(\"Full dataset: {}\".format(data.shape[0]))\n", + "data['existence'].fillna(value='ambiguous', inplace = True) #replace NA's in existence with \"ambiguous\"\n", + "data['existence'].replace(('Y', 'N'), ('Yes', 'No'), inplace=True) #rename so encoder doesnt get confused\n", + "data = data.dropna() #now drop NA values\n", + "print(\"dataset without NaN: {}\".format(data.shape[0]))\n", + "X = data.iloc[:,0]\n", + "Y = data.iloc[:,1]\n", + "\n", + "#one hot encoding = dummy vars from categorical var \n", + "#Create a one-hot encoded binary matrix \n", + "#N, Y, Ambig\n", + "#1, 0, 0 \n", + "#0, 1, 0\n", + "#0, 0, 1\n", + "\n", + "#encode class as integers \n", + "encoder = LabelEncoder()\n", + "encoder.fit(Y)\n", + "encoded_Y = encoder.transform(Y) \n", + "\n", + "#convert integers to one hot encoded\n", + "Y = np_utils.to_categorical(encoded_Y)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "def read_data(data_file):\n", + " for i, line in enumerate (data_file): \n", + " yield 
gensim.utils.simple_preprocess (line)\n", + "\n", + "def build_dataset(vocab, n_words):\n", + " \"\"\"Process the top n_words from raw inputs (vocab from read_data) into a dataset.\"\"\"\n", + " count = [['UNK', -1]] #stores when word is found --> UNK = unknown \n", + " count.extend(collections.Counter(vocab).most_common(n_words - 1))\n", + " dictionary = dict()\n", + " for word, _ in count:\n", + " dictionary[word] = len(dictionary)\n", + " token = list() \n", + " unk_count = 0\n", + " for word in vocab: #\n", + " if word in dictionary:\n", + " index = dictionary[word]\n", + " else:\n", + " index = 0 # dictionary['UNK'] assigned to 0 \n", + " unk_count += 1\n", + " token.append(index) #outputs a list of integers that represent words\n", + " count[0][1] = unk_count\n", + " reversed_dictionary = dict(zip(dictionary.values(), dictionary.keys())) #allows for word lookup by integer\n", + " return token, count, dictionary, reversed_dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of unique words: 12117\n" + ] + } + ], + "source": [ + "top_words = 20000 #use number higher than expected unique words\n", + "\n", + "tweet_vocab = list(read_data(data['tweet']))\n", + "flat_tweet_vocab = [item for sublist in tweet_vocab for item in sublist]\n", + "token, count, dictionary, reversed_dictionary = build_dataset(flat_tweet_vocab, top_words)\n", + "\n", + "print(\"Number of unique words: {}\".format(len(count))) #correct num of unique words " + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# Create train and test sets\n", + "# Generate random indexes\n", + "\n", + "test_split = 0.8\n", + "train_size = int(len(X)*test_split)\n", + "test_size = len(X) - train_size\n", + "vector_size = 300\n", + "window_size = 10\n", + "max_tweet_length=512\n", + "\n", + "indexes = set(np.random.choice(len(tweet_vocab), train_size + test_size, replace=False))\n", + "\n", + "X_train = np.zeros((train_size, max_tweet_length, vector_size))\n", + "Y_train = np.zeros((train_size, 3), dtype=np.int32)\n", + "X_test = np.zeros((test_size, max_tweet_length, vector_size))\n", + "Y_test = np.zeros((test_size, 3), dtype=np.int32)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "for i, index in enumerate(indexes):\n", + " for t, token in enumerate(tweet_vocab[index]):\n", + " if t >= max_tweet_length:\n", + " break\n", + " \n", + " if token not in X_vecs:\n", + " continue\n", + " \n", + " if i < train_size:\n", + " X_train[i, t, :] = X_vecs[token]\n", + " else:\n", + " X_test[i - train_size, t, :] = X_vecs[token]\n", + " \n", + " \n", + " if i < train_size:\n", + " Y_train[i, :] = Y[index]\n", + " else:\n", + " Y_test[i - train_size, :] = Y[index]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lets look at how our model performs now! 
" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_________________________________________________________________\n", + "Layer (type) Output Shape Param # \n", + "=================================================================\n", + "conv1d_1 (Conv1D) (None, 512, 32) 28832 \n", + "_________________________________________________________________\n", + "conv1d_2 (Conv1D) (None, 512, 32) 3104 \n", + "_________________________________________________________________\n", + "max_pooling1d_1 (MaxPooling1 (None, 256, 32) 0 \n", + "_________________________________________________________________\n", + "flatten_1 (Flatten) (None, 8192) 0 \n", + "_________________________________________________________________\n", + "dense_1 (Dense) (None, 250) 2048250 \n", + "_________________________________________________________________\n", + "dense_2 (Dense) (None, 3) 753 \n", + "=================================================================\n", + "Total params: 2,080,939\n", + "Trainable params: 2,080,939\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n", + "None\n", + "Train on 4869 samples, validate on 1218 samples\n", + "Epoch 1/2\n", + "4869/4869 [==============================] - 41s 8ms/step - loss: 0.5755 - acc: 0.6904 - val_loss: 0.5578 - val_acc: 0.7124\n", + "Epoch 2/2\n", + "4869/4869 [==============================] - 41s 8ms/step - loss: 0.4663 - acc: 0.7755 - val_loss: 0.5273 - val_acc: 0.7332\n", + "Accuracy: 73.32%\n" + ] + } + ], + "source": [ + "#Some variables \n", + "\n", + "top_words = 1000\n", + "max_words = 150\n", + "filters = 32 #filter = 1 x KERNEL \n", + "\n", + "# create the model \n", + "model = Sequential()\n", + "\n", + "model.add(Convolution1D(32, kernel_size=3, activation='elu', padding='same',\n", + " input_shape=(max_tweet_length, vector_size)))\n", + "\n", + "model.add(Convolution1D(filters=filters, kernel_size=3, padding='same', activation='relu'))\n", + "model.add(MaxPooling1D(pool_size=2))\n", + "model.add(Flatten())\n", + "model.add(Dense(250, activation='relu'))\n", + "model.add(Dense(3, activation='sigmoid')) \n", + "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) \n", + "print(model.summary())\n", + "\n", + "# Fit the model\n", + "model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=2, batch_size=128,\n", + " verbose=1)\n", + "\n", + "# Final evaluation of the model\n", + "scores = model.evaluate(X_test, Y_test, verbose=0)\n", + "print(\"Accuracy: %.2f%%\" % (scores[1]*100))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/Sarah_Tweepy.ipynb b/examples/Sarah_Tweepy.ipynb new file mode 100644 index 0000000..b461407 --- /dev/null +++ b/examples/Sarah_Tweepy.ipynb @@ -0,0 +1,477 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Tweepy for getting tweets quickly \n", + "\n", + "Read the docs: 
http://tweepy.readthedocs.io/en/v3.5.0/\n", + "\n", + "(couldn't get python-twitter to run on my desktop) \n", + "\n", + "Code below taken from: https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./ (great read)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import tweepy\n", + "\n", + "# consumer key, consumer secret, access token, access secret. Unique to each person. Read Wes' API notebook for more\n", + "API_KEY = 'IPbYoAbOUR1URWvXWeNwQNnZD'\n", + "API_SECRET = 'goN7XnztVpn6CgkEAAxU9GOVSwbUYwjuFC0ChXdxjWBhRrYZcj'\n", + "access_token = '506759494-rt09qdTZGlGH8WkBDd5M8Vgr6eGbZtlxQVaEH7hA'\n", + "access_token_secret = 'k6tPQuDCnqIf25Ethn6mtZ4pTAoncEufAIy8EVujP2JF2'\n", + "\n", + "auth = tweepy.AppAuthHandler(API_KEY, API_SECRET)\n", + "\n", + "api = tweepy.API(auth, wait_on_rate_limit=True,\n", + " wait_on_rate_limit_notify=True)\n", + "\n", + "if (not api):\n", + " print (\"Can't Authenticate\")\n", + " sys.exit(-1)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import sys\n", + "import jsonpickle\n", + "import os\n", + "import json\n", + "\n", + "searchQuery = 'climate change' # this is what we're searching for\n", + "maxTweets = 2000 # Some arbitrary large number (The while loop will run until Twitter times you out (~15min))\n", + "tweetsPerQry = 100 # max the API permits\n", + "fName = 'tweets.txt' # Stores tweets in text as well as a json file \n", + "\n", + "#below basically prevents pulling duplicate tweets (I think)\n", + "\n", + "# If results from a specific ID onwards are reqd, set since_id to that ID.\n", + "# else default to no lower limit, go as far back as API allows\n", + "sinceId = None\n", + "\n", + "# If results only below a specific ID are, set max_id to that ID.\n", + "# else default to no upper limit, start from the most recent tweet matching the search query.\n", + "max_id = -1" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading max 2000 tweets\n", + "Downloaded 100 tweets\n", + "Downloaded 200 tweets\n", + "Downloaded 300 tweets\n", + "Downloaded 400 tweets\n", + "Downloaded 500 tweets\n", + "Downloaded 600 tweets\n", + "Downloaded 700 tweets\n", + "Downloaded 800 tweets\n", + "Downloaded 900 tweets\n", + "Downloaded 1000 tweets\n", + "Downloaded 1087 tweets\n", + "Downloaded 1177 tweets\n", + "Downloaded 1277 tweets\n", + "Downloaded 1377 tweets\n", + "Downloaded 1474 tweets\n", + "Downloaded 1574 tweets\n", + "Downloaded 1674 tweets\n", + "Downloaded 1769 tweets\n", + "Downloaded 1862 tweets\n", + "Downloaded 1962 tweets\n", + "Downloaded 2062 tweets\n", + "Downloaded 2062 tweets, Saved to tweets.txt\n" + ] + } + ], + "source": [ + "tweetCount = 0\n", + "print(\"Downloading max {0} tweets\".format(maxTweets))\n", + "with open(fName, 'w') as f:\n", + " while tweetCount < maxTweets:\n", + " try:\n", + " if (max_id <= 0):\n", + " if (not sinceId):\n", + " new_tweets = api.search(q=searchQuery, count=tweetsPerQry)\n", + " else:\n", + " new_tweets = api.search(q=searchQuery, count=tweetsPerQry,\n", + " since_id=sinceId)\n", + " else:\n", + " if (not sinceId):\n", + " new_tweets = api.search(q=searchQuery, count=tweetsPerQry,\n", + " max_id=str(max_id - 1))\n", + " else:\n", + " new_tweets = api.search(q=searchQuery, count=tweetsPerQry,\n", 
+ " max_id=str(max_id - 1),\n", + " since_id=sinceId)\n", + " if not new_tweets:\n", + " print(\"No more tweets found\")\n", + " break\n", + " for tweet in new_tweets:\n", + " f.write(jsonpickle.encode(tweet._json, unpicklable=False) +\n", + " '\\n')\n", + " tweetCount += len(new_tweets)\n", + " print(\"Downloaded {0} tweets\".format(tweetCount))\n", + " max_id = new_tweets[-1].id\n", + " except tweepy.TweepError as e:\n", + " # Just exit if any error\n", + " print(\"some error : \" + str(e))\n", + " break\n", + "\n", + "print (\"Downloaded {0} tweets, Saved to {1}\".format(tweetCount, fName))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "df = pd.read_json('tweets.txt', lines=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "18 hits have location data in 'place'\n" + ] + }, + { + "data": { + "text/plain": [ + "[{'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-88.1236583, 42.385769],\n", + " [-88.051803, 42.385769],\n", + " [-88.051803, 42.4480902],\n", + " [-88.1236583, 42.4480902]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Lake Villa, IL',\n", + " 'id': '83d2443c72c001a9',\n", + " 'name': 'Lake Villa',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/83d2443c72c001a9.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-79.9327940509939, 40.59852214703781],\n", + " [-79.9327940509939, 40.59852214703781],\n", + " [-79.9327940509939, 40.59852214703781],\n", + " [-79.9327940509939, 40.59852214703781]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Allison Park, PA',\n", + " 'id': '07d9f81d4f885000',\n", + " 'name': 'Allison Park, PA',\n", + " 'place_type': 'poi',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/07d9f81d4f885000.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[105.290653, 20.564095000000002],\n", + " [106.020197, 20.564095000000002],\n", + " [106.020197, 21.385226],\n", + " [105.290653, 21.385226]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Vietnam',\n", + " 'country_code': 'VN',\n", + " 'full_name': 'Hanoi, Vietnam',\n", + " 'id': '0192af60292eace8',\n", + " 'name': 'Hanoi',\n", + " 'place_type': 'admin',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/0192af60292eace8.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[105.290653, 20.564095000000002],\n", + " [106.020197, 20.564095000000002],\n", + " [106.020197, 21.385226],\n", + " [105.290653, 21.385226]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Vietnam',\n", + " 'country_code': 'VN',\n", + " 'full_name': 'Hanoi, Vietnam',\n", + " 'id': '0192af60292eace8',\n", + " 'name': 'Hanoi',\n", + " 'place_type': 'admin',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/0192af60292eace8.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-124.482003, 32.528832],\n", + " [-114.131212, 32.528832],\n", + " [-114.131212, 42.009519],\n", 
+ " [-124.482003, 42.009519]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'California, USA',\n", + " 'id': 'fbd6d2f5a4e4a15e',\n", + " 'name': 'California',\n", + " 'place_type': 'admin',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/fbd6d2f5a4e4a15e.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[3.9825499, 50.8993057],\n", + " [4.164221, 50.8993057],\n", + " [4.164221, 50.9948827],\n", + " [3.9825499, 50.9948827]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Belgium',\n", + " 'country_code': 'BE',\n", + " 'full_name': 'Aalst, Belgiƫ',\n", + " 'id': 'b0500318040c21d2',\n", + " 'name': 'Aalst',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/b0500318040c21d2.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-85.644548, 38.072047],\n", + " [-85.520944, 38.072047],\n", + " [-85.520944, 38.143229],\n", + " [-85.644548, 38.143229]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Heritage Creek, KY',\n", + " 'id': '0157e4d7264811f5',\n", + " 'name': 'Heritage Creek',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/0157e4d7264811f5.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-77.119401, 38.801826],\n", + " [-76.909396, 38.801826],\n", + " [-76.909396, 38.9953797],\n", + " [-77.119401, 38.9953797]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Washington, DC',\n", + " 'id': '01fbe706f872cb32',\n", + " 'name': 'Washington',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/01fbe706f872cb32.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-81.7297293, 28.819687],\n", + " [-81.6317899, 28.819687],\n", + " [-81.6317899, 28.8919998],\n", + " [-81.7297293, 28.8919998]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Eustis, FL',\n", + " 'id': 'e1ad088723f72290',\n", + " 'name': 'Eustis',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/e1ad088723f72290.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-79.429128, 43.798004],\n", + " [-79.170217, 43.798004],\n", + " [-79.170217, 43.963385],\n", + " [-79.429128, 43.963385]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Canada',\n", + " 'country_code': 'CA',\n", + " 'full_name': 'Markham, Ontario',\n", + " 'id': '7d2673c3623fe492',\n", + " 'name': 'Markham',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/7d2673c3623fe492.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[29.0788593, -25.9971906],\n", + " [29.3804417, -25.9971906],\n", + " [29.3804417, -25.7928753],\n", + " [29.0788593, -25.7928753]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'South Africa',\n", + " 'country_code': 'ZA',\n", + " 'full_name': 'eMalahleni, South Africa',\n", + " 'id': '27b2c17b988dcc52',\n", + " 'name': 'eMalahleni',\n", + " 'place_type': 'city',\n", + " 'url': 
'https://api.twitter.com/1.1/geo/id/27b2c17b988dcc52.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-77.053632691935, 38.922719893926285],\n", + " [-77.053632691935, 38.922719893926285],\n", + " [-77.053632691935, 38.922719893926285],\n", + " [-77.053632691935, 38.922719893926285]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Omni Shoreham Hotel',\n", + " 'id': '07d9db1114c80001',\n", + " 'name': 'Omni Shoreham Hotel',\n", + " 'place_type': 'poi',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/07d9db1114c80001.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-90.4181075, 41.696088],\n", + " [-82.122971, 41.696088],\n", + " [-82.122971, 48.306272],\n", + " [-90.4181075, 48.306272]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Michigan, USA',\n", + " 'id': '67d92742f1ebf307',\n", + " 'name': 'Michigan',\n", + " 'place_type': 'admin',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/67d92742f1ebf307.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[103.749959507073, 1.2123138339349],\n", + " [103.918426999964, 1.2123138339349],\n", + " [103.918426999964, 1.3687449990256901],\n", + " [103.749959507073, 1.3687449990256901]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Singapore',\n", + " 'country_code': 'SG',\n", + " 'full_name': 'Central Region, Singapore',\n", + " 'id': '58a4c3a0d54e1400',\n", + " 'name': 'Central Region',\n", + " 'place_type': 'admin',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/58a4c3a0d54e1400.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-80.20811, 26.080935],\n", + " [-80.0902351, 26.080935],\n", + " [-80.0902351, 26.219801],\n", + " [-80.20811, 26.219801]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Fort Lauderdale, FL',\n", + " 'id': '6c686af766d8429c',\n", + " 'name': 'Fort Lauderdale',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/6c686af766d8429c.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-87.095676, 34.507116],\n", + " [-86.925426, 34.507116],\n", + " [-86.925426, 34.654734],\n", + " [-87.095676, 34.654734]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Decatur, AL',\n", + " 'id': '246fb652d518385d',\n", + " 'name': 'Decatur',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/246fb652d518385d.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-119.5937583, 49.773384],\n", + " [-119.319893, 49.773384],\n", + " [-119.319893, 50.024913],\n", + " [-119.5937583, 50.024913]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'Canada',\n", + " 'country_code': 'CA',\n", + " 'full_name': 'Kelowna, British Columbia',\n", + " 'id': '484de3636fa22d62',\n", + " 'name': 'Kelowna',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/484de3636fa22d62.json'},\n", + " {'attributes': {},\n", + " 'bounding_box': {'coordinates': [[[-109.282237, 41.5595061],\n", + " [-109.192506, 41.5595061],\n", + " [-109.192506, 
41.633358],\n", + " [-109.282237, 41.633358]]],\n", + " 'type': 'Polygon'},\n", + " 'contained_within': [],\n", + " 'country': 'United States',\n", + " 'country_code': 'US',\n", + " 'full_name': 'Rock Springs, WY',\n", + " 'id': '5db2aef731e97df0',\n", + " 'name': 'Rock Springs',\n", + " 'place_type': 'city',\n", + " 'url': 'https://api.twitter.com/1.1/geo/id/5db2aef731e97df0.json'}]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "place = df[\"place\"] # Simple preprocess \n", + "place = place.dropna()\n", + "print (len(list(place)), \"hits have location data in 'place'\")\n", + "list(place)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/wyns/get_tweets.py b/wyns/get_tweets.py new file mode 100644 index 0000000..5a66fd8 --- /dev/null +++ b/wyns/get_tweets.py @@ -0,0 +1,69 @@ +import json +import jsonpickle +import os +import sys +import tweepy + +#Use to get tweets in a way that bypasses twitters weird rules# +#Should be able to run on a build node on hyak - havent tested + +# consumer key, consumer secret, access token, access secret. Unique to each person. Read Wes' API notebook for more +API_KEY = 'IPbYoAbOUR1URWvXWeNwQNnZD' +API_SECRET = 'goN7XnztVpn6CgkEAAxU9GOVSwbUYwjuFC0ChXdxjWBhRrYZcj' +access_token = '506759494-rt09qdTZGlGH8WkBDd5M8Vgr6eGbZtlxQVaEH7hA' +access_token_secret = 'k6tPQuDCnqIf25Ethn6mtZ4pTAoncEufAIy8EVujP2JF2' + +auth = tweepy.AppAuthHandler(API_KEY, API_SECRET) +api = tweepy.API(auth, wait_on_rate_limit=True, + wait_on_rate_limit_notify=True) + +if (not api): + print ("Can't Authenticate") + sys.exit(-1) + +searchQuery = 'climate change' +maxTweets = 2000 # Some arbitrary large number (Will run until twitter API times you out) +tweetsPerQry = 100 # max the API permits per query +fName = 'tweets.txt' # Stores tweets in text as well as a json file + +#below basically prevents pulling duplicate tweets (I think) +sinceId = None +max_id = -1 + +tweetCount = 0 +print("Downloading max {0} tweets".format(maxTweets)) + +with open(fName, 'w') as f: + while tweetCount < maxTweets: + try: + if (max_id <= 0): + if (not sinceId): + new_tweets = api.search(q=searchQuery, count=tweetsPerQry) + else: + new_tweets = api.search(q=searchQuery, count=tweetsPerQry, + since_id=sinceId) + else: + if (not sinceId): + new_tweets = api.search(q=searchQuery, count=tweetsPerQry, + max_id=str(max_id - 1)) + else: + new_tweets = api.search(q=searchQuery, count=tweetsPerQry, + max_id=str(max_id - 1), + since_id=sinceId) + if not new_tweets: + print("No more tweets found") + break + for tweet in new_tweets: + f.write(jsonpickle.encode(tweet._json, unpicklable=False) + + '\n') + tweetCount += len(new_tweets) + print("Downloaded {0} tweets".format(tweetCount)) + max_id = new_tweets[-1].id + except tweepy.TweepError as e: + # Just exit if any error + print("some error : " + str(e)) + break + +print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fName)) + +