From 1daa587abe6957c127d0b29e8c93b6ae98127735 Mon Sep 17 00:00:00 2001
From: Peter Organisciak <organisciak@gmail.com>
Date: Tue, 16 May 2023 17:46:06 -0600
Subject: [PATCH] Allow for an alternative correct answer

---
 labs/08-lab-regular-expressions.ipynb | 1979 ++++++++++++-------------
 1 file changed, 975 insertions(+), 1004 deletions(-)
diff --git a/labs/08-lab-regular-expressions.ipynb b/labs/08-lab-regular-expressions.ipynb
index 4c9851b..f498e11 100644
--- a/labs/08-lab-regular-expressions.ipynb
+++ b/labs/08-lab-regular-expressions.ipynb
@@ -1,1010 +1,981 @@
 {
- "cells": [
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "colab_type": "text",
-    "id": "view-in-github"
-   },
-   "source": [
-    "<a href=\"https://colab.research.google.com/github/organisciak/Scripting-Course/blob/master/labs/08-lab-regular-expressions.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Zuf8EoDzwEem"
-   },
-   "source": [
-    "# Week 8 Lab: Regular Expressions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {
-    "id": "UquVNqG4wEeo"
-   },
-   "outputs": [],
-   "source": [
-    "import re\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "OXHoXToPwEer"
-   },
-   "source": [
-    "Here is a synopsis of the best film of our time, *Paddington*:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 69
-    },
-    "id": "PhMVBwhpwEes",
-    "outputId": "ebb331af-4ec3-4039-a2af-4fc4bbc37802"
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.google.colaboratory.intrinsic+json": {
-       "type": "string"
-      },
-      "text/plain": [
-       "'After a deadly earthquake destroys his home in Peruvian rainforest, a young bear (Ben Whishaw) makes his way to England in search of a new home. The bear, dubbed \"Paddington\" for the london train station, finds shelter with the family of Henry (Hugh Bonneville) and Mary Brown (Sally Hawkins). Although Paddington\\'s amazement at urban living soon endears him to the Browns, someone else has her eye on him: Taxidermist Millicent Clyde (Nicole Kidman) has designs on the rare bear and his hide.'"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "synopsis = '''After a deadly earthquake destroys his home in the Peruvian rainforest, a young bear (Ben Whishaw) makes his way to England in search of a new home. The bear, dubbed \"Paddington\" for the london train station, finds shelter with the family of Henry (Hugh Bonneville) and Mary Brown (Sally Hawkins). Although Paddington's amazement at urban living soon endears him to the Browns, someone else has her eye on him: Taxidermist Millicent Clyde (Nicole Kidman) has designs on the rare bear and his hide.'''\n",
-    "synopsis"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "mzj3T_AUwEez"
-   },
-   "source": [
-    "For these questions, use `re.findall(pattern, text)`, like so:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 34
-    },
-    "id": "J6oZ8HVcwEe0",
-    "outputId": "77adfe21-3c6f-4443-d744-0b3a75b5daf1"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['After ', 'Henry ', 'Brown ', 'Sally ', 'Clyde ']"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "re.findall('[A-Z]....\\s', synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For many of the questions, I'll ask you for the `pattern` that goes into that search."
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "IqAdp-3iwEe5"
-   },
-   "source": [
-    "### Example Question\n",
-    "\n",
-    "- Q0.0: Write the regular expression to find all three or four-letter words:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {
-    "cellView": "form",
-    "colab": {
-     "base_uri": "https://localhost:8080/"
-    },
-    "id": "Ub9b8TUEwEe6",
-    "outputId": "a915df70-cf14-497f-db87-3e4334eff5b2"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[' his ',\n",
-       " ' bear ',\n",
-       " ' his ',\n",
-       " ' new ',\n",
-       " ' The ',\n",
-       " ' for ',\n",
-       " ' with ',\n",
-       " ' and ',\n",
-       " ' soon ',\n",
-       " ' him ',\n",
-       " ' the ',\n",
-       " ' else ',\n",
-       " ' her ',\n",
-       " ' has ',\n",
-       " ' the ',\n",
-       " ' bear ',\n",
-       " ' his ']"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#@markdown Run this cell to see if it works\n",
-    "q0_answer = '\\s\\w\\w\\w\\w?\\s'    #@param {type:'string'}\n",
-    "re.findall(q0_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "dVtGDijDwEfE"
-   },
-   "source": [
-    "## Questions #1: Matching Characters (20 pts)\n",
-    "\n",
-    "- Q1a: Write a regular expression to match all three-character words surrounded by spaces. You need to change the answer in the form field and run it to see if it works. (5pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "ZIIqEYxXwEfJ",
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "#@markdown *Run this cell to see if it works*\n",
-    "q1a_answer = ''    #@param {type:'string'}\n",
-    "re.findall(q1a_answer, synopsis) # This runs your pattern"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "esCPsEnjwEfM"
-   },
-   "source": [
-    "- Q1b: Write a regular expression to match all three-character words, allowing for instances like `(Ben` and `him:`.  (5pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "qt8_JQnxwEfM"
-   },
-   "outputs": [],
-   "source": [
-    "#@markdown *Run this cell to see if it works*\n",
-    "q1b_answer = ''    #@param {type:'string'}\n",
-    "re.findall(q1b_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "AuFhWEX-wEfQ"
-   },
-   "source": [
-    "- Q1c: Write the regular expression to determine how many times the words `the` or `The` show up.  (5pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "R5LxSp3ewEfQ"
-   },
-   "outputs": [],
-   "source": [
-    "q1c_answer = ''  #@param {type:'string'}\n",
-    "matches = re.findall(q1c_answer, synopsis)\n",
-    "print(matches)\n",
-    "print(\"Count\", len(matches))"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "KSxzA382wEfT"
-   },
-   "source": [
-    "- Q1d: Write a regular expression to find the context at the end of a sentence: the five characters leading up to the period. Results should be `[' home.', 'kins).', ' hide.']`  (5pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "seEtUQAFwEfU"
-   },
-   "outputs": [],
-   "source": [
-    "q1d_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q1d_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "dTJiDjSpwEfX"
-   },
-   "source": [
-    "## Questions #2: Repeating Patterns (36 pts)\n",
-    "\n",
-    "*Full Reference*\n",
-    "\n",
-    "**Matching characters**\n",
-    "- `a` - Match the letter `a`. Same for most other characters\n",
-    "- `.` - Match any single character\n",
-    "- `\\w` - Match any word character (letters, number... support for non-English characters varies)\n",
-    "- `\\W` - Match any non-word characters\n",
-    "- `\\d` - Match any digit\n",
-    "- `.` - Matches *any* character\n",
-    "- `\\.` - Matches a literal period (or `\\\\` matches literal backslash, `\\(` matches literal parenthesis, etc)\n",
-    "- `\\s` - Match any whitespace character (space, tabs, line breaks sometimes)\n",
-    "\n",
-    "**Multiple Matches**\n",
-    "- `[ab]` - Group of multiple possible characters - in this case `a` or `b`\n",
-    "- `[a-z]` matches any character from a to z\n",
-    "- `[A-Z]` matches any character from A to Z\n",
-    "- `[A-Zab]` matches any character from A to Z (`A-Z`), *or* `a` *or* `b`\n",
-    "\n",
-    "**Repeating**\n",
-    "\n",
-    "*'greedy' means that it captures as much as it can, 'lazy' means it captures as little as possible.*\n",
-    "`?` - One or zero of the preceding match\n",
-    "- `+` - One or more of the preceding match (greedy)\n",
-    "- `*` - Zero or more of the preceding match (greedy)\n",
-    "- `*?`, `+?`  - Lazy versions of `*` and `+`\n",
-    "\n",
-    "**Position**\n",
-    "- `^` - Start of line\n",
-    "- `$` - End of line\n",
-    "\n",
-    "*Questions*\n",
-    "\n",
-    "- Q2a: Write the expression that matches the period at the end of a sentence and the first word of the next sentence. The matches should be: `['. The', '. Although']` (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "GCkA1uQIwEfX"
-   },
-   "outputs": [],
-   "source": [
-    "q2a_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2a_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "hmv4DTKNwEfa"
-   },
-   "source": [
-    "- Q2b: Write the expression to match the two words following the word `a`. The results should be `['a deadly earthquake', 'a young bear', 'a new home']` (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "VSi2v7rmwEfb"
-   },
-   "outputs": [],
-   "source": [
-    "q2b_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2b_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "CgKT_nK5wEfe"
-   },
-   "source": [
-    "- Q2c: Write the expression to match all words with an `'s`. In this case, `Paddington's` is the only match.  (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "7zbb7pzCwEff"
-   },
-   "outputs": [],
-   "source": [
-    "q2c_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2c_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "ZJNrE3OOwEfj"
-   },
-   "source": [
-    "- Q2d: Write the expression to match all values in parentheses. The results should be `['(Ben Whishaw)', '(Hugh Bonneville)', '(Sally Hawkins)', '(Nicole Kidman)]'` (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "pLz1VYtjwEfk"
-   },
-   "outputs": [],
-   "source": [
-    "q2d_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2d_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "a0WWNmnwwEfn"
-   },
-   "source": [
-    "- Q2e: Write the expression to match all capitalized words, including `Paddington's`. (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "E7Z2tH1awEfo"
-   },
-   "outputs": [],
-   "source": [
-    "q2e_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2e_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "IszncrdAwEfr"
-   },
-   "source": [
-    "- Q2f: Write the expression to match all pairings of capitalized words, like `Mary Brown` and `Although Paddington`. (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "xFrn1WuNwEfs"
-   },
-   "outputs": [],
-   "source": [
-    "q2f_answer = ''  #@param {type:'string'}\n",
-    "re.findall(q2f_answer, synopsis)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "BjGW8Q2zwEfv"
-   },
-   "source": [
-    "## Other Python Regular Expression Functions\n",
-    "\n",
-    "**Replace a pattern**:\n",
-    "    \n",
-    "```\n",
-    "re.sub(pattern, repl, string)\n",
-    "```\n",
-    "\n",
-    "This replaces the pattern matches with whatever string you provide to `repl`.\n",
-    "\n",
-    "**Check a match**:\n",
-    "    \n",
-    "```\n",
-    "regex.search(string)\n",
-    "```\n",
-    "\n",
-    "This doesn't return anything if there are no matches, so it's useful for asking, \"is there a match or not?\""
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "vx7lh1DjwEfw"
-   },
-   "source": [
-    "## Pandas\n",
-    "\n",
-    "Load the following data into a DataFrame, containing the tweets of Lord_Voldemort7:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 195
-    },
-    "id": "Ou_n26JWwEfx",
-    "outputId": "a9ab9400-f94d-4163-86df-664486e096e6"
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>favourites</th>\n",
-       "      <th>retweets</th>\n",
-       "      <th>created_at</th>\n",
-       "      <th>text</th>\n",
-       "      <th>is_quoted</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>903664289411686400</td>\n",
-       "      <td>1921</td>\n",
-       "      <td>556</td>\n",
-       "      <td>2017-09-01 17:02:10</td>\n",
-       "      <td>#19YearsLater #BackToHogwarts</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>901217559860703234</td>\n",
-       "      <td>2556</td>\n",
-       "      <td>934</td>\n",
-       "      <td>2017-08-25 22:59:44</td>\n",
-       "      <td>\"I rose up from the dead, I do it all the time...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>879505014422740994</td>\n",
-       "      <td>3879</td>\n",
-       "      <td>1578</td>\n",
-       "      <td>2017-06-27 01:01:50</td>\n",
-       "      <td>#HarryPotter20 There will always be magic.</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>818293342622650368</td>\n",
-       "      <td>737</td>\n",
-       "      <td>169</td>\n",
-       "      <td>2017-01-09 03:08:30</td>\n",
-       "      <td>Hermione tries to play Quidditch but hits her ...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>815434344357625856</td>\n",
-       "      <td>2719</td>\n",
-       "      <td>1012</td>\n",
-       "      <td>2017-01-01 05:47:52</td>\n",
-       "      <td>2016 was channeling The Little Mermaid and end...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "view-in-github",
+        "colab_type": "text"
+      },
+      "source": [
+        "<a href=\"https://colab.research.google.com/github/organisciak/Scripting-Course/blob/master/labs/08-lab-regular-expressions.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Zuf8EoDzwEem"
+      },
+      "source": [
+        "# Week 8 Lab: Regular Expressions"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "UquVNqG4wEeo"
+      },
+      "outputs": [],
+      "source": [
+        "import re\n",
+        "import pandas as pd"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "OXHoXToPwEer"
+      },
+      "source": [
+        "Here is a synopsis of the best film of our time, *Paddington*:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 69
+        },
+        "id": "PhMVBwhpwEes",
+        "outputId": "ebb331af-4ec3-4039-a2af-4fc4bbc37802"
+      },
+      "outputs": [
+        {
+          "data": {
+            "application/vnd.google.colaboratory.intrinsic+json": {
+              "type": "string"
+            },
+            "text/plain": [
+              "'After a deadly earthquake destroys his home in the Peruvian rainforest, a young bear (Ben Whishaw) makes his way to England in search of a new home. The bear, dubbed \"Paddington\" for the london train station, finds shelter with the family of Henry (Hugh Bonneville) and Mary Brown (Sally Hawkins). Although Paddington\\'s amazement at urban living soon endears him to the Browns, someone else has her eye on him: Taxidermist Millicent Clyde (Nicole Kidman) has designs on the rare bear and his hide.'"
+            ]
+          },
+          "execution_count": 1,
+          "metadata": {
+            "tags": []
+          },
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "synopsis = '''After a deadly earthquake destroys his home in the Peruvian rainforest, a young bear (Ben Whishaw) makes his way to England in search of a new home. The bear, dubbed \"Paddington\" for the london train station, finds shelter with the family of Henry (Hugh Bonneville) and Mary Brown (Sally Hawkins). Although Paddington's amazement at urban living soon endears him to the Browns, someone else has her eye on him: Taxidermist Millicent Clyde (Nicole Kidman) has designs on the rare bear and his hide.'''\n",
+        "synopsis"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mzj3T_AUwEez"
+      },
+      "source": [
+        "For these questions, use `re.findall(pattern, text)`, like so:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 34
+        },
+        "id": "J6oZ8HVcwEe0",
+        "outputId": "77adfe21-3c6f-4443-d744-0b3a75b5daf1"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "['After ', 'Henry ', 'Brown ', 'Sally ', 'Clyde ']"
+            ]
+          },
+          "execution_count": 6,
+          "metadata": {
+            "tags": []
+          },
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "re.findall('[A-Z]....\\s', synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "NNRlqYqh8j_h"
+      },
+      "source": [
+        "For many of the questions, I'll ask you for the `pattern` that goes into that search."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IqAdp-3iwEe5"
+      },
+      "source": [
+        "### Example Question\n",
+        "\n",
+        "- Q0.0: Write the regular expression to find all three- or four-letter words:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Ub9b8TUEwEe6",
+        "outputId": "a915df70-cf14-497f-db87-3e4334eff5b2"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "[' his ',\n",
+              " ' bear ',\n",
+              " ' his ',\n",
+              " ' new ',\n",
+              " ' The ',\n",
+              " ' for ',\n",
+              " ' with ',\n",
+              " ' and ',\n",
+              " ' soon ',\n",
+              " ' him ',\n",
+              " ' the ',\n",
+              " ' else ',\n",
+              " ' her ',\n",
+              " ' has ',\n",
+              " ' the ',\n",
+              " ' bear ',\n",
+              " ' his ']"
+            ]
+          },
+          "execution_count": 5,
+          "metadata": {
+            "tags": []
+          },
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "#@markdown Run this cell to see if it works\n",
+        "q0_answer = '\\s\\w\\w\\w\\w?\\s'    #@param {type:'string'}\n",
+        "re.findall(q0_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dVtGDijDwEfE"
+      },
+      "source": [
+        "## Questions #1: Matching Characters (20 pts)\n",
+        "\n",
+        "- Q1a: Write a regular expression to match all three-character words surrounded by spaces. You need to change the answer in the form field and run it to see if it works. (5pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "ZIIqEYxXwEfJ",
+        "scrolled": true
+      },
+      "outputs": [],
+      "source": [
+        "#@markdown *Run this cell to see if it works*\n",
+        "q1a_answer = ''    #@param {type:'string'}\n",
+        "re.findall(q1a_answer, synopsis) # This runs your pattern"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "esCPsEnjwEfM"
+      },
+      "source": [
+        "- Q1b: Write a regular expression to match all three-character words, allowing for instances like `(Ben` and `him:`.  (5pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "qt8_JQnxwEfM"
+      },
+      "outputs": [],
+      "source": [
+        "#@markdown *Run this cell to see if it works*\n",
+        "q1b_answer = ''    #@param {type:'string'}\n",
+        "re.findall(q1b_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AuFhWEX-wEfQ"
+      },
+      "source": [
+        "- Q1c: Write the regular expression to determine how many times the words `the` or `The` show up.  (5pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "R5LxSp3ewEfQ"
+      },
+      "outputs": [],
+      "source": [
+        "q1c_answer = ''  #@param {type:'string'}\n",
+        "matches = re.findall(q1c_answer, synopsis)\n",
+        "print(matches)\n",
+        "print(\"Count\", len(matches))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "KSxzA382wEfT"
+      },
+      "source": [
+        "- Q1d: Write a regular expression to find the context at the end of a sentence: the five characters leading up to the period. Results should be `[' home.', 'kins).', ' hide.']`  (5pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "seEtUQAFwEfU"
+      },
+      "outputs": [],
+      "source": [
+        "q1d_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q1d_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "dTJiDjSpwEfX"
+      },
+      "source": [
+        "## Questions #2: Repeating Patterns (36 pts)\n",
+        "\n",
+        "*Full Reference*\n",
+        "\n",
+        "**Matching characters**\n",
+        "- `a` - Match the letter `a`. Same for most other characters\n",
+        "- `.` - Match any single character\n",
+        "- `\\w` - Match any word character (letters, digits, and underscore; support for non-English characters varies)\n",
+        "- `\\W` - Match any non-word characters\n",
+        "- `\\d` - Match any digit\n",
+        "- `.` - Matches *any* character\n",
+        "- `\\.` - Matches a literal period (or `\\\\` matches literal backslash, `\\(` matches literal parenthesis, etc)\n",
+        "- `\\s` - Match any whitespace character (space, tabs, line breaks sometimes)\n",
+        "\n",
+        "**Multiple Matches**\n",
+        "- `[ab]` - Group of multiple possible characters - in this case `a` or `b`\n",
+        "- `[a-z]` matches any character from a to z\n",
+        "- `[A-Z]` matches any character from A to Z\n",
+        "- `[A-Zab]` matches any character from A to Z (`A-Z`), *or* `a` *or* `b`\n",
+        "\n",
+        "**Repeating**\n",
+        "\n",
+        "*'greedy' means that it captures as much as it can, 'lazy' means it captures as little as possible.*\n",
+        "- `?` - One or zero of the preceding match\n",
+        "- `+` - One or more of the preceding match (greedy)\n",
+        "- `*` - Zero or more of the preceding match (greedy)\n",
+        "- `*?`, `+?`  - Lazy versions of `*` and `+`\n",
+        "\n",
+        "**Position**\n",
+        "- `^` - Start of line\n",
+        "- `$` - End of line\n",
+        "\n",
+        "*Questions*\n",
+        "\n",
+        "- Q2a: Write the expression that matches the period at the end of a sentence and the first word of the next sentence. The matches should be: `['. The', '. Although']` (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "GCkA1uQIwEfX"
+      },
+      "outputs": [],
+      "source": [
+        "q2a_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2a_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hmv4DTKNwEfa"
+      },
+      "source": [
+        "- Q2b: Write the expression to match the two words following the word `a`. The results should be `['a deadly earthquake', 'a young bear', 'a new home']` (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "VSi2v7rmwEfb"
+      },
+      "outputs": [],
+      "source": [
+        "q2b_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2b_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "CgKT_nK5wEfe"
+      },
+      "source": [
+        "- Q2c: Write the expression to match all words with an `'s`. In this case, `Paddington's` is the only match.  (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "7zbb7pzCwEff"
+      },
+      "outputs": [],
+      "source": [
+        "q2c_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2c_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ZJNrE3OOwEfj"
+      },
+      "source": [
+        "- Q2d: Write the expression to match all values in parentheses. The results should be `['(Ben Whishaw)', '(Hugh Bonneville)', '(Sally Hawkins)', '(Nicole Kidman)']` (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "pLz1VYtjwEfk"
+      },
+      "outputs": [],
+      "source": [
+        "q2d_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2d_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "a0WWNmnwwEfn"
+      },
+      "source": [
+        "- Q2e: Write the expression to match all capitalized words, including `Paddington's`. (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "E7Z2tH1awEfo"
+      },
+      "outputs": [],
+      "source": [
+        "q2e_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2e_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IszncrdAwEfr"
+      },
+      "source": [
+        "- Q2f: Write the expression to match all pairings of capitalized words, like `Mary Brown` and `Although Paddington`. (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "xFrn1WuNwEfs"
+      },
+      "outputs": [],
+      "source": [
+        "q2f_answer = ''  #@param {type:'string'}\n",
+        "re.findall(q2f_answer, synopsis)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "BjGW8Q2zwEfv"
+      },
+      "source": [
+        "## Other Python Regular Expression Functions\n",
+        "\n",
+        "**Replace a pattern**:\n",
+        "    \n",
+        "```\n",
+        "re.sub(pattern, repl, string)\n",
+        "```\n",
+        "\n",
+        "This replaces the pattern matches with whatever string you provide to `repl`.\n",
+        "\n",
+        "**Check a match**:\n",
+        "    \n",
+        "```\n",
+        "re.search(pattern, string)\n",
+        "```\n",
+        "\n",
+        "This returns `None` if there are no matches (and a match object otherwise), so it's useful for asking, \"is there a match or not?\""
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vx7lh1DjwEfw"
+      },
+      "source": [
+        "## Pandas\n",
+        "\n",
+        "Load the following data into a DataFrame, containing the tweets of Lord_Voldemort7:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 195
+        },
+        "id": "Ou_n26JWwEfx",
+        "outputId": "a9ab9400-f94d-4163-86df-664486e096e6"
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>favourites</th>\n",
+              "      <th>retweets</th>\n",
+              "      <th>created_at</th>\n",
+              "      <th>text</th>\n",
+              "      <th>is_quoted</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>0</th>\n",
+              "      <td>903664289411686400</td>\n",
+              "      <td>1921</td>\n",
+              "      <td>556</td>\n",
+              "      <td>2017-09-01 17:02:10</td>\n",
+              "      <td>#19YearsLater #BackToHogwarts</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1</th>\n",
+              "      <td>901217559860703234</td>\n",
+              "      <td>2556</td>\n",
+              "      <td>934</td>\n",
+              "      <td>2017-08-25 22:59:44</td>\n",
+              "      <td>\"I rose up from the dead, I do it all the time...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2</th>\n",
+              "      <td>879505014422740994</td>\n",
+              "      <td>3879</td>\n",
+              "      <td>1578</td>\n",
+              "      <td>2017-06-27 01:01:50</td>\n",
+              "      <td>#HarryPotter20 There will always be magic.</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>3</th>\n",
+              "      <td>818293342622650368</td>\n",
+              "      <td>737</td>\n",
+              "      <td>169</td>\n",
+              "      <td>2017-01-09 03:08:30</td>\n",
+              "      <td>Hermione tries to play Quidditch but hits her ...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>4</th>\n",
+              "      <td>815434344357625856</td>\n",
+              "      <td>2719</td>\n",
+              "      <td>1012</td>\n",
+              "      <td>2017-01-01 05:47:52</td>\n",
+              "      <td>2016 was channeling The Little Mermaid and end...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                   id  ...  is_quoted\n",
+              "0  903664289411686400  ...      False\n",
+              "1  901217559860703234  ...      False\n",
+              "2  879505014422740994  ...      False\n",
+              "3  818293342622650368  ...      False\n",
+              "4  815434344357625856  ...      False\n",
+              "\n",
+              "[5 rows x 6 columns]"
+            ]
+          },
+          "execution_count": 11,
+          "metadata": {
+            "tags": []
+          },
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "tweets = pd.read_csv('https://raw.githubusercontent.com/organisciak/Scripting-Course/master/data/voldemort_tweets.csv')\n",
+        "tweets.head()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ndbd_zOUwEfz"
+      },
+      "source": [
+        "String actions on a DataFrame column (that is, a Series) are accessed with:\n",
+        "\n",
+        "```\n",
+        "df['columnName'].str\n",
+        "```\n",
+        "\n",
+        "For matching on regular expressions, you can use `.str.contains()`. For example:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 166
+        },
+        "id": "JhDkKIGgwEf0",
+        "outputId": "4b0f64ef-78aa-45d0-a17f-a9c9a04224db",
+        "scrolled": true
+      },
+      "outputs": [
+        {
+          "data": {
+            "text/html": [
+              "<div>\n",
+              "<style scoped>\n",
+              "    .dataframe tbody tr th:only-of-type {\n",
+              "        vertical-align: middle;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe tbody tr th {\n",
+              "        vertical-align: top;\n",
+              "    }\n",
+              "\n",
+              "    .dataframe thead th {\n",
+              "        text-align: right;\n",
+              "    }\n",
+              "</style>\n",
+              "<table border=\"1\" class=\"dataframe\">\n",
+              "  <thead>\n",
+              "    <tr style=\"text-align: right;\">\n",
+              "      <th></th>\n",
+              "      <th>id</th>\n",
+              "      <th>favourites</th>\n",
+              "      <th>retweets</th>\n",
+              "      <th>created_at</th>\n",
+              "      <th>text</th>\n",
+              "      <th>is_quoted</th>\n",
+              "    </tr>\n",
+              "  </thead>\n",
+              "  <tbody>\n",
+              "    <tr>\n",
+              "      <th>721</th>\n",
+              "      <td>300324033327661056</td>\n",
+              "      <td>1592</td>\n",
+              "      <td>3431</td>\n",
+              "      <td>2013-02-09 19:23:23</td>\n",
+              "      <td>#ThingsYouShouldntDo: Tell Harry Potter that y...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>549</th>\n",
+              "      <td>331832894916026369</td>\n",
+              "      <td>440</td>\n",
+              "      <td>802</td>\n",
+              "      <td>2013-05-07 18:08:21</td>\n",
+              "      <td>If looks could kill then my body would be a de...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>2172</th>\n",
+              "      <td>133699908418019328</td>\n",
+              "      <td>322</td>\n",
+              "      <td>884</td>\n",
+              "      <td>2011-11-08 00:18:36</td>\n",
+              "      <td>\"Over my dead body\" is trending... http://t.co...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "    <tr>\n",
+              "      <th>1846</th>\n",
+              "      <td>160917485812584448</td>\n",
+              "      <td>289</td>\n",
+              "      <td>1693</td>\n",
+              "      <td>2012-01-22 02:51:32</td>\n",
+              "      <td>First sources said Joe Paterno was dead. Now t...</td>\n",
+              "      <td>False</td>\n",
+              "    </tr>\n",
+              "  </tbody>\n",
+              "</table>\n",
+              "</div>"
+            ],
+            "text/plain": [
+              "                      id  ...  is_quoted\n",
+              "721   300324033327661056  ...      False\n",
+              "549   331832894916026369  ...      False\n",
+              "2172  133699908418019328  ...      False\n",
+              "1846  160917485812584448  ...      False\n",
+              "\n",
+              "[4 rows x 6 columns]"
+            ]
+          },
+          "execution_count": 21,
+          "metadata": {
+            "tags": []
+          },
+          "output_type": "execute_result"
+        }
       ],
-      "text/plain": [
-       "                   id  ...  is_quoted\n",
-       "0  903664289411686400  ...      False\n",
-       "1  901217559860703234  ...      False\n",
-       "2  879505014422740994  ...      False\n",
-       "3  818293342622650368  ...      False\n",
-       "4  815434344357625856  ...      False\n",
-       "\n",
-       "[5 rows x 6 columns]"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
+      "source": [
+        "matches = tweets['text'].str.contains('dead')\n",
+        "tweets[matches].sample(4)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "KHKiYQ6jwEf2"
+      },
+      "source": [
+        "*Tip*: The tweet text will be cut off by default when printed. You can change the width of columns to show the entire tweet with the following code:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "97VgtzEnwEf3"
+      },
+      "outputs": [],
+      "source": [
+        "pd.set_option(\"display.max_colwidth\", 160)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hCfqr3LdwEf5"
+      },
+      "source": [
+        "## Questions 3 (24pts)\n",
+        "\n",
+        "*Unless otherwise specified, return all columns when asked to return tweets; e.g. don't remove the id or retweets columns.*"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "_9TTHrs1wEf6"
+      },
+      "source": [
+        "- Q3a. Write the pattern to find all the tweets that mention \"Harry Potter\". (4 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "GTti7e-twEf6"
+      },
+      "outputs": [],
+      "source": [
+        "q3a_answer = ''  #@param {type:'string'}\n",
+        "matches = tweets['text'].str.contains(q3a_answer)\n",
+        "tweets[matches]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "WXuc2aWnwEf-"
+      },
+      "source": [
+        "- Q3b. Write the pattern to return the tweets that have a hashtag (assuming only word characters in hashtags). There should be 1432 rows. (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "199e5AkdwEf-"
+      },
+      "outputs": [],
+      "source": [
+        "q3b_answer = ''  #@param {type:'string'}\n",
+        "matches = tweets['text'].str.contains(q3b_answer)\n",
+        "tweets[matches]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "VG7bkS7owEgH"
+      },
+      "source": [
+        "- Q3c. Write the pattern to return the tweets that have at least *two* hashtags (assuming only word characters in hashtags). There should be either 190 rows or 166 rows (depending on how you interpreted the problem). (8 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "1v0C3jl3wEgI"
+      },
+      "outputs": [],
+      "source": [
+        "q3c_answer = '' #@param {type:'string'}\n",
+        "matches = tweets['text'].str.contains(q3c_answer)\n",
+        "tweets[matches]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "6Qkab5LQwEgK"
+      },
+      "source": [
+        "- Q3d. Write the pattern to return the tweets that mention a year in the current millennium (or something that looks like one). (6 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "ORjv0yqNwEgL"
+      },
+      "outputs": [],
+      "source": [
+        "q3d_answer = '' #@param {type:'string'}\n",
+        "matches = tweets['text'].str.contains(q3d_answer)\n",
+        "tweets[matches]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "-LqHFbXPwEgN"
+      },
+      "source": [
+        "## Some more Pandas string methods\n",
+        "\n",
+        "Counting matches in a column:\n",
+        "\n",
+        "```\n",
+        "df['columnName'].str.count(pat)\n",
+        "```\n",
+        "\n",
+        "Replacing a pattern with a string:\n",
+        "\n",
+        "```\n",
+        "df['columnName'].str.replace(pat, repl)\n",
+        "```"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "kJGxCqQKwEgO"
+      },
+      "source": [
+        "## Questions 4: Other string methods and non-regex Pandas practice (20 pts)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "sxPB7bftwEgO"
+      },
+      "source": [
+        "- Q4a Return the 20 tweets with the most favourites. (5 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "M3BEeekXwEgP",
+        "scrolled": true
+      },
+      "outputs": [],
+      "source": [
+        "# Answer-Q4a\n"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "00D18Di4wEgT"
+      },
+      "source": [
+        "- Q4b How many favourites in total do this account's 100 earliest tweets have? (5 pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "lGF9yvphwEgT"
+      },
+      "outputs": [],
+      "source": [
+        "q4b_answer = \"\" #@param {type:'string'}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Yar0FSsnwEgW"
+      },
+      "source": [
+        "- Q4c What is the id of the tweet matching the following criteria: has the lowest retweet-to-favourites ratio, among tweets *without hashtags* and *with more than 1000 favourites*. (10pts)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "vzpJJuL5wEgX"
+      },
+      "outputs": [],
+      "source": [
+        "q4c_answer = \"\" #@param {type:'string'}"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "l_qJtoTnQmAG"
+      },
+      "source": [
+        "## Submission Instructions\n",
+        "\n",
+        "Only one person from your group needs to submit the assignment."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "ZUaRz9ncQc4V"
+      },
+      "outputs": [],
+      "source": [
+        "#@markdown ### Enter your group member names for grading\n",
+        "my_name = \"\" #@param { type:'string' }\n",
+        "\n",
+        "#@markdown _Have you saved your work for yourself? Don't forget to Save a Copy in Drive so that you have your progress._"
+      ]
     }
-   ],
-   "source": [
-    "tweets = pd.read_csv('https://raw.githubusercontent.com/organisciak/Scripting-Course/master/data/voldemort_tweets.csv')\n",
-    "tweets.head()"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Ndbd_zOUwEfz"
-   },
-   "source": [
-    "String actions on a DataFrame column (that is, a Series) are accessed with:\n",
-    "\n",
-    "```\n",
-    "df['columnName'].str\n",
-    "```\n",
-    "\n",
-    "For matching on regular expressions, you can use `.str.contains()`. For examples:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
+  ],
+  "metadata": {
     "colab": {
-     "base_uri": "https://localhost:8080/",
-     "height": 166
-    },
-    "id": "JhDkKIGgwEf0",
-    "outputId": "4b0f64ef-78aa-45d0-a17f-a9c9a04224db",
-    "scrolled": true
-   },
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>favourites</th>\n",
-       "      <th>retweets</th>\n",
-       "      <th>created_at</th>\n",
-       "      <th>text</th>\n",
-       "      <th>is_quoted</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>721</th>\n",
-       "      <td>300324033327661056</td>\n",
-       "      <td>1592</td>\n",
-       "      <td>3431</td>\n",
-       "      <td>2013-02-09 19:23:23</td>\n",
-       "      <td>#ThingsYouShouldntDo: Tell Harry Potter that y...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>549</th>\n",
-       "      <td>331832894916026369</td>\n",
-       "      <td>440</td>\n",
-       "      <td>802</td>\n",
-       "      <td>2013-05-07 18:08:21</td>\n",
-       "      <td>If looks could kill then my body would be a de...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2172</th>\n",
-       "      <td>133699908418019328</td>\n",
-       "      <td>322</td>\n",
-       "      <td>884</td>\n",
-       "      <td>2011-11-08 00:18:36</td>\n",
-       "      <td>\"Over my dead body\" is trending... http://t.co...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1846</th>\n",
-       "      <td>160917485812584448</td>\n",
-       "      <td>289</td>\n",
-       "      <td>1693</td>\n",
-       "      <td>2012-01-22 02:51:32</td>\n",
-       "      <td>First sources said Joe Paterno was dead. Now t...</td>\n",
-       "      <td>False</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "                      id  ...  is_quoted\n",
-       "721   300324033327661056  ...      False\n",
-       "549   331832894916026369  ...      False\n",
-       "2172  133699908418019328  ...      False\n",
-       "1846  160917485812584448  ...      False\n",
-       "\n",
-       "[4 rows x 6 columns]"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {
-      "tags": []
-     },
-     "output_type": "execute_result"
+      "name": "08-lab-regular-expressions.ipynb",
+      "provenance": [],
+      "include_colab_link": true
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.8.10"
     }
-   ],
-   "source": [
-    "matches = tweets['text'].str.contains('dead')\n",
-    "tweets[matches].sample(4)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "KHKiYQ6jwEf2"
-   },
-   "source": [
-    "*Tip*: The tweet text will be cutoff by default when print. You can change the width of columns to show the entire tweet with the following code:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "97VgtzEnwEf3"
-   },
-   "outputs": [],
-   "source": [
-    "pd.set_option(\"display.max_colwidth\", 160)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "hCfqr3LdwEf5"
-   },
-   "source": [
-    "## Questions 3 (24pts)\n",
-    "\n",
-    "*Unless otherwise specified, return all columns when asked to return tweets; e.g. don't remove the id or retweets columns.*"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "_9TTHrs1wEf6"
-   },
-   "source": [
-    "- Q3a. Write the pattern to find all the tweets that mention \"Harry Potter\". (4 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "GTti7e-twEf6"
-   },
-   "outputs": [],
-   "source": [
-    "q3a_answer = ''  #@param {type:'string'}\n",
-    "matches = tweets['text'].str.contains(q3a_answer)\n",
-    "tweets[matches]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "WXuc2aWnwEf-"
-   },
-   "source": [
-    "- Q3b. Write the pattern to return the tweets that have a hashtag (assuming only word characters in hashtags). There should be 1432 rows. (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "199e5AkdwEf-"
-   },
-   "outputs": [],
-   "source": [
-    "q3b_answer = ''  #@param {type:'string'}\n",
-    "matches = tweets['text'].str.contains(q3b_answer)\n",
-    "tweets[matches]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "VG7bkS7owEgH"
-   },
-   "source": [
-    "- Q3c. Write the pattern to return the tweets that have at least *two* hashtags (assuming only word characters in hashtags). There should be 190 rows. (8 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "1v0C3jl3wEgI"
-   },
-   "outputs": [],
-   "source": [
-    "q3c_answer = '' #@param {type:'string'}\n",
-    "matches = tweets['text'].str.contains(q3c_answer)\n",
-    "tweets[matches]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "6Qkab5LQwEgK"
-   },
-   "source": [
-    "- Q3d. Write the pattern to return the tweets that mention a year in the current millenium (or something that looks like one). (6 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "ORjv0yqNwEgL"
-   },
-   "outputs": [],
-   "source": [
-    "q3d_answer = '' #@param {type:'string'}\n",
-    "matches = tweets['text'].str.contains(q3d_answer)\n",
-    "tweets[matches]"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "-LqHFbXPwEgN"
-   },
-   "source": [
-    "## Some more Pandas string methods\n",
-    "\n",
-    "Counting matches in a column:\n",
-    "\n",
-    "```\n",
-    "df['columnName'].str.count(pat)\n",
-    "```\n",
-    "\n",
-    "Replacing a pattern with a string:\n",
-    "\n",
-    "```\n",
-    "df['columnName'].str.replace(pat, repl)\n",
-    "```"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "kJGxCqQKwEgO"
-   },
-   "source": [
-    "## Questions 4: Other string methods and non-regex Pandas practice (20 pts)"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "sxPB7bftwEgO"
-   },
-   "source": [
-    "- Q4a Return the 20 tweets with the most favourites. (5 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "id": "M3BEeekXwEgP",
-    "scrolled": true
-   },
-   "outputs": [],
-   "source": [
-    "# Answer-Q4a\n"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "00D18Di4wEgT"
-   },
-   "source": [
-    "- Q4b How many total favourites do this account's first 100 tweets ever have? (5 pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "lGF9yvphwEgT"
-   },
-   "outputs": [],
-   "source": [
-    "q4b_answer = \"\" #@param {type:'string'}"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "Yar0FSsnwEgW"
-   },
-   "source": [
-    "- Q4c What is the id of the tweet matching the following criteria: has the lowest retweet-to-favourites ratio, among tweets *without hashtags* and *with more than 1000 favourites*. (10pts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "vzpJJuL5wEgX"
-   },
-   "outputs": [],
-   "source": [
-    "q4c_answer = \"\" #@param {type:'string'}"
-   ]
-  },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {
-    "id": "l_qJtoTnQmAG"
-   },
-   "source": [
-    "## Submission Instructions\n",
-    "\n",
-    "Only one person from your group needs to submit the assignment."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {
-    "cellView": "form",
-    "id": "ZUaRz9ncQc4V"
-   },
-   "outputs": [],
-   "source": [
-    "#@markdown ### Enter your group member names for grading\n",
-    "my_name = \"\" #@param { type:'string' }\n",
-    "\n",
-    "#@markdown _Have you saved your work for yourself? Don't forget to Save a Copy in Drive so that you have your progress._"
-   ]
-  }
- ],
- "metadata": {
-  "colab": {
-   "include_colab_link": true,
-   "name": "08-lab-regular-expressions.ipynb",
-   "provenance": []
-  },
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
   },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 1
-}
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file

	id	favourites	retweets	created_at	text	is_quoted
0	903664289411686400	1921	556	2017-09-01 17:02:10	#19YearsLater #BackToHogwarts	False
1	901217559860703234	2556	934	2017-08-25 22:59:44	\"I rose up from the dead, I do it all the time...	False
2	879505014422740994	3879	1578	2017-06-27 01:01:50	#HarryPotter20 There will always be magic.	False
3	818293342622650368	737	169	2017-01-09 03:08:30	Hermione tries to play Quidditch but hits her ...	False
4	815434344357625856	2719	1012	2017-01-01 05:47:52	2016 was channeling The Little Mermaid and end...	False
	id	favourites	retweets	created_at	text	is_quoted
721	300324033327661056	1592	3431	2013-02-09 19:23:23	#ThingsYouShouldntDo: Tell Harry Potter that y...	False
549	331832894916026369	440	802	2013-05-07 18:08:21	If looks could kill then my body would be a de...	False
2172	133699908418019328	322	884	2011-11-08 00:18:36	\"Over my dead body\" is trending... http://t.co...	False
1846	160917485812584448	289	1693	2012-01-22 02:51:32	First sources said Joe Paterno was dead. Now t...	False