In [1]:
import random

import pandas as pd
from pymongo import MongoClient

In [2]:
client = MongoClient(host="localhost",port=27017)
db = client["abtest-db"]
ds_app = db["ds-applicants"]
print("client:", type(client))
print("ds_app:", type(ds_app))

client: <class 'pymongo.synchronous.mongo_client.MongoClient'>
ds_app: <class 'pymongo.synchronous.collection.Collection'>


Extract: Developing the Hypothesis

In [4]:
# How many applicants complete admissions quiz?
result = ds_app.aggregate(
    [
        {
            "$group":{
                "_id": "$admissionsQuiz",
                "count": { "$count": {}}
            }
        }
    ]
)

for r in result: 
    if r["_id"] == 'complete':
        complete = r["count"]
    else :
        incomplete = r["count"]
    print(r)

print("Completed quiz:", complete)
print("Did not complete quiz:", incomplete)

{'_id': 'complete', 'count': 3717}
{'_id': 'incomplete', 'count': 1308}
Completed quiz: 3717
Did not complete quiz: 1308


In [5]:

prop_incomplete = incomplete/(complete+incomplete)
print(
    "Proportion of users who don't complete admissions quiz:", round(prop_incomplete, 2)
)

Proportion of users who don't complete admissions quiz: 0.26


In [6]:
null_hypothesis = """
There is no relationship between receiveing an email and completing the admissions quiz.
Sending an email to 'no-quiz applicants' does not increase the rate of completion."""

alternate_hypothesis = """
There is a relationship between receiving an email and completing the admissions quiz.
Sending an email to 'no-quiz applicants' does increase the rate of completion.
"""

print("Null Hypothesis:", null_hypothesis)
print("Alternate Hypothesis:", alternate_hypothesis)

Null Hypothesis: 
There is no relationship between receiveing an email and completing the admissions quiz.
Sending an email to 'no-quiz applicants' does not increase the rate of completion.
Alternate Hypothesis: 
There is a relationship between receiving an email and completing the admissions quiz.
Sending an email to 'no-quiz applicants' does increase the rate of completion.



In [16]:
def find_by_date(collection, date_string):
    """Find records in a PyMongo Collection created on a given date.

    Parameters
    ----------
    collection : pymongo.collection.Collection
        Collection in which to search for documents.
    date_string : str
        Date to query. Format must be '%Y-%m-%d', e.g. '2022-06-28'.

    Returns
    -------
    observations : list
        Result of query. List of documents (dictionaries).
    """
    # Convert `date_string` to datetime object
    start = pd.to_datetime(date_string, format="%Y-%m-%d")
    end = start + pd.DateOffset(days=1)

    # Try both string and datetime query to handle both cases
    start_str = start.strftime("%Y-%m-%dT%H:%M:%S")
    end_str = end.strftime("%Y-%m-%dT%H:%M:%S")

    # Build OR query to match both possible types
    query = {
        "$and": [
            { "admissionsQuiz": "incomplete" },
            {
                "$or": [
                    { "createdAt": { "$gte": start, "$lt": end } },        # if createdAt is stored as datetime
                    { "createdAt": { "$gte": start_str, "$lt": end_str } } # if createdAt is stored as string
                ]
            }
        ]
    }

    result = collection.find(query)
    observations = list(result)

    return observations


In [19]:
find_by_date(collection=ds_app,date_string="2022-05-04")[0]

{'_id': '6525d787953844722c838415',
 'createdAt': '2022-05-05 18:39:40',
 'firstName': 'Curtis',
 'lastName': 'Rogers',
 'email': 'curtis.rogers14@microsift.com',
 'birthday': '1987-12-04 00:00:00',
 'gender': 'male',
 'highestDegreeEarned': 'Some College (1-3 years)',
 'countryISO2': 'NG',
 'admissionsQuiz': 'incomplete'}

In [20]:
observations = find_by_date(collection=ds_app, date_string="2022-05-02")

print("observations type:", type(observations))
print("observations len:", len(observations))
observations[0]

observations type: <class 'list'>
observations len: 43


{'_id': '6525d787953844722c838459',
 'createdAt': '2022-05-03 20:28:29',
 'firstName': 'Thomas',
 'lastName': 'Follmer',
 'email': 'thomas.follmer56@gmall.com',
 'birthday': '1978-05-12 00:00:00',
 'gender': 'male',
 'highestDegreeEarned': 'High School or Baccalaureate',
 'countryISO2': 'NG',
 'admissionsQuiz': 'incomplete'}