# Number of interviews

**Goal:** We are interested in measuring the number of interviews over time

### Imports

In [11]:
from datetime import datetime, date
import sys
import os

# Put the directory two levels above the working directory on the import path.
# This has to run before `import utils` below (presumably that is where the
# project-local `utils` module lives — confirm).
sys.path.insert(0,os.path.abspath('../..'))

import pandas as pd

import utils
# setPlotly() returns eight objects; from their use further down, `plotly` is the
# plotly module (plotly.offline.iplot) and the rest are chart-building classes.
plotly, Box, Histogram, Layout, Figure, Bar, Scatter, Marker = utils.setPlotly()

### Getting the data

In [12]:
from custom import allowedDomains, allowedStatus

In [13]:
# Handle to the interview archive; it is queried below through .count() and .find().
ia = utils.getInterviewArchive()

### Total number of interviews

In [14]:
# NOTE(review): if `ia` is a PyMongo collection, `count()` no longer exists in PyMongo 4;
# confirm the driver version and switch to `count_documents({})` when upgrading.
ia.count()

0

### Filters

We only want interviews with specific statuses, domains and dates in our dataframe.

In [15]:
# Date boundaries (naive datetimes) for filtering interviews on createTime.
april4 = datetime(year=2016, month=4, day=4)
july6 = datetime(year=2016, month=7, day=6)
july7 = datetime(year=2016, month=7, day=7)
sept1 = datetime(year=2016, month=9, day=1)

In [16]:
# Single-day variant, kept for reference:
# july_6 = pd.DataFrame(list(ia.find({"createTime": {"$gte": july6 , "$lt": july7 }, "domainName": {"$in": allowedDomains}})))

# Every archived interview created on or after April 4 whose domain is allowed.
allFromApril4 = pd.DataFrame(
    list(
        ia.find(
            {
                "createTime": {"$gte": april4},
                "domainName": {"$in": allowedDomains},
            }
        )
    )
)

### Friendly functions

These helper functions let us filter the interviews and obtain grouped dataframes.

In [17]:
def onlyPeds(d):
    """
    Filter records for ped modules

    Keeps the rows of ``d`` whose ``chiefComplaint`` contains the literal
    substring ``'ediatric'`` (so both "Pediatric" and "pediatric" match).

    Parameters
    ----------
    d : pandas.DataFrame
        Interview records with a string ``chiefComplaint`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows, with their original index labels. Rows whose
        ``chiefComplaint`` is missing are treated as non-matching. A frame
        without rows is returned as an empty copy.
    """
    # A frame built from zero records has no columns at all, so looking up
    # 'chiefComplaint' would raise KeyError; there is nothing to filter anyway.
    if len(d) == 0:
        return d.copy()
    # na=False: a missing complaint would otherwise put NaN in the mask, which
    # pandas refuses to index with. regex=False: the needle is a plain substring.
    isPediatric = d['chiefComplaint'].str.contains('ediatric', regex=False, na=False)
    return d[isPediatric]

def groupByDate(df):
    """
    Group by create time

    Counts the records of ``df`` per calendar day of ``createTime``.

    Parameters
    ----------
    df : pandas.DataFrame
        Records with a datetime-like ``createTime`` column (it is read
        through the ``.dt`` accessor).

    Returns
    -------
    pandas.DataFrame
        One row per day, with columns ``createTime`` (the day as a
        ``datetime.date``) and ``total`` (number of records on that day).
        Input without rows yields an empty frame with those two columns.
    """
    # A frame built from zero records has no 'createTime' column, so the
    # lookup below would raise KeyError; return an empty summary instead.
    if len(df) == 0:
        return pd.DataFrame({
            "createTime": pd.Series(dtype="object"),
            "total": pd.Series(dtype="int64"),
        })
    # The grouping key keeps the Series name 'createTime', which becomes the
    # first column once the index is reset.
    perDay = df.groupby(df["createTime"].dt.date).size()
    return perDay.reset_index(name="total")

def groupByModule(df):
    """
    Group by create time and module

    Counts the records of ``df`` per (``chiefComplaint``, calendar day of
    ``createTime``) pair. The input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Records with a ``chiefComplaint`` column and a datetime-like
        ``createTime`` column (it is read through the ``.dt`` accessor).

    Returns
    -------
    pandas.DataFrame
        One row per observed pair, with columns ``chiefComplaint``,
        ``createDate`` (the day as a ``datetime.date``) and ``total``.
        Input without rows yields an empty frame with those three columns.
    """
    # A frame built from zero records has none of the expected columns, so the
    # lookups below would raise KeyError; return an empty summary instead.
    if len(df) == 0:
        return pd.DataFrame({
            "chiefComplaint": pd.Series(dtype="object"),
            "createDate": pd.Series(dtype="object"),
            "total": pd.Series(dtype="int64"),
        })
    # assign() builds a new frame, so the caller's dataframe is left untouched.
    withDate = df.assign(createDate=df["createTime"].dt.date)
    perModuleDay = withDate.groupby(["chiefComplaint", "createDate"]).size()
    return perModuleDay.reset_index(name="total")

## Number of interviews

### All interviews

In [18]:
# Daily interview counts over the whole period, drawn as a bar chart.
totalInterviews = groupByDate(allFromApril4)
# Also store the calendar day on the raw frame as a `createDate` column.
allFromApril4["createDate"] = allFromApril4["createTime"].dt.date
plotly.offline.iplot(
    Figure(
        data=[Bar(x=totalInterviews['createTime'], y=totalInterviews['total'])],
        layout=Layout(title="Number of interviews per day"),
    )
)

KeyError: 'createTime'

### Pediatric interviews

In [None]:
# Daily counts restricted to pediatric modules, drawn as a bar chart.
totalPeds = groupByDate(onlyPeds(allFromApril4))
plotly.offline.iplot(
    Figure(
        data=[Bar(x=totalPeds['createTime'], y=totalPeds['total'])],
        layout=Layout(title="Number of pediatric interviews per day"),
    )
)

In [None]:
# Per-module daily counts, limited to allowed statuses and allowed domains.
x = groupByModule(
    allFromApril4[
        allFromApril4["status"].isin(allowedStatus)
        & allFromApril4["domainName"].isin(allowedDomains)
    ]
)

In [None]:
# Distinct module names present in the grouped counts, in order of first appearance.
modules = x["chiefComplaint"].unique()


### Interviews by module

In [None]:
# One bar trace per module: days on the x axis, interview counts on the y axis.
charts = []
for m in modules:
    y = x[x["chiefComplaint"] == m]
    charts.append(Bar(y=y['total'], x=y['createDate'], name=m))

# barmode='stack' piles the per-module bars of the same day on top of each other.
layout = Layout(barmode='stack', title='Interviews per module')
fig = Figure(data=charts, layout=layout)
# A bare `iplot` is never imported in this notebook (NameError); call it through
# the `plotly` module, as the other chart cells do.
plotly.offline.iplot(fig)

In [None]:
# Per-module daily counts for pediatric modules only, limited to allowed
# statuses and allowed domains.
x = groupByModule(
    onlyPeds(
        allFromApril4[
            allFromApril4["status"].isin(allowedStatus)
            & allFromApril4["domainName"].isin(allowedDomains)
        ]
    )
)
# Variant without the status/domain filter:
# x = groupByModule(onlyPeds(allFromApril4))

### Pediatric interviews per module

In [None]:
# One bar trace per module: days on the x axis, interview counts on the y axis.
# NOTE(review): `modules` was computed from the earlier, non-pediatric grouping, so
# modules absent from the pediatric `x` produce empty traces here — confirm intended.
charts = []
for m in modules:
    y = x[x["chiefComplaint"] == m]
    charts.append(Bar(y=y['total'], x=y['createDate'], name=m))

# barmode='stack' piles the per-module bars of the same day on top of each other.
layout = Layout(barmode='stack', title='Pediatric interviews per module')
fig = Figure(data=charts, layout=layout)
# A bare `iplot` is never imported in this notebook (NameError); call it through
# the `plotly` module, as the other chart cells do.
plotly.offline.iplot(fig)