### This notebook tests the bottom-most loop of the algorithm.

In [32]:
import pandas as pd
from collections import defaultdict

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.options.display.max_columns = None

In [33]:
# Parameters selecting the single question examined in this notebook.
csv = '../data/yrbss_2017.csv'
location = 'XX'
topic = 'Unintentional Injuries and Violence'
subtopic = 'Behaviors that Contribute to Violence'
risk_question = 'Were electronically bullied'

# Members of the three demographic groups; each group is looped over once per question.
Gender = [
    'Male',
    'Female',
]
Race = [
    'Asian',
    'Black or African American',
    'Hispanic or Latino',
    'White',
    'Multiple Race',
    'American Indian or Alaska Native',
    'Native Hawaiian or Other Pacific Islander',
]
Sexuality = [
    'Gay or lesbian',
    'Gay, lesbian, or bisexual',
    'Bisexual',
    'Not sure',
    'Heterosexual (straight)',
]

In [34]:
# Read the full survey file, then narrow it one column at a time
# (location -> topic -> subtopic -> question) down to the rows for the
# single question this notebook loops over.
yrbss = pd.read_csv(csv)
us = yrbss.loc[yrbss['LocationAbbr'] == location]
uiv = us.loc[us['Topic'] == topic]
bcv = uiv.loc[uiv['Subtopic'] == subtopic]
question = bcv.loc[bcv['Greater_Risk_Question'] == risk_question]

In [35]:
# The overall result for this question: the row where every demographic
# column is 'Total'. All significance tests go against these values.
q_total = question[
    (question['Grade'] == 'Total')
    & (question['Sex'] == 'Total')
    & (question['Race'] == 'Total')
    & (question['SexualIdentity'] == 'Total')
    & (question['SexOfSexualContacts'] == 'Total')
]
if q_total.empty:
    # .iloc[0] on an empty selection raises IndexError with a generic
    # out-of-bounds message; raise the same exception type but say what is missing.
    raise IndexError(
        "No overall ('Total') row found for question {!r} at location {!r}".format(
            risk_question, location))

# If more than one row matches, the first one is used.
q_val = q_total['Greater_Risk_Data_Value'].iloc[0]
q_low = q_total['Greater_Risk_Low_Confidence_Limit'].iloc[0]
q_high = q_total['Greater_Risk_High_Confidence_Limit'].iloc[0]
print('q_total', q_val, q_low, q_high)

q_total 14.9369 13.7456 16.2121


In [36]:
# Parallel accumulators filled while looping over the groups: entry i of each
# list describes the same significant subgroup. The generator yields a fresh
# list for every name, so the five lists are independent objects.
group, data_val, low_val, high_val, significance = ([] for _ in range(5))

In [37]:
# Compare each gender's estimate with the overall estimate for this question.
# A subgroup is recorded as 'less' (or 'more') when its point estimate lies
# below (above) the overall confidence interval AND the overall point estimate
# lies above (below) the subgroup's own confidence interval.
# Note: results are appended to the shared lists, so re-running this cell
# without re-creating the lists first adds duplicate entries.
print('q_total', q_val, q_low, q_high)
for gender in Gender:
    web_total = question[
        (question['Grade'] == 'Total')
        & (question['Sex'] == gender)
        & (question['Race'] == 'Total')
        & (question['SexualIdentity'] == 'Total')
        & (question['SexOfSexualContacts'] == 'Total')
    ]
    if web_total.empty:
        # No row for this subgroup: .iloc[0] would raise IndexError and abort
        # the loop, so report it and move on to the next subgroup.
        print(gender, 'no data')
        continue

    g_val = web_total['Greater_Risk_Data_Value'].iloc[0]
    g_low = web_total['Greater_Risk_Low_Confidence_Limit'].iloc[0]
    g_high = web_total['Greater_Risk_High_Confidence_Limit'].iloc[0]

    # Comparisons involving NaN are False, so a subgroup with missing values
    # falls through to the 'no sig' branch.
    if g_val < q_low and q_val > g_high:
        direction = 'less'
    elif g_val > q_high and q_val < g_low:
        direction = 'more'
    else:
        print(gender, 'no sig', g_val, g_low, g_high)
        continue

    group.append(gender)
    data_val.append(g_val)
    low_val.append(g_low)
    high_val.append(g_high)
    significance.append(direction)

q_total 14.9369 13.7456 16.2121


In [38]:
# Compare each race's estimate with the overall estimate for this question.
# A subgroup is recorded as 'less' (or 'more') when its point estimate lies
# below (above) the overall confidence interval AND the overall point estimate
# lies above (below) the subgroup's own confidence interval.
# Note: results are appended to the shared lists, so re-running this cell
# without re-creating the lists first adds duplicate entries.
print('q_total', q_val, q_low, q_high)
for race in Race:
    web_total = question[
        (question['Grade'] == 'Total')
        & (question['Sex'] == 'Total')
        & (question['Race'] == race)
        & (question['SexualIdentity'] == 'Total')
        & (question['SexOfSexualContacts'] == 'Total')
    ]
    if web_total.empty:
        # No row for this subgroup: .iloc[0] would raise IndexError and abort
        # the loop, so report it and move on to the next subgroup.
        print(race, 'no data')
        continue

    g_val = web_total['Greater_Risk_Data_Value'].iloc[0]
    g_low = web_total['Greater_Risk_Low_Confidence_Limit'].iloc[0]
    g_high = web_total['Greater_Risk_High_Confidence_Limit'].iloc[0]

    # Comparisons involving NaN are False, so a subgroup with missing values
    # falls through to the 'no sig' branch.
    if g_val < q_low and q_val > g_high:
        direction = 'less'
    elif g_val > q_high and q_val < g_low:
        direction = 'more'
    else:
        print(race, 'no sig', g_val, g_low, g_high)
        continue

    group.append(race)
    data_val.append(g_val)
    low_val.append(g_low)
    high_val.append(g_high)
    significance.append(direction)

q_total 14.9369 13.7456 16.2121
Multiple Race no sig 16.0412 12.0546 21.031
American Indian or Alaska Native no sig 13.1927 7.2066 22.9227
Native Hawaiian or Other Pacific Islander no sig 15.0359 10.2643 21.4943


In [39]:
# Compare each sexual-identity group's estimate with the overall estimate for
# this question. A subgroup is recorded as 'less' (or 'more') when its point
# estimate lies below (above) the overall confidence interval AND the overall
# point estimate lies above (below) the subgroup's own confidence interval.
# Note: results are appended to the shared lists, so re-running this cell
# without re-creating the lists first adds duplicate entries.
print('q_total', q_val, q_low, q_high)
for sexuality in Sexuality:
    web_total = question[
        (question['Grade'] == 'Total')
        & (question['Sex'] == 'Total')
        & (question['Race'] == 'Total')
        & (question['SexualIdentity'] == sexuality)
        & (question['SexOfSexualContacts'] == 'Total')
    ]
    if web_total.empty:
        # No row for this subgroup: .iloc[0] would raise IndexError and abort
        # the loop, so report it and move on to the next subgroup.
        print(sexuality, 'no data')
        continue

    g_val = web_total['Greater_Risk_Data_Value'].iloc[0]
    g_low = web_total['Greater_Risk_Low_Confidence_Limit'].iloc[0]
    g_high = web_total['Greater_Risk_High_Confidence_Limit'].iloc[0]

    # Comparisons involving NaN are False, so a subgroup with missing values
    # falls through to the 'no sig' branch.
    if g_val < q_low and q_val > g_high:
        direction = 'less'
    elif g_val > q_high and q_val < g_low:
        direction = 'more'
    else:
        print(sexuality, 'no sig', g_val, g_low, g_high)
        continue

    group.append(sexuality)
    data_val.append(g_val)
    low_val.append(g_low)
    high_val.append(g_high)
    significance.append(direction)

q_total 14.9369 13.7456 16.2121
Gay or lesbian no sig 18.3849 14.4083 23.162


In [40]:
# Assemble the collected subgroup results into one table. `topic` is a single
# string, so it is repeated on every row alongside the per-subgroup lists.
results_df = pd.DataFrame.from_dict(dict(
    topic=topic,
    group=group,
    data_val=data_val,
    low_val=low_val,
    high_val=high_val,
    significance=significance,
))

In [41]:
# Display the subgroups that differed significantly from the overall value.
results_df

Unnamed: 0,topic,group,data_val,low_val,high_val,significance
0,Unintentional Injuries and Violence,Male,9.9111,9.1891,10.6831,less
1,Unintentional Injuries and Violence,Female,19.7383,17.4162,22.2864,more
2,Unintentional Injuries and Violence,Asian,10.0449,7.3943,13.5071,less
3,Unintentional Injuries and Violence,Black or African American,10.8866,8.9959,13.1173,less
4,Unintentional Injuries and Violence,Hispanic or Latino,12.283,11.5039,13.107,less
5,Unintentional Injuries and Violence,White,17.3498,15.6353,19.2095,more
6,Unintentional Injuries and Violence,"Gay, lesbian, or bisexual",27.0578,23.1161,31.3971,more
7,Unintentional Injuries and Violence,Bisexual,29.6005,24.7944,34.9058,more
8,Unintentional Injuries and Violence,Not sure,21.9765,16.9313,28.0181,more
9,Unintentional Injuries and Violence,Heterosexual (straight),13.3442,12.3753,14.3765,less
