#### base_m_survey - https://borninbradford.github.io/datadict/bib/bib_baseline/

In [2]:
import sys

sys.path.append('/Users/samrelins/Documents/LIDA/dental_project/src/')

from baseline_data_prep import *
import os
import pandas as pd
import plotly.express as px
from patsy import dmatrices
import statsmodels.api as sm

In [2]:
# Baseline maternal-survey columns examined in this notebook. Entries are laid
# out by variable-name prefix; the resulting order matches the data-dictionary
# notes in the markdown that follows this cell.
cols_of_interest = [
    # participant identifier, mother's age, alcohol during pregnancy
    'entity_id',
    'agemy_mbqall',
    'alc0drpreg',
    # benefits received
    'ben0carall', 'ben0chdben', 'ben0chdtxc', 'ben0disbla', 'ben0dnwtoa',
    'ben0dontkw', 'ben0houseb', 'ben0incapb', 'ben0incsup', 'ben0intjsa',
    'ben0mentst', 'ben0nobenf', 'ben0wrktxc',
    # bread products eaten per week
    'brd0brolls', 'brd0brwbrd', 'brd0nnptbg', 'brd0othbrd', 'brd0parath',
    'brd0pizzas', 'brd0rotich', 'brd0whtbrd',
    # cola consumption (per day / per week)
    'cdr0clcfpd', 'cdr0clcfpw', 'cdr0cldcpd', 'cdr0cldcpw', 'cdr0dccfpd',
    'cdr0dccfpw', 'cdr0dcdcpd', 'cdr0dcdcpw',
    # drug use, parental education, ethnicity
    'drg0drguse', 'edu0fthede', 'edu0mumede', 'eth0eth3gp',
    # financial standing
    'fin0finnow', 'fin0frsdec', 'fin0frselg', 'fin0frsffm', 'fin0frsfur',
    'fin0frshci', 'fin0frshob', 'fin0frshol', 'fin0frshwm', 'fin0frssav',
    'fin0frssho', 'fin0frsysf', 'fin0manfin', 'fin0upbill',
    # GHQ: derived factor score, then items ghq0ques01 .. ghq0ques28
    'ghq0fctrsc',
    *['ghq0ques%02d' % item for item in range(1, 29)],
    # tap water and household composition
    'h2o0twathm',
    'hhd0cohabt', 'hhd0f16_64', 'hhd0f2_15y', 'hhd0fov65y', 'hhd0fund2y',
    'hhd0m16_64', 'hhd0m2_15y', 'hhd0marchb', 'hhd0mov65y', 'hhd0mund2y',
    'hhd0tothhd',
    # index of multiple deprivation (2007, 2010, within Bradford)
    'imd_2007_decile_nat', 'imd_2007_quintile_nat', 'imd_2007_rank_nat',
    'imd_2007_score',
    'imd_2010_decile_nat', 'imd_2010_quintile_nat', 'imd_2010_rank_nat',
    'imd_2010_score',
    'imddecileswithinbradford', 'imdquintileswithinbradford',
    # employment
    'job0curemp', 'job0fthemp', 'job0mumemp',
    # socio-economic position, booking BMI, housing, smoking
    'mbqlcasep5gp', 'mms0mbkbmi',
    'res0bedrms', 'res0hseten',
    'smk0regsmk', 'smk0smkprg',
]

* 'agemy_mbqall' - Mother age (months): Maternal Baseline Questionnaire
* 'alc0drpreg' - Mother drank alcohol during pregnancy or 3 months before

#### benefits:
* 'ben0carall' - Carer’s allowance
* 'ben0chdben' - Child benefit
* 'ben0chdtxc' - Child Tax credit
* 'ben0disbla' - Disability living allowance
* 'ben0dnwtoa' - Does not wish to answer
* 'ben0dontkw' - Don’t know
* 'ben0houseb' - Housing benefit
* 'ben0incapb' - Incapacity benefit
* 'ben0incsup' - Income support
* 'ben0intjsa' - Income tested job seekers allowance
* 'ben0mentst' - Means tested benefits received
* 'ben0nobenf' - No Benefits
* 'ben0wrktxc' - Working Tax credit

#### food:

* 'brd0brolls' - Baps/rolls eaten per week
* 'brd0brwbrd' - Slices of brown bread eaten per week
* 'brd0nnptbg' - Naan/pitta bread/bagel eaten per week
* 'brd0othbrd' - Other bread products eaten per week
* 'brd0parath' - Parathas eaten per week
* 'brd0pizzas' - Slices of pizza eaten per week
* 'brd0rotich' - Roti/chappatis eaten per week
* 'brd0whtbrd' - Slices of white bread eaten per week

#### soft drinks:

* 'cdr0clcfpd' - Number of cups of cola (regular, caffeinated) per day
* 'cdr0clcfpw' - Number of cups of cola (regular, caffeinated) per week
* 'cdr0cldcpd' - Number of cups of cola (regular, decaffeinated) per day
* 'cdr0cldcpw' - Number of cups of cola (regular, decaffeinated) per week
* 'cdr0dccfpd' - Number of cups of diet cola (caffeinated) per day
* 'cdr0dccfpw' - Number of cups of diet cola (caffeinated) per week
* 'cdr0dcdcpd' - Number of cups of diet cola (decaffeinated) per day
* 'cdr0dcdcpw' - Number of cups of diet cola (decaffeinated) per week

* 'drg0drguse' - Mother used drugs during pregnancy

#### education:

* 'edu0fthede' - Baby’s father’s highest educational qualification (equivalised)
* 'edu0mumede' - Mother’s highest educational qualification (equivalised)

#### ethnicity:

* 'eth0eth3gp' - Mother’s ethnic group - 3 categories

#### financial standing:

* 'fin0finnow' - Compared to a year ago how are you doing financially
* 'fin0frsdec' - Enough money to keep home in decent state of decoration
* 'fin0frselg' - Money to replace or repair major electrical goods
* 'fin0frsffm' - Family and friends for a drink or meal at least once a month
* 'fin0frsfur' - Money to replace any worn out furniture
* 'fin0frshci' - Household contents insurance
* 'fin0frshob' - A hobby or leisure activity
* 'fin0frshol' - Holiday from home for at least one week once a year
* 'fin0frshwm' - In winter are you able to keep home warm enough
* 'fin0frssav' - Money to make regular savings of £10 a month
* 'fin0frssho' - Two pairs of all weather shoes
* 'fin0frsysf' - A small amount of money to spend on yourself each week
* 'fin0manfin' - How well mother and husband/partner managing financially
* 'fin0upbill' - Are you up to date with all these bills

#### health / values questions:

* 'ghq0fctrsc' - GHQ factor score (derived)
* 'ghq0ques01' - Been feeling perfectly well & in good health
* 'ghq0ques02' - Been feeling in need of a good tonic
* 'ghq0ques03' - Been feeling run down and out of sorts
* 'ghq0ques04' - Felt that you are ill
* 'ghq0ques05' - Been getting pains in your head
* 'ghq0ques06' - Been getting a feeling of tightness or pressure in your head
* 'ghq0ques07' - Been having hot or cold spells
* 'ghq0ques08' - Lost much sleep over worry
* 'ghq0ques09' - Had difficulty staying asleep once you are off
* 'ghq0ques10' - Felt constantly under strain
* 'ghq0ques11' - Been getting edgy and bad-tempered
* 'ghq0ques12' - Been getting scared or panicky for no good reason
* 'ghq0ques13' - Found everything getting on top of you
* 'ghq0ques14' - Been feeling nervous and strung-up all the time
* 'ghq0ques15' - Been managing to keep yourself busy and occupied
* 'ghq0ques16' - Been taking longer over the things you do
* 'ghq0ques17' - Felt on the whole you were doing things well
* 'ghq0ques18' - Been satisfied with way you’ve carried out your tasks
* 'ghq0ques19' - Felt that you are playing a useful part in things
* 'ghq0ques20' - Felt capable of making decisions about things
* 'ghq0ques21' - Been able to enjoy your normal day-to-day activities
* 'ghq0ques22' - Been thinking of yourself as worthless person
* 'ghq0ques23' - Felt life is entirely hopeless
* 'ghq0ques24' - Felt that life isn’t worth living
* 'ghq0ques25' - Thought of the possibility that you might make away with yourself
* 'ghq0ques26' - Found at times couldn’t do anything because nerves too bad
* 'ghq0ques27' - Found yourself wishing you were dead and away from it all
* 'ghq0ques28' - Found the idea of taking your own life kept coming into your mind

* 'h2o0twathm' - Glasses of tap water drunk at home per day

* 'hhd0cohabt' - Cohabitation status
* 'hhd0f16_64' - Number of females between 16 and 64 in household
* 'hhd0f2_15y' - Number of females between 2 and 15 in household
* 'hhd0fov65y' - Number of females over 65 in household
* 'hhd0fund2y' - Number of females under 2 in household
* 'hhd0m16_64' - Number of males between 16 and 64 in household
* 'hhd0m2_15y' - Number of males between 2 and 15 in household
* 'hhd0marchb' - Marital and cohabitation status combined (derived)
* 'hhd0mov65y' - Number of males over 65 in household
* 'hhd0mund2y' - Number of males under 2 in household
* 'hhd0tothhd' - Total number of members in household

#### IMD info

* 'imd_2007_decile_nat' - 2007_decile
* 'imd_2007_quintile_nat' - 2007_quintile
* 'imd_2007_rank_nat' - IMD_2007_RANK
* 'imd_2007_score' - IMD_200710nov
* 'imd_2010_decile_nat' - 2010_decile
* 'imd_2010_quintile_nat' - 2010_quintile
* 'imd_2010_rank_nat' - RANK_2010
* 'imd_2010_score' - IMD_2010_SCORE
* 'imddecileswithinbradford' - IMD Deciles (WITHIN Bradford)
* 'imdquintileswithinbradford' - IMD Quintiles (WITHIN Bradford)

#### employment info:

* 'job0curemp' - Mother currently employed
* 'job0fthemp' - Father’s employment status
* 'job0mumemp' - Mother’s employment status - derived
* 'mbqlcasep5gp' - Socio-economic position (5 group LCA)
* 'mms0mbkbmi' - Mother’s booking BMI (derived)
* 'res0bedrms' - How many bedrooms in household
* 'res0hseten' - Housing tenure (derived)
* 'smk0regsmk' - Mother ever regularly smoked
* 'smk0smkprg' - Mother smoked during pregnancy (derived)

In [3]:
def plot_ga_rate_comparison(
        feature,
        bib_dir="/Users/samrelins/Documents/LIDA/dental_project/data/bib_data/"):
    """Show two grouped bar charts comparing dental GA status across a feature.

    The first chart plots, for every value of ``feature``, the share of rows
    falling in each ``has_dental_ga`` category; the second plots the raw row
    counts behind those shares.

    Parameters
    ----------
    feature : str
        Name of the baseline survey column to break the comparison down by.
    bib_dir : str, optional
        Directory handed to ``return_baseline_ga_df``. Defaults to the path
        that was previously hard-coded in this function, so existing calls
        behave as before.

    Returns
    -------
    None
        Both figures are displayed with ``fig.show()``.

    Notes
    -----
    Assumes ``return_baseline_ga_df`` returns a frame holding ``entity_id``,
    ``has_dental_ga`` and the requested feature column -- TODO confirm against
    ``baseline_data_prep``.
    """
    feature_df = return_baseline_ga_df(bib_dir, [feature])
    feature_df["has_dental_ga"] = feature_df.has_dental_ga.astype("category")

    # observed=False is spelled out because the default for categorical keys
    # differs between pandas releases; this keeps zero-count combinations in
    # the table regardless of the installed version.
    feature_counts = (feature_df
                      .groupby([feature, "has_dental_ga"], observed=False)
                      .agg("count").reset_index()
                      .rename({"entity_id": "count"}, axis=1))

    # "pct" is a 0-1 proportion: each row's count over the total count of its
    # feature value. A single transform replaces the former per-value loop,
    # which also shadowed the builtin `sum`.
    totals_per_value = (feature_counts
                        .groupby(feature, observed=False)["count"]
                        .transform("sum"))
    feature_counts["pct"] = feature_counts["count"] / totals_per_value

    fig1 = px.bar(feature_counts, x=feature, y="pct",
                  color="has_dental_ga", barmode="group",
                  title=f"Proportion by has_dental_ga within each {feature} value")
    fig1.show()

    fig2 = px.bar(feature_counts, x=feature, y="count",
                  color="has_dental_ga", barmode="group",
                  title=f"Count by has_dental_ga within each {feature} value")
    fig2.show()

In [5]:
# hhd0marchb: marital and cohabitation status combined (derived)
plot_ga_rate_comparison(feature="hhd0marchb")




Columns (110,247,303) have mixed types.Specify dtype option on import or set low_memory=False.

