# Results report

Basic report of results

In [1]:
import os
import clarite

import pandas as pd
import nhanes as nh
import numpy as np
import matplotlib.pyplot as plt

#### SET PATHS
paths  = nh.set_project_paths()

#### READ DATA
# The results table is indexed by the (Variable, Outcome) pair of each test.
results = pd.read_csv(os.path.join(paths[2], 'ResultsTable.csv')).\
             set_index(['Variable','Outcome'])

# Recent pandas versions treat the text "None" as a missing value when parsing
# a CSV, which turns the 'None' difference type into NaN. Every later cell
# compares this column against the literal 'None', so restore the label here
# to keep those comparisons meaningful regardless of the pandas version.
results['difference_type'] = results['difference_type'].fillna('None')

#### SAVE PERCENT FEMALES LIST
nh.check_balanced_tests(results,
                        paths[2])

rpy2 ModuleSpec(name='rpy2', loader=<_frozen_importlib_external.SourceFileLoader object at 0x7f7912b45f40>, origin='/home/tomas/anaconda3/envs/py_clarite/lib/python3.9/site-packages/rpy2/__init__.py', submodule_search_locations=['/home/tomas/anaconda3/envs/py_clarite/lib/python3.9/site-packages/rpy2'])


How many tests converged in each of the four cohorts?

In [2]:
# Each row of the results table is one test.
n_tests = results.shape[0]
print('There are ', n_tests, ' tests', sep='')

There are 100 tests


How many results were significant, and of what type?

In [3]:
# Count the significant results overall and per difference type.
# A missing value is treated like the literal 'None' label: pandas may parse
# the text "None" in a CSV as NaN, and NaN != 'None' evaluates to True, which
# would otherwise count every non-significant row as significant.
difference_type = results['difference_type']
is_significant  = difference_type.notna() & (difference_type != 'None')

n_significant = int(is_significant.sum())
n_quant       = int((difference_type == 'Quantitative').sum())
n_qual        = int((difference_type == 'Qualitative').sum())
n_pure        = int((difference_type == 'Pure').sum())
print('There are ' + str(n_significant) + ' significant results')
print(str(n_quant) + ' quantitative, ' + str(n_qual) + ' qualitative, and ' + str(n_pure) + ' pure')


There are 2 significant results
0 quantitative, 0 qualitative, and 2 pure


How many unique exposures and phenotypes had significant results?

In [4]:
# Mask of rows with a significant difference. Missing values are excluded
# explicitly because NaN != 'None' is True (pandas may read the text "None"
# from a CSV as NaN), which would mark non-significant rows as significant.
bool_significant = results['difference_type'].notna() & (results['difference_type'] != 'None')

# 'Variable' (exposure) and 'Outcome' (phenotype) are the two index levels of
# the results table, so the distinct values are counted on the index itself.
significant_index = results[bool_significant].index
n_exposures  = significant_index.get_level_values('Variable').nunique()
n_phenotypes = significant_index.get_level_values('Outcome').nunique()
print('Only ' + str(n_phenotypes) + ' phenotypes were significant, and ' + str(n_exposures) + ' exposures')


Only 2 phenotypes were significant, and 2 exposures


In [5]:
# Significant results per outcome (phenotype) name
results.loc[bool_significant].value_counts(subset='Outcome_Name')

Outcome_Name
Lymphocyte number       1
Triglyceride (mg/dL)    1
dtype: int64

In [6]:
# Significant results per exposure category
results.loc[bool_significant].value_counts(subset='Variable_Category')

Variable_Category
nutrients                20
heavy metals             18
smoking behavior         11
food component recall     6
supplement use            4
cotinine                  1
pesticides                1
phthalates                1
smoking family            1
volatile compounds        1
dtype: int64

In [7]:
# Significant results per exposure name
results.loc[bool_significant].value_counts(subset='Variable_Name')

Variable_Name
Lead (ug/dL)                                8
Cadmium (ug/L)                              7
Vitamin E (ug/dL)                           5
Lutein and zeaxanthin (ug/dL)               5
Current or Past Cigarette Smoker?           3
Vitamin A (ug/dL)                           3
g-Tocopherol (ug/dL)                        2
IRON_mg                                     2
Retinyl Palmitate (ug/dL)                   2
Cobalt, urine (ng/mL)                       2
Current Cigarette Smoker?                   2
Gamma-hexachlorocyclohexane (ng/g)          1
How many years smoked this amount           1
Mono-n-methyl phthalate                     1
Folate, RBC (ng/mL RBC)                     1
VITAMIN_C_mg                                1
Vitamin D (ng/mL)                           1
b-Cryptoxanthin (ug/dL)                     1
Food folate (mcg)                           1
# cigarettes smoked per day now             1
Folate, DFE (mcg)                           1
# days smoked cigs d

## Pure differences

In [8]:
# Mask of the rows labelled as 'Pure' differences, then their exposure categories
bool_pure = results['difference_type'].eq('Pure')
results.loc[bool_pure].value_counts(subset='Variable_Category')

Variable_Category
nutrients                10
smoking behavior          7
heavy metals              6
food component recall     3
cotinine                  1
smoking family            1
supplement use            1
dtype: int64

In [9]:
# Pure differences per outcome (phenotype) name
results.loc[bool_pure].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                    17
Direct HDL-Cholesterol (mg/dL)     2
C-reactive protein(mg/dL)          1
GGT (U/L)                          1
Hematocrit (%)                     1
Hemoglobin (g/dL)                  1
Lymphocyte number                  1
Lymphocyte percent (%)             1
Mean cell volume (fL)              1
Protein, total (g/dL)              1
Red cell count SI                  1
Sodium (mmol/L)                    1
dtype: int64

In [10]:
# Per-sex effect sizes (betas), restricted to the pure differences
betas_pure_females = results['Beta_female'][bool_pure]
betas_pure_males   = results['Beta_male'][bool_pure]

# Count the rows where each sex has the larger absolute beta.
# NOTE(review): the message below speaks of "significant effects" while the
# comparison is on |beta|, not on the p-values - confirm this is intended.
females_greater = int((betas_pure_females.abs() > betas_pure_males.abs()).sum())
males_greater   = int((betas_pure_females.abs() < betas_pure_males.abs()).sum())

print('From the pure differences, in ', females_greater,
      ' cases, females showed significant effects while in ', males_greater,
      ' cases, males showed significant effects', sep='')

From the pure differences, in 9 cases, females showed significant effects while in 20 cases, males showed significant effects


In [11]:
# Columns shown in the detail tables: names first, then the per-sex estimates
name_columns   = ['Variable_Name', 'Outcome_Name']
female_columns = ['Beta_female', 'SE_female', 'pvalue_female']
male_columns   = ['Beta_male', 'SE_male', 'pvalue_male']
print_columns  = name_columns + female_columns + male_columns

# Detail table of the pure differences
results[bool_pure][print_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,Variable_Name,Outcome_Name,Beta_female,SE_female,pvalue_female,Beta_male,SE_male,pvalue_male
Variable,Outcome,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DR1TCALC,LBXSAL,Calcium (mg),Albumin (g/dL),-0.066957,0.008815,3.058731e-14,0.009591,0.013366,0.4730319
DR1TCARB,LBXLYPCT,Carbohydrate (gm),Lymphocyte percent (%),-0.061278,0.011468,9.126683e-08,0.025154,0.014451,0.08174744
DR1TFDFE,LBXSAL,"Folate, DFE (mcg)",Albumin (g/dL),-0.060033,0.011575,2.142226e-07,0.027875,0.014385,0.05264445
LBXBCD,LBXSAL,Cadmium (ug/L),Albumin (g/dL),-0.004649,0.009483,0.6239368,-0.087992,0.013839,2.038226e-10
LBXBPB,LBDHDL,Lead (ug/dL),Direct HDL-Cholesterol (mg/dL),-0.023585,0.016311,0.1481927,0.095229,0.019219,7.232572e-07
LBXBPB,LBXSAL,Lead (ug/dL),Albumin (g/dL),0.098868,0.012004,1.772873e-16,-0.007432,0.015698,0.6359104
LBXBPB,LBXSNASI,Lead (ug/dL),Sodium (mmol/L),0.077513,0.014171,4.500553e-08,-0.01845,0.014746,0.2108785
LBXCOT,LBXSAL,Cotinine (ng/mL),Albumin (g/dL),0.009661,0.006603,0.143453,-0.086585,0.011819,2.370469e-13
LBXLUZ,LBXMCVSI,Lutein and zeaxanthin (ug/dL),Mean cell volume (fL),0.022217,0.012612,0.07813556,-0.062907,0.009745,1.079201e-10
LBXLUZ,LBXSAL,Lutein and zeaxanthin (ug/dL),Albumin (g/dL),0.011154,0.012599,0.3759652,0.121909,0.013238,3.284954e-20


### Pure differences in females

In [12]:
# Pure differences where the female |beta| is the larger one, by exposure category
pure_female_larger = betas_pure_females.abs() > betas_pure_males.abs()
results.loc[bool_pure].loc[pure_female_larger].value_counts(subset='Variable_Category')

Variable_Category
heavy metals             4
food component recall    3
nutrients                2
dtype: int64

In [13]:
# Pure differences where the female |beta| is the larger one, by exposure name
pure_female_larger = betas_pure_females.abs() > betas_pure_males.abs()
results.loc[bool_pure].loc[pure_female_larger].value_counts(subset='Variable_Name')

Variable_Name
Cobalt, urine (ng/mL)    2
Lead (ug/dL)             2
Calcium (mg)             1
Carbohydrate (gm)        1
Folate, DFE (mcg)        1
Vitamin A (ug/dL)        1
Vitamin E (ug/dL)        1
dtype: int64

In [14]:
# Pure differences where the female |beta| is the larger one, by outcome name
pure_female_larger = betas_pure_females.abs() > betas_pure_males.abs()
results.loc[bool_pure].loc[pure_female_larger].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                    3
Direct HDL-Cholesterol (mg/dL)    1
Hematocrit (%)                    1
Hemoglobin (g/dL)                 1
Lymphocyte number                 1
Lymphocyte percent (%)            1
Sodium (mmol/L)                   1
dtype: int64

### Pure differences in males

In [15]:
# Pure differences where the male |beta| is the larger one, by exposure category
pure_male_larger = betas_pure_females.abs() < betas_pure_males.abs()
results.loc[bool_pure].loc[pure_male_larger].value_counts(subset='Variable_Category')

Variable_Category
nutrients           8
smoking behavior    7
heavy metals        2
cotinine            1
smoking family      1
supplement use      1
dtype: int64

In [16]:
# Pure differences where the male |beta| is the larger one, by exposure name
pure_male_larger = betas_pure_females.abs() < betas_pure_males.abs()
results.loc[bool_pure].loc[pure_male_larger].value_counts(subset='Variable_Name')

Variable_Name
Vitamin E (ug/dL)                           4
Lutein and zeaxanthin (ug/dL)               2
# cigarettes smoked per day now             1
# days smoked cigs during past 30 days      1
Avg # cigarettes/day during past 30 days    1
Cadmium (ug/L)                              1
Cotinine (ng/mL)                            1
Current or Past Cigarette Smoker?           1
Does anyone smoke in home?                  1
FTC Nicotine Content                        1
FTC Tar Content                             1
How many years smoked this amount           1
Lead (ug/dL)                                1
VITAMIN_C_mg                                1
Vitamin A (ug/dL)                           1
Vitamin D (ng/mL)                           1
dtype: int64

In [17]:
# Pure differences where the male |beta| is the larger one, by outcome name
pure_male_larger = betas_pure_females.abs() < betas_pure_males.abs()
results.loc[bool_pure].loc[pure_male_larger].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                    14
C-reactive protein(mg/dL)          1
Direct HDL-Cholesterol (mg/dL)     1
GGT (U/L)                          1
Mean cell volume (fL)              1
Protein, total (g/dL)              1
Red cell count SI                  1
dtype: int64

## Quantitative differences

In [18]:
# Mask of the rows labelled as 'Quantitative' differences, then their exposure categories
bool_quant = results['difference_type'].eq('Quantitative')
results.loc[bool_quant].value_counts(subset='Variable_Category')

Variable_Category
heavy metals             10
nutrients                 7
smoking behavior          4
food component recall     1
pesticides                1
supplement use            1
dtype: int64

In [19]:
# Quantitative differences per outcome (phenotype) name
results.loc[bool_quant].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                      2
Red cell count SI                   2
Triglycerides (mg/dL)               2
Triglyceride (mg/dL)                2
Segmented neutrophils number        2
White blood cell count (SI)         2
GGT (U/L)                           1
Hematocrit (%)                      1
Hemoglobin (g/dL)                   1
Albumin, urine (ug/mL)              1
Mean cell hemoglobin (pg)           1
Mean cell volume (fL)               1
Phosphorus (mg/dL)                  1
Direct HDL-Cholesterol (mg/dL)      1
C-reactive protein(mg/dL)           1
Bone alkaline phosphotase (ug/L)    1
Alkaline phosphotase (U/L)          1
Homocysteine (umol/L)               1
dtype: int64

In [20]:
# Per-sex effect sizes (betas), restricted to the quantitative differences
betas_quant_females = results['Beta_female'][bool_quant]
betas_quant_males   = results['Beta_male'][bool_quant]

# Count the rows where each sex has the larger absolute beta
females_greater = int((betas_quant_females.abs() > betas_quant_males.abs()).sum())
males_greater   = int((betas_quant_females.abs() < betas_quant_males.abs()).sum())

print('From the quantitative differences, in ', females_greater,
      ' cases, females showed greater effects while in ', males_greater,
      ' cases, males showed greater ones', sep='')

From the quantitative differences, in 9 cases, females showed greater effects while in 15 cases, males showed greater ones


In [21]:
# Detail table of the quantitative differences
results[bool_quant][print_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,Variable_Name,Outcome_Name,Beta_female,SE_female,pvalue_female,Beta_male,SE_male,pvalue_male
Variable,Outcome,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DR1TALCO,LBXSGTSI,Alcohol (gm),GGT (U/L),0.063842,0.010949,5.519438e-09,0.141966,0.010758,9.229674e-40
IRON_mg,LBXHCY,IRON_mg,Homocysteine (umol/L),-0.129165,0.009025,1.8586670000000002e-46,-0.063395,0.009299,9.267904e-12
LBXBCD,LBDNENO,Cadmium (ug/L),Segmented neutrophils number,0.111766,0.012172,4.22131e-20,0.204444,0.011277,1.886487e-73
LBXBCD,LBXCRP,Cadmium (ug/L),C-reactive protein(mg/dL),0.045688,0.010597,1.621206e-05,0.144338,0.013444,6.89869e-27
LBXBCD,LBXMCHSI,Cadmium (ug/L),Mean cell hemoglobin (pg),0.047227,0.008916,1.179853e-07,0.133952,0.011449,1.26998e-31
LBXBCD,LBXMCVSI,Cadmium (ug/L),Mean cell volume (fL),0.059618,0.009618,5.698453e-10,0.147208,0.009634,1.0369710000000001e-52
LBXBCD,LBXRBCSI,Cadmium (ug/L),Red cell count SI,0.126242,0.011069,3.931034e-30,0.02667,0.01061,0.01194537
LBXBCD,LBXWBCSI,Cadmium (ug/L),White blood cell count (SI),0.151626,0.011971,9.169748e-37,0.250308,0.01089,6.455535e-117
LBXBPB,LBXBAP,Lead (ug/dL),Bone alkaline phosphotase (ug/L),0.251242,0.019237,5.5440880000000006e-39,0.063058,0.018373,0.0005989093
LBXBPB,LBXRBCSI,Lead (ug/dL),Red cell count SI,0.186118,0.015065,4.596549e-35,0.070282,0.012946,5.675659e-08


### Quantitative differences in females

In [22]:
# Quantitative differences where the female |beta| is the larger one, by exposure category
quant_female_larger = betas_quant_females.abs() > betas_quant_males.abs()
results.loc[bool_quant].loc[quant_female_larger].value_counts(subset='Variable_Category')

Variable_Category
heavy metals        5
smoking behavior    2
nutrients           1
supplement use      1
dtype: int64

In [23]:
# Quantitative differences where the female |beta| is the larger one, by exposure name
quant_female_larger = betas_quant_females.abs() > betas_quant_males.abs()
results.loc[bool_quant].loc[quant_female_larger].value_counts(subset='Variable_Name')

Variable_Name
Lead (ug/dL)                         3
Current or Past Cigarette Smoker?    2
Cadmium (ug/L)                       1
Cesium, urine (ng/mL)                1
IRON_mg                              1
Lutein and zeaxanthin (ug/dL)        1
dtype: int64

In [24]:
# Quantitative differences where the female |beta| is the larger one, by outcome name
quant_female_larger = betas_quant_females.abs() > betas_quant_males.abs()
results.loc[bool_quant].loc[quant_female_larger].value_counts(subset='Outcome_Name')

Outcome_Name
Red cell count SI                   2
Albumin, urine (ug/mL)              1
Alkaline phosphotase (U/L)          1
Bone alkaline phosphotase (ug/L)    1
Direct HDL-Cholesterol (mg/dL)      1
Hematocrit (%)                      1
Hemoglobin (g/dL)                   1
Homocysteine (umol/L)               1
dtype: int64

### Quantitative differences in males

In [25]:
# Quantitative differences where the male |beta| is the larger one, by exposure category
quant_male_larger = betas_quant_females.abs() < betas_quant_males.abs()
results.loc[bool_quant].loc[quant_male_larger].value_counts(subset='Variable_Category')

Variable_Category
nutrients                6
heavy metals             5
smoking behavior         2
food component recall    1
pesticides               1
dtype: int64

In [26]:
# Quantitative differences where the male |beta| is the larger one, by exposure name
quant_male_larger = betas_quant_females.abs() < betas_quant_males.abs()
results.loc[bool_quant].loc[quant_male_larger].value_counts(subset='Variable_Name')

Variable_Name
Cadmium (ug/L)                        5
Current Cigarette Smoker?             2
Retinyl Palmitate (ug/dL)             2
g-Tocopherol (ug/dL)                  2
Alcohol (gm)                          1
Gamma-hexachlorocyclohexane (ng/g)    1
Vitamin A (ug/dL)                     1
b-Cryptoxanthin (ug/dL)               1
dtype: int64

In [27]:
# Quantitative differences where the male |beta| is the larger one, by outcome name
quant_male_larger = betas_quant_females.abs() < betas_quant_males.abs()
results.loc[bool_quant].loc[quant_male_larger].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                  2
Segmented neutrophils number    2
Triglyceride (mg/dL)            2
Triglycerides (mg/dL)           2
White blood cell count (SI)     2
C-reactive protein(mg/dL)       1
GGT (U/L)                       1
Mean cell hemoglobin (pg)       1
Mean cell volume (fL)           1
Phosphorus (mg/dL)              1
dtype: int64

## Qualitative differences

In [28]:
# Mask of the rows labelled as 'Qualitative' differences, then their exposure categories
bool_qual = results['difference_type'].eq('Qualitative')
results.loc[bool_qual].value_counts(subset='Variable_Category')

Variable_Category
nutrients                3
food component recall    2
heavy metals             2
supplement use           2
phthalates               1
volatile compounds       1
dtype: int64

In [29]:
# Qualitative differences per exposure name
results.loc[bool_qual].value_counts(subset='Variable_Name')

Variable_Name
Lead (ug/dL)                     2
Lutein and zeaxanthin (ug/dL)    2
Blood m-/p-Xylene (ng/mL)        1
Caffeine (mg)                    1
FOLIC_ACID_mcg                   1
Folate, RBC (ng/mL RBC)          1
Food folate (mcg)                1
IRON_mg                          1
Mono-n-methyl phthalate          1
dtype: int64

In [30]:
# Qualitative differences per outcome (phenotype) name
results.loc[bool_qual].value_counts(subset='Outcome_Name')

Outcome_Name
Albumin (g/dL)                 5
C-reactive protein(mg/dL)      2
Mean cell hemoglobin (pg)      1
Methylmalonic acid (umol/L)    1
Osmolality (mOsml/L)           1
Red cell count SI              1
dtype: int64

In [31]:
# Detail table of the qualitative differences
results[bool_qual][print_columns]

Unnamed: 0_level_0,Unnamed: 1_level_0,Variable_Name,Outcome_Name,Beta_female,SE_female,pvalue_female,Beta_male,SE_male,pvalue_male
Variable,Outcome,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
DR1TCAFF,LBXCRP,Caffeine (mg),C-reactive protein(mg/dL),-0.056827,0.013156,1.564501e-05,0.036605,0.012389,0.003131021
DR1TFF,LBXSAL,Food folate (mcg),Albumin (g/dL),-0.035356,0.009845,0.0003291015,0.042406,0.012819,0.000939576
FOLIC_ACID_mcg,LBXSAL,FOLIC_ACID_mcg,Albumin (g/dL),-0.033119,0.010927,0.002438115,0.056429,0.011356,6.725711e-07
IRON_mg,LBXSAL,IRON_mg,Albumin (g/dL),-0.038795,0.009411,3.747767e-05,0.03895,0.010093,0.0001138314
LBXBPB,LBXCRP,Lead (ug/dL),C-reactive protein(mg/dL),-0.070192,0.014206,7.769516e-07,0.049283,0.014772,0.0008490967
LBXBPB,LBXSOSSI,Lead (ug/dL),Osmolality (mOsml/L),0.048364,0.013449,0.0003229049,-0.055644,0.013723,5.017454e-05
LBXLUZ,LBXMCHSI,Lutein and zeaxanthin (ug/dL),Mean cell hemoglobin (pg),0.025958,0.012583,0.0391114,-0.058582,0.011109,1.337646e-07
LBXLUZ,LBXRBCSI,Lutein and zeaxanthin (ug/dL),Red cell count SI,-0.062828,0.015887,7.661209e-05,0.056506,0.014108,6.196329e-05
LBXRBF,LBXSAL,"Folate, RBC (ng/mL RBC)",Albumin (g/dL),-0.060484,0.013114,3.98624e-06,0.045353,0.016085,0.004807511
LBXVXY,LBXSAL,Blood m-/p-Xylene (ng/mL),Albumin (g/dL),0.080077,0.017379,4.071509e-06,-0.045524,0.019905,0.0221942
