# Nobel Twin Familial Cirrhosis Demographic Table Values
This is the workflow to generate the counts for the demographic table. The categories we will be looking at are group counts (G1P, G1R, etc.), sex, BMI and age. Sex will be male or female. BMI will be split into underweight, normal, overweight, obese and not provided. Age will be split into 18-29, 30s, 40s, 50s, and 60 or older.

**Author: Tobin Groth (tgroth@ucsd.edu)** 

In [5]:
%matplotlib inline
import pandas as pd
import numpy as np
import qiime2
import os
# Switch the working directory so that the relative path used when loading the
# metadata table resolves from this folder.
# NOTE(review): this is an absolute path on one user's machine; the notebook stops
# here on any other machine -- consider a configurable base directory instead.
os.chdir('/Users/tgroth/Google Drive/knight_twin_NAFLD/serum_analysis')

In [17]:
# Read the tab-separated sample metadata into a DataFrame; every cell below
# filters this table.
meta = pd.read_csv('../metadata-matched.tsv', delimiter='\t')

In [18]:
# Number of samples in each value of the ATTRIBUTE_groups column
meta['ATTRIBUTE_groups'].value_counts()

G1P    48
G1R    37
G3R    31
G3P    22
G2P    16
G2R    14
Name: ATTRIBUTE_groups, dtype: int64

### Non-NAFLD counts

In [9]:
# non-NAFLD sex counts: one value_counts printout per group, G1P first, then G1R
for group_label in ('G1P', 'G1R'):
    in_group = meta['ATTRIBUTE_groups'] == group_label
    print(meta.loc[in_group, 'ATTRIBUTE_BiologicalSex'].value_counts())

female    33
male      15
Name: ATTRIBUTE_BiologicalSex, dtype: int64
female    26
male      11
Name: ATTRIBUTE_BiologicalSex, dtype: int64


In [10]:
#non-NAFLD BMI counts
# Raw BMI entries for both G1 groups (G1P rows first, then G1R rows).
g1_bmi = list(meta[meta.ATTRIBUTE_groups=='G1P'].ATTRIBUTE_bmi)
g1_bmi = g1_bmi+list(meta[meta.ATTRIBUTE_groups=='G1R'].ATTRIBUTE_bmi)
#[underweight,normal,over,obese,NA]
bmi_counts = [0,0,0,0,0]
for raw_bmi in g1_bmi:
    # Entries flagged as 'Missing' are not numbers: map them to NaN instead of
    # handing them to float(), which would raise ValueError. The isinstance guard
    # keeps the check valid when the column was parsed as numeric, where
    # `'Missing' in <float>` would raise TypeError.
    if isinstance(raw_bmi, str) and 'Missing' in raw_bmi:
        bmi = float('nan')
    else:
        bmi = float(raw_bmi)
    if np.isnan(bmi):
        # not provided (flagged as missing, or an empty cell read as NaN)
        bmi_counts[4] += 1
    elif bmi < 18.5:
        bmi_counts[0] += 1
    elif bmi < 25:
        bmi_counts[1] += 1
    elif bmi < 30:
        bmi_counts[2] += 1
    else:
        bmi_counts[3] += 1
# Every entry must land in exactly one category.
assert sum(bmi_counts) == len(g1_bmi), 'some BMI entries were not categorised'
print(sum(bmi_counts))
bmi_counts

85


[2, 45, 24, 14, 0]

In [11]:
#non-NAFLD age counts
# Ages for both G1 groups (G1P rows first, then G1R rows).
g1_age = list(meta[meta.ATTRIBUTE_groups=='G1P'].ATTRIBUTE_age)
g1_age = g1_age+list(meta[meta.ATTRIBUTE_groups=='G1R'].ATTRIBUTE_age)
#[18-29,30s,40s,50s,60+]
age_counts = [0,0,0,0,0]
for raw_age in g1_age:
    age = float(raw_age)
    # The first bin is 18-29, so its upper bound is 30 (exclusive); a bound of 29
    # would push 29-year-olds into the 30s bin.
    if age < 30:
        age_counts[0] += 1
    elif age < 40:
        age_counts[1] += 1
    elif age < 50:
        age_counts[2] += 1
    elif age < 60:
        age_counts[3] += 1
    elif age >= 60:
        age_counts[4] += 1
# A NaN age matches none of the comparisons above; fail loudly rather than
# silently dropping the sample from the table.
assert sum(age_counts) == len(g1_age), 'some ages were not categorised (NaN?)'
print(sum(age_counts))
age_counts

85


[34, 4, 1, 16, 30]

### NAFLD w/o AF Counts

In [12]:
# NAFLD without AF sex counts: one value_counts printout per group, G2P first, then G2R
for group_label in ('G2P', 'G2R'):
    in_group = meta['ATTRIBUTE_groups'] == group_label
    print(meta.loc[in_group, 'ATTRIBUTE_BiologicalSex'].value_counts())

female    11
male       5
Name: ATTRIBUTE_BiologicalSex, dtype: int64
female    8
male      6
Name: ATTRIBUTE_BiologicalSex, dtype: int64


In [13]:
#NAFLD w/o AF BMI counts
# Raw BMI entries for both G2 groups (G2P rows first, then G2R rows).
g2_bmi = list(meta[meta.ATTRIBUTE_groups=='G2P'].ATTRIBUTE_bmi)
g2_bmi = g2_bmi+list(meta[meta.ATTRIBUTE_groups=='G2R'].ATTRIBUTE_bmi)
#[underweight,normal,over,obese,NA]
bmi_counts = [0,0,0,0,0]
for raw_bmi in g2_bmi:
    # Entries flagged as 'Missing' are not numbers: map them to NaN instead of
    # handing them to float(), which would raise ValueError. The isinstance guard
    # keeps the check valid when the column was parsed as numeric, where
    # `'Missing' in <float>` would raise TypeError.
    if isinstance(raw_bmi, str) and 'Missing' in raw_bmi:
        bmi = float('nan')
    else:
        bmi = float(raw_bmi)
    if np.isnan(bmi):
        # not provided (flagged as missing, or an empty cell read as NaN)
        bmi_counts[4] += 1
    elif bmi < 18.5:
        bmi_counts[0] += 1
    elif bmi < 25:
        bmi_counts[1] += 1
    elif bmi < 30:
        bmi_counts[2] += 1
    else:
        bmi_counts[3] += 1
# Every entry must land in exactly one category.
assert sum(bmi_counts) == len(g2_bmi), 'some BMI entries were not categorised'
print(sum(bmi_counts))
bmi_counts

30


[0, 7, 8, 15, 0]

In [14]:
#NAFLD w/o AF age counts
# Ages for both G2 groups (G2P rows first, then G2R rows).
g2_age = list(meta[meta.ATTRIBUTE_groups=='G2P'].ATTRIBUTE_age)
g2_age = g2_age+list(meta[meta.ATTRIBUTE_groups=='G2R'].ATTRIBUTE_age)
#[18-29,30s,40s,50s,60+]
age_counts = [0,0,0,0,0]
for raw_age in g2_age:
    age = float(raw_age)
    # The first bin is 18-29, so its upper bound is 30 (exclusive); a bound of 29
    # would push 29-year-olds into the 30s bin.
    if age < 30:
        age_counts[0] += 1
    elif age < 40:
        age_counts[1] += 1
    elif age < 50:
        age_counts[2] += 1
    elif age < 60:
        age_counts[3] += 1
    elif age >= 60:
        age_counts[4] += 1
# A NaN age matches none of the comparisons above; fail loudly rather than
# silently dropping the sample from the table.
assert sum(age_counts) == len(g2_age), 'some ages were not categorised (NaN?)'
print(sum(age_counts))
age_counts

30


[1, 3, 9, 7, 10]

### NAFLD-cirrhosis counts

In [19]:
# NAFLD-cirrhosis sex counts: one value_counts printout per group, G3P first, then G3R
for group_label in ('G3P', 'G3R'):
    in_group = meta['ATTRIBUTE_groups'] == group_label
    print(meta.loc[in_group, 'ATTRIBUTE_BiologicalSex'].value_counts())

female    18
male       4
Name: ATTRIBUTE_BiologicalSex, dtype: int64
female    24
male       7
Name: ATTRIBUTE_BiologicalSex, dtype: int64


In [21]:
#NAFLD-cirrhosis BMI counts
# Raw BMI entries for both G3 groups (G3P rows first, then G3R rows).
g3_bmi = list(meta[meta.ATTRIBUTE_groups=='G3P'].ATTRIBUTE_bmi)
g3_bmi = g3_bmi+list(meta[meta.ATTRIBUTE_groups=='G3R'].ATTRIBUTE_bmi)
#[underweight,normal,over,obese,NA]
bmi_counts = [0,0,0,0,0]
for raw_bmi in g3_bmi:
    # Entries flagged as 'Missing' are not numbers: map them to NaN so they are
    # counted in the NA slot instead of crashing float(). The isinstance guard
    # keeps the check valid when the column was parsed as numeric.
    if isinstance(raw_bmi, str) and 'Missing' in raw_bmi:
        bmi = float('nan')
    else:
        bmi = float(raw_bmi)
    if np.isnan(bmi):
        # not provided (flagged as missing, or an empty cell read as NaN)
        bmi_counts[4] += 1
    elif bmi < 18.5:
        bmi_counts[0] += 1
    elif bmi < 25:
        bmi_counts[1] += 1
    elif bmi < 30:
        bmi_counts[2] += 1
    else:
        bmi_counts[3] += 1
# Every entry must land in exactly one category.
assert sum(bmi_counts) == len(g3_bmi), 'some BMI entries were not categorised'
print(sum(bmi_counts))
bmi_counts

53


[0, 5, 14, 34, 0]

In [22]:
#NAFLD-cirrhosis age counts
# Ages for both G3 groups (G3P rows first, then G3R rows).
g3_age = list(meta[meta.ATTRIBUTE_groups=='G3P'].ATTRIBUTE_age)
g3_age = g3_age+list(meta[meta.ATTRIBUTE_groups=='G3R'].ATTRIBUTE_age)
#[18-29,30s,40s,50s,60+]
age_counts = [0,0,0,0,0]
for raw_age in g3_age:
    age = float(raw_age)
    # The first bin is 18-29, so its upper bound is 30 (exclusive); a bound of 29
    # would push 29-year-olds into the 30s bin.
    if age < 30:
        age_counts[0] += 1
    elif age < 40:
        age_counts[1] += 1
    elif age < 50:
        age_counts[2] += 1
    elif age < 60:
        age_counts[3] += 1
    elif age >= 60:
        age_counts[4] += 1
# A NaN age matches none of the comparisons above; fail loudly rather than
# silently dropping the sample from the table.
assert sum(age_counts) == len(g3_age), 'some ages were not categorised (NaN?)'
print(sum(age_counts))
age_counts

53


[3, 7, 10, 7, 26]