In [1]:
# imports
import pandas as pd
import json
import requests


In [2]:
# the variants to look up, one '<chrom>_<pos>_<ref>_<alt>' string each -- 6 in total:
# 2 from SLC30A8 (t2d), 1 from LPA (MI), 2 from PPARG (t2d), 1 CAD
variants = [
    '8_118184783_C_T',
    '8_118217915_G_A',
    '6_160961137_T_C',
    '3_12236565_G_A',
    '3_12344730_C_G',
    '9_22029445_G_A',
]
print("the variant list is: {}".format(variants))

the variant list is: ['8_118184783_C_T', '8_118217915_G_A', '6_160961137_T_C', '3_12236565_G_A', '3_12344730_C_G', '9_22029445_G_A']


In [4]:
# build the query string
# this API returns the bottom line p_values for all phenotypes available
# in p_value increasing order (most significant first)
# the 'var_id' parameter is the variant to pull the results for, in <chrom_pos_ref_alt> format
# the 'limit=10' parameter directs the API to return only the first 10 results
query_api = "http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id={}&limit=10"

# seconds to wait for the server before giving up, so a stalled request cannot hang the kernel
request_timeout_seconds = 30

# collect one data frame per variant and concatenate once after the loop;
# calling pd.concat inside the loop would re-copy every accumulated row on each pass
var_data_frames = []
row_count = 0

# for each variant, call the meta REST api and keep its results
for var in variants:
    # build the query and print
    var_query_api = query_api.format(var)
    print("the query for the var: {} is: {}".format(var, var_query_api))

    # make the REST call and capture the http response
    req = requests.get(var_query_api, timeout=request_timeout_seconds)

    # stop on an HTTP error status instead of trying to parse an error page as results
    req.raise_for_status()

    # decode the http response into the json format
    req_json = req.json()

    # read the results into an individual pandas data frame, built from the 'data' section of the json
    var_data_frame = pd.DataFrame(req_json['data'])
    var_data_frames.append(var_data_frame)

    # print the running row count (the size the master data frame will have up to this variant)
    row_count += var_data_frame.shape[0]
    print("after concatenation, the master data frame has a row count of: {}\n".format(row_count))

# build the master data frame in a single concatenation; each variant's rows keep their
# own index labels, and pd.concat rejects an empty list, so fall back to an empty frame
# when there were no variants
master_data_frame = pd.concat(var_data_frames) if var_data_frames else pd.DataFrame()

the query for the var: 8_118184783_C_T is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=8_118184783_C_T&limit=10
after concatenation, the master data frame has a row count of: 10

the query for the var: 8_118217915_G_A is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=8_118217915_G_A&limit=10
after concatenation, the master data frame has a row count of: 20

the query for the var: 6_160961137_T_C is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=6_160961137_T_C&limit=10
after concatenation, the master data frame has a row count of: 30

the query for the var: 3_12236565_G_A is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=3_12236565_G_A&limit=10
after concatenation, the master data frame has a row count of: 40

the query for the var: 3_12344730_C_G is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=3_12344730_C_G&limit=10


after concatenation, the master data frame has a row count of: 50

the query for the var: 9_22029445_G_A is: http://public.type2diabeteskb.org/dccservices/graph/meta/variant/object?var_id=9_22029445_G_A&limit=10
after concatenation, the master data frame has a row count of: 60



In [5]:
# show the top 20 rows of the combined data frame
# a single variable drives both the message and the slice so the two cannot disagree
preview_row_count = 20
print("the first {} rows of the combined data frame".format(preview_row_count))
print(master_data_frame.head(preview_row_count))


the first 20 rows of the combined data frame
  alt    beta chrom  number_samples        p_value     phenotype  \
0   T -0.1066     8         2237710  3.110000e-178           T2D   
1   T -0.1073     8         1177440   1.107000e-69     T2DadjBMI   
2   T -0.0184     8          214021   6.715000e-45         HBA1C   
3   T -0.0302     8          220129   5.513000e-34            FG   
4   T -0.0423     8           93146   1.746000e-19            BS   
5   T -0.0690     8           46186   4.915000e-11            PI   
6   T  0.0102     8         2491120   8.088000e-10           BMI   
7   T  0.0978     8            5567   1.144000e-05          PEAK   
8   T  0.0977     8            5506   1.303000e-05           AIR   
9   T  0.0976     8            5458   1.486000e-05  AIRadjBMInSI   
0   A -0.1059     8         1717700   1.682000e-57           T2D   
1   A -0.1220     8         1009440   9.442000e-40     T2DadjBMI   
2   A -0.0423     8           93146   1.149000e-15            BS   
3  

In [6]:
# keep only the variant id, p_value, std_err, beta and phenotype columns
columns_to_keep = ['var_id', 'p_value', 'std_err', 'beta', 'phenotype']
filtered_data_frame = master_data_frame.filter(items=columns_to_keep, axis='columns')

# preview the first 15 rows of the trimmed data frame
print(filtered_data_frame.head(15))


            var_id        p_value  std_err    beta     phenotype
0  8_118184783_C_T  3.110000e-178   0.0034 -0.1066           T2D
1  8_118184783_C_T   1.107000e-69   0.0047 -0.1073     T2DadjBMI
2  8_118184783_C_T   6.715000e-45   0.0014 -0.0184         HBA1C
3  8_118184783_C_T   5.513000e-34   0.0019 -0.0302            FG
4  8_118184783_C_T   1.746000e-19   0.0047 -0.0423            BS
5  8_118184783_C_T   4.915000e-11   0.0110 -0.0690            PI
6  8_118184783_C_T   8.088000e-10   0.0011  0.0102           BMI
7  8_118184783_C_T   1.144000e-05   0.0223  0.0978          PEAK
8  8_118184783_C_T   1.303000e-05   0.0224  0.0977           AIR
9  8_118184783_C_T   1.486000e-05   0.0225  0.0976  AIRadjBMInSI
0  8_118217915_G_A   1.682000e-57   0.0047 -0.1059           T2D
1  8_118217915_G_A   9.442000e-40   0.0072 -0.1220     T2DadjBMI
2  8_118217915_G_A   1.149000e-15   0.0053 -0.0423            BS
3  8_118217915_G_A   5.422000e-15   0.0078 -0.0610         HBA1C
4  8_118217915_G_A   2.81

In [7]:
# sort the new data frame by ascending p_value (more significant first)
# this will not group the rows by variant: rows from different variants are interleaved
# rebind the name to the sorted copy rather than sorting in place, so the cell does not
# mutate the object an earlier cell created and displayed
filtered_data_frame = filtered_data_frame.sort_values('p_value')

# show the first 10 rows of the newly sorted data frame
print(filtered_data_frame.head(10))

            var_id        p_value  std_err    beta  phenotype
0  8_118184783_C_T  3.110000e-178   0.0034 -0.1066        T2D
1  8_118184783_C_T   1.107000e-69   0.0047 -0.1073  T2DadjBMI
0   9_22029445_G_A   1.595000e-59   0.0047  0.1054        CAD
0  8_118217915_G_A   1.682000e-57   0.0047 -0.1059        T2D
2  8_118184783_C_T   6.715000e-45   0.0014 -0.0184      HBA1C
1  8_118217915_G_A   9.442000e-40   0.0072 -0.1220  T2DadjBMI
3  8_118184783_C_T   5.513000e-34   0.0019 -0.0302         FG
0   3_12344730_C_G   3.695000e-28   0.0069 -0.1094        T2D
1   3_12344730_C_G   4.430000e-21   0.0103 -0.1266  T2DadjBMI
4  8_118184783_C_T   1.746000e-19   0.0047 -0.0423         BS
