In [1]:
# Import Dependencies
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect
import pandas as pd
import numpy as np


In [2]:
#Create connection enginge
engine = create_engine("sqlite:///data/belly_button_biodiversity.sqlite")

session = Session(engine)

In [3]:
# Create inspector and connect
inspector = inspect(engine)

In [4]:
inspector.get_table_names()

['otu', 'samples', 'samples_metadata']

In [5]:
# Inspect the otu, samples and samples_metadata tables

In [6]:
columns = inspector.get_columns('otu')

for column in columns:
    print(column['name'], column['type'])

otu_id INTEGER
lowest_taxonomic_unit_found TEXT


In [7]:
columns = inspector.get_columns('samples')

for column in columns:
    print(column['name'], column['type'])

otu_id INTEGER
BB_940 INTEGER
BB_941 INTEGER
BB_943 INTEGER
BB_944 INTEGER
BB_945 INTEGER
BB_946 INTEGER
BB_947 INTEGER
BB_948 INTEGER
BB_949 INTEGER
BB_950 INTEGER
BB_952 INTEGER
BB_953 INTEGER
BB_954 INTEGER
BB_955 INTEGER
BB_956 INTEGER
BB_958 INTEGER
BB_959 INTEGER
BB_960 INTEGER
BB_961 INTEGER
BB_962 INTEGER
BB_963 INTEGER
BB_964 INTEGER
BB_966 INTEGER
BB_967 INTEGER
BB_968 INTEGER
BB_969 INTEGER
BB_970 INTEGER
BB_971 INTEGER
BB_972 INTEGER
BB_973 INTEGER
BB_974 INTEGER
BB_975 INTEGER
BB_978 INTEGER
BB_1233 INTEGER
BB_1234 INTEGER
BB_1235 INTEGER
BB_1236 INTEGER
BB_1237 INTEGER
BB_1238 INTEGER
BB_1242 INTEGER
BB_1243 INTEGER
BB_1246 INTEGER
BB_1253 INTEGER
BB_1254 INTEGER
BB_1258 INTEGER
BB_1259 INTEGER
BB_1260 INTEGER
BB_1264 INTEGER
BB_1265 INTEGER
BB_1273 INTEGER
BB_1275 INTEGER
BB_1276 INTEGER
BB_1277 INTEGER
BB_1278 INTEGER
BB_1279 INTEGER
BB_1280 INTEGER
BB_1281 INTEGER
BB_1282 INTEGER
BB_1283 INTEGER
BB_1284 INTEGER
BB_1285 INTEGER
BB_1286 INTEGER
BB_1287 INTEGER
BB_1288 IN

In [8]:
columns = inspector.get_columns('samples_metadata')

for column in columns:
    print(column['name'], column['type'])

SAMPLEID INTEGER
EVENT TEXT
ETHNICITY TEXT
GENDER TEXT
AGE INTEGER
WFREQ INTEGER
BBTYPE TEXT
LOCATION TEXT
COUNTRY012 TEXT
ZIP012 INTEGER
COUNTRY1319 TEXT
ZIP1319 INTEGER
DOG TEXT
CAT TEXT
IMPSURFACE013 INTEGER
NPP013 FLOAT
MMAXTEMP013 FLOAT
PFC013 FLOAT
IMPSURFACE1319 INTEGER
NPP1319 FLOAT
MMAXTEMP1319 FLOAT
PFC1319 FLOAT


In [9]:
#Create pandas df from sqlite table
sample_df = pd.read_sql_table('samples', session.bind)

In [10]:
sample = "BB_940"
sample_df = sample_df.loc[:,['otu_id',sample]].sort_values(sample, ascending=False)
sample_df

Unnamed: 0,otu_id,BB_940
1166,1167,163
2858,2859,126
481,482,113
2263,2264,78
40,41,71
1188,1189,51
351,352,50
188,189,47
2317,2318,40
1976,1977,40


In [11]:
Base = automap_base()

#Reflect tables
Base.prepare(engine, reflect=True)


In [12]:
otus = Base.classes.otu 
samples = Base.classes.samples
samples_metadata =  Base.classes.samples_metadata


In [13]:
results = session.query(samples_metadata.SAMPLEID).all()
names =  list(np.ravel(results))
names

[940,
 941,
 943,
 944,
 945,
 946,
 947,
 948,
 949,
 950,
 952,
 953,
 954,
 955,
 956,
 958,
 959,
 960,
 961,
 962,
 963,
 964,
 966,
 967,
 968,
 969,
 970,
 971,
 972,
 973,
 974,
 975,
 978,
 1233,
 1234,
 1235,
 1236,
 1237,
 1238,
 1242,
 1243,
 1246,
 1253,
 1254,
 1258,
 1259,
 1260,
 1264,
 1265,
 1273,
 1275,
 1276,
 1277,
 1278,
 1279,
 1280,
 1281,
 1282,
 1283,
 1284,
 1285,
 1286,
 1287,
 1288,
 1289,
 1290,
 1291,
 1292,
 1293,
 1294,
 1295,
 1296,
 1297,
 1298,
 1308,
 1309,
 1310,
 1374,
 1415,
 1439,
 1441,
 1443,
 1486,
 1487,
 1489,
 1490,
 1491,
 1494,
 1495,
 1497,
 1499,
 1500,
 1501,
 1502,
 1503,
 1504,
 1505,
 1506,
 1507,
 1508,
 1510,
 1511,
 1512,
 1513,
 1514,
 1515,
 1516,
 1517,
 1518,
 1519,
 1521,
 1524,
 1526,
 1527,
 1530,
 1531,
 1532,
 1533,
 1534,
 1535,
 1536,
 1537,
 1539,
 1540,
 1541,
 1542,
 1543,
 1544,
 1545,
 1546,
 1547,
 1548,
 1549,
 1550,
 1551,
 1552,
 1553,
 1554,
 1555,
 1556,
 1557,
 1558,
 1561,
 1562,
 1563,
 1564,
 1572,
 157

In [14]:
samples_df = pd.read_sql_table('samples', session.bind)

In [16]:
sample_df = samples_df.loc[:, ['otu_id', sample]].sort_values(sample, ascending=False)
otu_ids_sorted = list(sample_df.iloc[:10, 0])
samples_sorted = list(sample_df.iloc[:10, 1])

In [17]:
samples_sorted

[163, 126, 113, 78, 71, 51, 50, 47, 40, 40]

In [18]:
otu_ids_sorted

[1167, 2859, 482, 2264, 41, 1189, 352, 189, 2318, 1977]

In [22]:
results = session.query(otus.lowest_taxonomic_unit_found).all()
otu_descriptions = list(np.ravel(results))
otu_descriptions_sorted = []
    


In [24]:
for index in range(len(otu_ids_sorted)):
    otu_id = otu_ids_sorted[index]
    otu_descriptions_sorted.append(otu_descriptions[otu_id])

In [25]:
otu_id

1977

In [26]:
otu_descriptions

['Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bact

In [27]:
response_object = {'otu_ids': otu_ids_sorted,'sample_values': samples_sorted,'otu_descriptions' : otu_descriptions_sorted}

In [28]:
response_object

{'otu_descriptions': ['Bacteria;Bacteroidetes;Bacteroidia;Bacteroidales;Porphyromonadaceae;Porphyromonas',
  'Bacteria;Firmicutes;Clostridia;Clostridiales;IncertaeSedisXI;Peptoniphilus',
  'Bacteria',
  'Bacteria;Firmicutes;Clostridia;Clostridiales;IncertaeSedisXI',
  'Bacteria',
  'Bacteria;Bacteroidetes;Bacteroidia;Bacteroidales;Porphyromonadaceae;Porphyromonas',
  'Bacteria',
  'Bacteria',
  'Bacteria;Firmicutes;Clostridia;Clostridiales;IncertaeSedisXI;Anaerococcus',
  'Bacteria;Firmicutes;Clostridia;Clostridiales'],
 'otu_ids': [1167, 2859, 482, 2264, 41, 1189, 352, 189, 2318, 1977],
 'sample_values': [163, 126, 113, 78, 71, 51, 50, 47, 40, 40]}

In [29]:
wfreq = 0
sample_id = sample.split('_')[1]
print(sample_id)

940


In [31]:
results = session.query(samples_metadata).filter(samples_metadata.SAMPLEID == sample_id).all()

In [34]:
for sample in results:
    wfreq = sample.WFREQ
    print(wfreq)

2


In [35]:
results = session.query(otus.lowest_taxonomic_unit_found).all()
otu_descriptions = list(np.ravel(results))

In [36]:
otu_descriptions

['Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
 'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bacteria',
 'Bact

In [39]:
samples_df = pd.read_sql_table('samples', session.bind)
samples_df ['otu_descriptions'] = pd.Series(otu_descriptions)
sample_df = samples_df.loc[:, ['otu_id', sample,'otu_descriptions']].sort_values(sample, ascending=False)
otu_ids_sorted = list(sample_df.iloc[:, 0])
samples_sorted = list(sample_df.iloc[:, 1])
otu_descriptions_sorted = list(sample_df.iloc[:,2])

In [40]:
response_object = {'otu_ids': otu_ids_sorted,'sample_values': samples_sorted,'otu_descriptions' : otu_descriptions_sorted}

In [45]:
response_object

{'otu_descriptions': ['Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
  'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Halococcus',
  'Archaea;Euryarchaeota;Halobacteria;Halobacteriales;Halobacteriaceae;Natronorubrum',
  'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
  'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
  'Archaea;Euryarchaeota;Methanobacteria;Methanobacteriales;Methanobacteriaceae;Methanobrevibacter',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  'Bacteria',
  