## Data Engineering

In [1]:
import warnings
# Suppress every warning for the rest of the kernel session. This keeps cell
# output tidy, but it also hides deprecation notices raised by the libraries
# imported in the next cell.
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect
from sqlalchemy import func, desc
from matplotlib.ticker import NullFormatter
import matplotlib.dates as mdates
from datetime import datetime, timedelta
import seaborn as sns
from flask import Flask, jsonify
import datetime as dt

In [3]:
# Engine for the SQLite file, addressed by a relative path (so it is resolved
# against the working directory). echo=False disables SQL statement logging.
engine = create_engine("sqlite:///belly_button_biodiversity.sqlite", echo=False)

In [4]:
# Build mapped classes by reflecting the existing database schema rather than
# declaring the models by hand.
Base = automap_base()
# NOTE(review): `reflect=True` is deprecated in newer SQLAlchemy releases in
# favour of `Base.prepare(autoload_with=engine)` - confirm against the
# installed version before upgrading.
Base.prepare(engine, reflect=True)
# Show the names of the tables that were mapped.
Base.classes.keys()

['otu', 'samples', 'samples_metadata']

In [5]:
# Module-level aliases for the three reflected classes; the helper functions
# further down query through these names.
Otu = Base.classes.otu
Samples = Base.classes.samples
Samples_MD = Base.classes.samples_metadata

In [6]:
# One session bound to the engine, shared by every helper function below.
session = Session(engine)

In [7]:
# Load the whole samples table into a DataFrame to inspect its layout:
# an `otu_id` column followed by one column per sample (BB_940, BB_941, ...).
samples_query = session.query(Samples)
all_samples = pd.read_sql(samples_query.statement, samples_query.session.bind)
all_samples.head()

Unnamed: 0,otu_id,BB_940,BB_941,BB_943,BB_944,BB_945,BB_946,BB_947,BB_948,BB_949,...,BB_1562,BB_1563,BB_1564,BB_1572,BB_1573,BB_1574,BB_1576,BB_1577,BB_1581,BB_1601
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Sample Names

In [8]:
def get_sample_names():
    """Return the sample names, i.e. the column names of the samples table.

    The names are taken from the table definition attached to the mapped
    ``Samples`` class, so no rows are fetched and no DataFrame is built just
    to read a header.

    Returns:
        list of str: every column name after the first, in table order. The
        first column is skipped because it is the ``otu_id`` key rather than
        a sample.
    """
    # str() normalises SQLAlchemy's str-subclass column names to plain strings,
    # matching what a DataFrame header would contain.
    column_names = [str(column.name) for column in Samples.__table__.columns]
    return column_names[1:]

### List of OTU Descriptions

In [9]:
def otu_descriptions():
    """Return the description of every OTU as a list.

    Reads the full OTU table into a DataFrame and returns the contents of its
    ``lowest_taxonomic_unit_found`` column, in the order the rows were read.
    """
    query = session.query(Otu)
    frame = pd.read_sql(query.statement, query.session.bind)
    taxa = frame.loc[:, 'lowest_taxonomic_unit_found']
    return [unit for unit in taxa.values]

### Metadata for a Given Sample

In [21]:
def get_metadata(sample):
    """Return the metadata record for one sample as a plain dictionary.

    Args:
        sample: Sample name made of a three-character prefix followed by the
            numeric sample id, e.g. ``'BB_940'``.

    Returns:
        dict with the keys ``AGE``, ``BBTYPE``, ``ETHNICITY``, ``GENDER``,
        ``LOCATION`` and ``SAMPLEID``, taken from the first metadata row whose
        ``SAMPLEID`` equals the numeric part of ``sample``. ``AGE`` and
        ``SAMPLEID`` are converted to built-in ``int``.

    Raises:
        ValueError: if the text after the prefix is not an integer.
        IndexError: if no metadata row has that ``SAMPLEID``.
    """
    sample_id = int(sample[3:])
    meta_query = session.query(Samples_MD)
    meta_df = pd.read_sql(meta_query.statement, meta_query.session.bind)
    matches = meta_df.loc[meta_df['SAMPLEID'] == sample_id]
    if matches.empty:
        # IndexError is what an unmatched lookup has always produced here;
        # keeping the type lets existing `except IndexError` handlers work,
        # while the message now says which sample was missing.
        raise IndexError("no metadata found for sample {!r}".format(sample))

    def first(column):
        # Positional access to the first match; reading per column keeps each
        # value in its own column dtype instead of upcasting a whole row.
        return matches[column].iloc[0]

    return {
        'AGE': int(first('AGE')),
        'BBTYPE': first('BBTYPE'),
        'ETHNICITY': first('ETHNICITY'),
        'GENDER': first('GENDER'),
        'LOCATION': first('LOCATION'),
        'SAMPLEID': int(first('SAMPLEID')),
    }

In [22]:
# Spot-check the helper on BB_940, the first sample column of the samples table.
get_metadata('BB_940')

{'AGE': 24,
 'BBTYPE': 'I',
 'ETHNICITY': 'Caucasian',
 'GENDER': 'F',
 'LOCATION': 'Beaufort/NC',
 'SAMPLEID': 940}

### Weekly Washing Frequency (Number)

In [11]:
def get_wfreq(sample):
    """Return the weekly washing frequency recorded for one sample.

    Args:
        sample: Sample name made of a three-character prefix followed by the
            numeric sample id, e.g. ``'BB_940'``.

    Returns:
        The ``WFREQ`` value of the first metadata row whose ``SAMPLEID``
        equals the numeric part of ``sample``, exactly as pandas holds it
        (no type conversion is applied).

    Raises:
        ValueError: if the text after the prefix is not an integer.
        IndexError: if no metadata row has that ``SAMPLEID``.
    """
    sample_id = int(sample[3:])
    meta_query = session.query(Samples_MD)
    meta_df = pd.read_sql(meta_query.statement, meta_query.session.bind)
    wfreq_matches = meta_df.loc[meta_df['SAMPLEID'] == sample_id, 'WFREQ']
    if wfreq_matches.empty:
        # IndexError keeps existing `except IndexError` handlers working;
        # the message identifies the sample that could not be found.
        raise IndexError("no metadata found for sample {!r}".format(sample))
    return wfreq_matches.iloc[0]

### OTU IDs and Sample Values for a Given Sample

In [15]:
def get_sample_values(sample):
    """Return the non-zero OTU counts of one sample, largest first.

    Args:
        sample: Name of a sample column in the samples table, e.g. ``'BB_940'``.

    Returns:
        A one-element list holding a dict with two parallel lists of strings:
        ``"otu_id"`` (the OTU ids) and ``"samples"`` (the matching values).
        Only rows whose value is greater than zero are included, ordered by
        value in descending order.
    """
    query = session.query(Samples)
    frame = pd.read_sql(query.statement, query.session.bind)

    # Keep the id column and the requested sample, drop rows with no reads.
    subset = frame[['otu_id', sample]]
    present = subset.loc[subset[sample] > 0]
    present.columns = ['otu_id', 'samples']
    ranked = present.sort_values('samples', ascending=False)

    # Stringify each value straight from the underlying arrays.
    id_strings = [str(value) for value in ranked['otu_id'].values]
    count_strings = [str(value) for value in ranked['samples'].values]

    return [{"otu_id": id_strings, "samples": count_strings}]