In [387]:
import os
import mysql.connector
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

from matplotlib.legend import Legend

import json

%matplotlib inline

In [192]:
user_name = os.environ.get('DB_USER')
password = os.environ.get('DB_PASSWORD')
DB_NAME = 'BacterialGrowth'

# Database functions

In [289]:
user_name = os.environ.get('DB_USER')
password = os.environ.get('DB_PASSWORD')

DB_NAME = 'BacterialGrowth'

def execute(phrase):
    """This function create a connection to the database and execute a command.
    :param phrase: str SQL sentence to execute
    :return: list of str received from the database after command execution.
    """
    try:
        cnx = mysql.connector.connect(user=user_name, password=password,host='localhost',database=DB_NAME)
        cnx.get_warnings = True
        cursor = cnx.cursor()
        cursor.execute(phrase)
        res = []
        for row in cursor:
            res.append(row)

        warnings = cursor.fetchwarnings()
        if warnings: 
            for i in range(len(warnings)):
                print("\t ** Warning - "+warnings[i][2])
        cursor.close()
        cnx.commit()
        cnx.close()
        return res
    except mysql.connector.Error as err:
        print("Something went wrong: {}".format(err))
        print(phrase)

def addRecord(table, args):
    """ 
    This function adds a new entry into the indicated table.

    :table: table of the DB
    :args: dictionary with the data to insert
    :return: id of the inserted record
    """
    # Insert into table
    fields, values = getInsertFieldsValues(args)
    phrase = "INSERT IGNORE INTO " +table+" "+fields+" VALUES "+values
    res = execute(phrase)
    
    # Get the name of the primary key field
    phrase = "SHOW KEYS FROM "+table+" WHERE Key_name = 'PRIMARY'"
    res = execute(phrase)
    pk = res[0][4]
    
    # Get the value of the primary key (this will return the value both if it was inserted or ignored)
    where_clause = getWhereClause(args)
    phrase = "SELECT "+pk+" FROM "+table+" "+where_clause
    res = execute(phrase)
    last_id = res[0][0]
    
    return last_id

def countRecords(table, args):
    phrase = "SELECT COUNT(*) FROM " + table
    if args:
        where_clause = getWhereClause(args)
        phrase = phrase+" "+where_clause
    res = execute(phrase)
    return res

def getAllRecords(table, **args):
    phrase = "SELECT * FROM " + table
    if args:
        where_clause = getWhereClause(args)
        phrase = phrase+" "+where_clause
    res = execute(phrase)
    return res

def getFiles(fields, args):
    fields_clause = getSelectFields(fields)
    where_clause = getWhereClause(args)
    
    phrase = "SELECT "+fields_clause+" FROM TechnicalReplicate "+where_clause
    res = execute(phrase)
    return res

def getRecords(table, fields, args):
    fields_clause = getSelectFields(fields)
    where_clause = getWhereClause(args)
    
    phrase = "SELECT "+fields_clause+" FROM "+table+" "+where_clause
    res = execute(phrase)
    return res

In [290]:
# DATABASE SUPPLEMENTARY FUNCTIONS
# =================================

def getInsertFieldsValues(args):
    fields = "("
    values = "("
    for key, val in args.items():
        fields = fields + key + ','
        values = values + "'" +str(val) + "',"
    fields = fields[:-1] + ')'
    values = values[:-1] + ')'
    return [fields, values]

def getSelectFields(args):
    clause = ""
    for field in args:
        clause = clause + field + ", "
    clause = clause[:-2]
    return clause

def getWhereClause(args):
    if len(args) == 0:
        clause = ''
    else:
        clause = "WHERE ("
        for key, val in args.items():
            if key == 'bacteriaSpecies':
                clause = clause + key +" IN "+ str(val) + " AND "
            elif val == 'null':
                clause = clause + key + " IS NULL AND "
            elif val == 'not null':
                clause = clause + key + " IS NOT NULL AND "
            else:
                clause = clause + key + "= '" + str(val) + "' AND "
        
        clause = clause[:-5] + ')'
    
    return clause

def getJoinClause(table_from, table_to, field):
    clause = "JOIN "+table_to+" ON "+table_to+"."+field+" = "+table_from+"."+field
    return clause

def getGroupByClause(field):
    clause = "GROUP BY " + field
    return clause

def getHavingClause(agg_function, field, operator, quant, distinct=False):
    clause = "HAVING "+agg_function
    if distinct == False:
        clause = clause + "("+field+") "+operator+" "+str(quant)
    elif distinct == True:
        clause = clause +"(DISTINCT "+field+") "+operator+" "+str(quant)
    return clause

In [246]:
def getExperimentsWithBacteria(join_args, where_args):
    
    field = 'BacteriaCommunity.experimentId'
    table = 'BacteriaCommunity'
    
    phrase = "SELECT "+field+" FROM "+table+" "
        
    for arg in join_args:
        join_clause = getJoinClause(arg['table_from'], arg['table_to'], arg['field'])
        phrase = phrase+join_clause + " "
    
    where_clause = getWhereClause(where_args)
    groupby_clause = getGroupByClause(field)
    for key, val in where_args.items():
        if key == 'bacteriaSpecies':
            having_clause = getHavingClause('COUNT', key, '=', len(val), distinct=True)
    
    phrase = phrase +where_clause+" "+groupby_clause+" "+having_clause
    res = execute(phrase)
    
    return res

In [268]:
def getExperimentsWithMetabolites(join_args, where_args):
    
    field = 'Experiment.experimentId'
    table = 'Experiment'
    
    phrase = "SELECT DISTINCT "+field+" FROM "+table+" "
    
    for arg in join_args:
        join_clause = getJoinClause(arg['table_from'], arg['table_to'], arg['field'])
        phrase = phrase+join_clause + " "

    
    where_clause = getWhereClause(where_args)
    
    phrase = phrase +where_clause
    res = execute(phrase)
    
    return res

## 1st query: get all the data from experiments in which X bacteria species are present

```
SELECT BacteriaCommunity.experimentId
FROM BacteriaCommunity
JOIN Bacteria ON Bacteria.bacteriaId = BacteriaCommunity.bacteriaId
JOIN Experiment ON Experiment.experimentId = BacteriaCommunity.experimentId
WHERE (Bacteria.bacteriaSpecies IN ('BT', 'RI'))
GROUP BY experimentId
HAVING COUNT(DISTINCT Bacteria.bacteriaSpecies) = 2;
```

In [326]:
file_types = ['abundanceFile', 'metabolitesFile', 'phFile']
join_args = [{'table_from': 'BacteriaCommunity', 'table_to': 'Bacteria', 'field': 'bacteriaId'}, 
            {'table_from': 'BacteriaCommunity', 'table_to': 'Experiment', 'field': 'experimentId'}]
where_args = {'bacteriaSpecies':('BT', 'RI')}

experiment_ids = getExperimentsWithBacteria(join_args, where_args)
experiment_ids

[(101,), (102,)]

In [325]:
file_types = ['abundanceFile', 'metabolitesFile', 'phFile']
join_args = [{'table_from': 'BacteriaCommunity', 'table_to': 'Bacteria', 'field': 'bacteriaId'}, 
            {'table_from': 'BacteriaCommunity', 'table_to': 'Experiment', 'field': 'experimentId'}]

where_args = {'bacteriaSpecies':('BT', 'RI'), 'Experiment.initialPh':'3'}
where_args = {'bacteriaSpecies':('BT', 'RI'), 'Experiment.initialPh':'5'}
where_args = {'bacteriaSpecies':('BT', 'RI'), 'Experiment.experimentId':'102'}

experiment_ids = getExperimentsWithBacteria(join_args, where_args)
experiment_ids

[(102,)]

## 2nd query: get all the data from experiments in which metabolites were measures

```
SELECT DISTINCT TechnicalReplicate.experimentId 
FROM TechnicalReplicate 
WHERE (metabolitesFile IS NOT NULL)
```

In [324]:
file_types = ['abundanceFile', 'metabolitesFile', 'phFile']
where_args = {'metabolitesFile':'not null', 'initialPh': '5'}
join_args = [{'table_from': 'Experiment', 'table_to': 'TechnicalReplicate', 'field': 'experimentId'}]

experiment_ids = getExperimentsWithMetabolites(join_args, where_args)
experiment_ids

[(101,)]

## 3rd query: get all the data from experiments in which X bacteria species are present and in which metabolites were measured

```
SELECT BacteriaCommunity.experimentId 
FROM BacteriaCommunity 
JOIN Bacteria ON Bacteria.bacteriaId = BacteriaCommunity.bacteriaId 
JOIN Experiment ON Experiment.experimentId = BacteriaCommunity.experimentId 
JOIN TechnicalReplicate ON TechnicalReplicate.experimentId = Experiment.experimentId
WHERE (bacteriaSpecies IN ('BT', 'RI') AND metabolitesFile IS NOT NULL) 
GROUP BY BacteriaCommunity.experimentId 
HAVING COUNT(DISTINCT bacteriaSpecies) = 2
```

In [323]:
file_types = ['abundanceFile', 'metabolitesFile', 'phFile']
where_args = {'bacteriaSpecies':('BT', 'RI'), 'metabolitesFile':'not null', 'initialPh': '5'}
join_args = [{'table_from': 'BacteriaCommunity', 'table_to': 'Bacteria', 'field': 'bacteriaId'}, 
            {'table_from': 'BacteriaCommunity', 'table_to': 'Experiment', 'field': 'experimentId'},
            {'table_from': 'Experiment', 'table_to': 'TechnicalReplicate', 'field': 'experimentId'}]

experiment_ids = getExperimentsWithBacteria(join_args, where_args)
experiment_ids

[(101,)]

## 4th: using the previous results (3rd), keep only those in which metabolite X is measured

In [349]:
metabolites_list = ['Glucose', 'Pyruvate']

file_types = ['abundanceFile', 'metabolitesFile', 'phFile']
where_args = {'bacteriaSpecies':('BT', 'RI'), 'metabolitesFile':'not null'}
join_args = [{'table_from': 'BacteriaCommunity', 'table_to': 'Bacteria', 'field': 'bacteriaId'}, 
            {'table_from': 'BacteriaCommunity', 'table_to': 'Experiment', 'field': 'experimentId'},
            {'table_from': 'Experiment', 'table_to': 'TechnicalReplicate', 'field': 'experimentId'}]

experiment_ids = getExperimentsWithBacteria(join_args, where_args)
experiment_ids

[(101,), (102,)]

In [377]:
exp_metadata_fields = ['plateId', 'plateColumn', 'plateRow', 'initialPh', 'initialTemperature', 'inoculumConcentration', 'inoculumVolume', 'carbonSource', 'antibiotic']
pert_metadata_fields = ['property', 'newValue', 'startTime', 'endTime']

results_dict = {}
for experiment_id in experiment_ids:
    
    results_dict[experiment_id[0]] = {'metadata':{}}
    
    exp_metadata = getRecords('Experiment', exp_metadata_fields, {'experimentId':id_args['experimentId']})
    exp_metadata_dict = dict(zip(exp_metadata_fields, exp_metadata[0]))
    results_dict[experiment_id[0]]['metadata'] = exp_metadata_dict
    
    perturbation_ids = getRecords('Perturbation', ['perturbationId'], {'experimentId':experiment_id[0]})
    
    if len(metabolites_list) != 0:
        res = getFiles({'metabolitesFile'}, {'experimentId':experiment_id[0], 'perturbationId': 'null'})
        for i, files in enumerate(res):
            headers = pd.read_csv(files[0], sep=" ").columns
            if set(metabolites_list).issubset(set(headers.tolist())):
                id_args = {'experimentId':experiment_id[0], 'perturbationId': 'null'}
    else:
        id_args = {'experimentId':experiment_id[0], 'perturbationId': 'null'}
        
    files_res = getFiles(file_types, id_args)
    results_dict[experiment_id[0]]['0'] = {'files': ''}
    results_dict[experiment_id[0]]['0']['files'] = files_res
    
    # Each perturbations
    for perturbation_id in perturbation_ids:
        
        if len(metabolites_list) != 0:
            res = getFiles({'metabolitesFile'}, {'experimentId':experiment_id[0], 'perturbationId': perturbation_id[0]})
            for i, files in enumerate(res):
                headers = pd.read_csv(files[0], sep=" ").columns
                if set(metabolites_list).issubset(set(headers.tolist())):
                    id_args = {'experimentId':experiment_id[0], 'perturbationId': perturbation_id[0]}
        else:
            id_args = {'experimentId':experiment_id[0], 'perturbationId': perturbation_id[0]}
            
    
        files_res = getFiles(file_types, id_args)
        pert_metadata = getRecords('Perturbation', pert_metadata_fields, {'perturbationId':id_args['perturbationId']})
        pert_metadata_dict = dict(zip(pert_metadata_fields, pert_metadata[0]))

        results_dict[experiment_id[0]][perturbation_id[0]] = {'metadata': '', 'files': ''}
        results_dict[experiment_id[0]][perturbation_id[0]]['metadata'] = pert_metadata_dict
        results_dict[experiment_id[0]][perturbation_id[0]]['files'] = files_res

In [378]:
results_dict

{101: {'metadata': {'plateId': 1,
   'plateColumn': 1,
   'plateRow': 'D',
   'initialPh': 1.0,
   'initialTemperature': 1.0,
   'inoculumConcentration': 1,
   'inoculumVolume': 1,
   'carbonSource': 1,
   'antibiotic': '1'},
  '0': {'files': [('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/abundance_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/metabolites_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/pH_file.txt'),
    ('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/abundance_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/metabolites_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/pH_file.txt'),
    ('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/abundance_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/metabolites_file.txt',
     '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/pH_file.txt')

In [385]:
print(results_dict[101]['metadata'])
print('\n')
print(results_dict[101]['0'])
print('\n')
print(results_dict[101]['101.1']['metadata'])
print('\n')
print(results_dict[101]['101.1']['files'])

{'plateId': 1, 'plateColumn': 1, 'plateRow': 'D', 'initialPh': 1.0, 'initialTemperature': 1.0, 'inoculumConcentration': 1, 'inoculumVolume': 1, 'carbonSource': 1, 'antibiotic': '1'}


{'files': [('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/abundance_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/metabolites_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/pH_file.txt'), ('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/abundance_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/metabolites_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/pH_file.txt'), ('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/abundance_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/metabolites_file.txt', '/Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/pH_file.txt'), ('/Users/julia/bacterialGrowth_thesis/Data/experiments/101/4/abundance_file.txt', '/U

In [506]:
from zipfile import ZipFile

In [510]:
with open('file.txt', 'w') as file:
    for key, val in results_dict.items():
        experiment_id = key
        header = 'EXPERIMENT '+str(experiment_id)
        file.write('-'*80+'\n')
        file.write(header+'\n')
        file.write('-'*80+'\n')
        
        
        for key2, val2 in results_dict[experiment_id].items():
            
            # Experiment metadata
            if key2 == 'metadata':
                file.write('Metadata:\n')
                lines = writeMetadata(results_dict[experiment_id][key2], 'experiment')
                file.write(lines)
            
            # Experiment perturbations
            else:
                perturbation_id = key2
                pert = 'Perturbation '+key2
                file.write('\n'+pert+'\n')
                file.write('-'*80+'\n')
                
                for key3, val3 in results_dict[experiment_id][perturbation_id].items():
                    if key3 == 'metadata':
                        file.write('Metadata:\n')
                        lines = writeMetadata(results_dict[experiment_id][perturbation_id]['metadata'], 'perturbation')
                        file.write(lines)
                    
                    if key3 == 'files':
                        file.write('Files:\n')
                        for fl in results_dict[experiment_id][perturbation_id]['files']:
                            file.write('\t'+fl[0]+'\n')
                            
                            with ZipFile('000_myZip.zip', 'w') as archive:
                                archive.write(fl[0])
                                archive.printdir()
                
                
        file.write('\n\n\n')

with ZipFile('000_myZip.zip', 'w') as archive:
    archive.write('file.txt')
    archive.printdir()
    

File Name                                             Modified             Size
Users/julia/bacterialGrowth_thesis/Data/experiments/101/1/abundance_file.txt 2023-04-17 13:52:06          343
File Name                                             Modified             Size
Users/julia/bacterialGrowth_thesis/Data/experiments/101/2/abundance_file.txt 2023-04-17 13:52:06          341
File Name                                             Modified             Size
Users/julia/bacterialGrowth_thesis/Data/experiments/101/3/abundance_file.txt 2023-04-17 13:52:06          343
File Name                                             Modified             Size
Users/julia/bacterialGrowth_thesis/Data/experiments/101/4/abundance_file.txt 2023-04-17 13:52:06          342
File Name                                             Modified             Size
Users/julia/bacterialGrowth_thesis/Data/experiments/101/5/abundance_file.txt 2023-04-17 13:52:07          341
File Name                                         

In [452]:
def writeMetadata(dict, type):
    
    if type == 'experiment':
        lines = '\tPlate id: '+str(dict['plateId'])+'\n'
        lines = lines + '\tPlate location: '+str(dict['plateColumn'])+dict['plateRow']+'\n'
        lines = lines + '\tInitial pH: '+str(dict['initialPh'])+'\n'
        lines = lines + '\tInitial temperature: '+str(dict['initialTemperature'])+'\n'
    
    elif type == 'perturbation':
        lines = '\tProperty perturbed: '+dict['property']+'\n'
        lines = lines + '\tNew value: '+str(dict['newValue'])+'\n'
        lines = lines + '\tStarting time (minutes): '+str(dict['startTime'])+'\n'
        lines = lines + '\tEnding time (minutes): '+str(dict['endTime'])+'\n'
        
    return lines

# Write PDF report