# From database to tpc.pdf

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Load the two source tables used throughout the notebook.
# db2: the unpublished data set, read with pandas' default encoding.
# db1: the updated data set, read explicitly as ISO-8859-1 (Latin-1).
db2 = pd.read_csv('unpublished_db_prokaryotic.csv')
db1 = pd.read_csv('updated_db_prokaryotic.csv', encoding='ISO-8859-1')

### Classify traits into Fundamental Metabolic traits (fm_traits) or Growth traits (gro_traits)

In [None]:
# db1 classification
# Collect every distinct trait name once, in order of first appearance.
# The dict values (5) are placeholders; only the ordered, unique keys are used.
dic = dict.fromkeys(db1['StandardisedTraitName'].values, 5)
db1_traits = list(dic)

# Positions inside db1_traits of the traits assigned to each class.
fm_positions = (2, 3, 5, 6, 8, 10, 11, 12, 14, 15,
                17, 18, 20, 21, 22, 23, 24, 25, 26, 27)
gro_positions = (1, 9)

fm_traits1 = [db1_traits[pos] for pos in fm_positions]
gro_traits1 = [db1_traits[pos] for pos in gro_positions]

# Row-wise masks over db1: True where the row's trait belongs to the class.
bool_fmt = db1['StandardisedTraitName'].isin(fm_traits1)
bool_grt = db1['StandardisedTraitName'].isin(gro_traits1)

In [None]:
# Label every db1 row as 'fmt' or 'growth' and discard rows that are neither.
print("Initial", db1.shape)

# Rows whose trait belongs to one of the two classes are kept.
keep = bool_fmt | bool_grt

# Positional row numbers of the discarded rows (same content as before:
# a plain list of ints, handy for inspecting what was removed).
unwanted = np.flatnonzero(~keep.to_numpy()).tolist()

# Filter with the boolean mask rather than drop(): drop() removes rows by
# index *label*, which only matches row positions while db1 still has its
# default 0..n-1 index. .copy() gives an independent frame to write into.
db1 = db1[keep].copy()

# Write the label by column name instead of the hard-coded column position
# 174, which only pointed at 'LabelTrait' for one specific CSV layout.
# 'fmt' wins over 'growth' when both masks are True, as in the old if/elif.
db1['LabelTrait'] = np.where(bool_fmt[keep], 'fmt', 'growth')

print("Final", db1.shape)


In [None]:
# Label every db2 row as 'growth' or 'fmt'.
print("Initial", db2.shape)

# NOTE(review): column position 3 is presumably the 'Trait' column -- confirm
# against the CSV header and switch to the column name if so.
is_growth = db2.iloc[:, 3] == 'Specific Growth Rate'

# Assign the label column by name in one vectorised step. The previous
# per-row loop wrote to hard-coded column position 12, which is only the
# 'LabelTrait' column when the CSV has exactly twelve columns.
db2['LabelTrait'] = np.where(is_growth, 'growth', 'fmt')

print("Final", db2.shape)

### Isolation of important columns: ID, TraitValue, TraitUnit, Temp, TempUnit

In [None]:
# Reduce both databases to the same nine columns and stack them.

# Common column names used from here on (this is already db2's naming).
col_name = ['Strain', 'Trait', 'TraitValue', 'TraitUnits', 'Temperature',
            'TemperatureUnits', 'LabelTrait', 'Genus', 'Species']

# db1 columns that correspond, position by position, to col_name.
db1_source_cols = ['OriginalID', 'StandardisedTraitName', 'OriginalTraitValue',
                   'OriginalTraitUnit', 'ConTemp', 'ConTempUnit', 'LabelTrait',
                   'ConGenus', 'ConSpecies']

# Select the db1 columns and rename them to the common names.
clean_db1 = db1[db1_source_cols].rename(columns=dict(zip(db1_source_cols, col_name)))

# db2 already uses the common names, so a plain selection is enough.
clean_db2 = db2[col_name]

# Stack db1 rows on top of db2 rows (each frame keeps its own row labels).
db = pd.concat([clean_db1, clean_db2])


### Classify strain-specific data

In [None]:
# fixing the indices to allow for loops
# The concatenated frame carries the row labels of its two source frames,
# so labels may repeat. Replace them with a fresh 0..n-1 index named 'Ind'.
# This is the direct equivalent of filling a scratch column row by row and
# promoting it with set_index, without the per-row writes or the hard-coded
# column position.
db = db.reset_index(drop=True).rename_axis('Ind')

In [None]:
# renaming strains based on Label traits
# Rows labelled 'growth' get the prefix "gro_"; every other row gets "fmt_".
prefix = pd.Series(
    np.where(db['LabelTrait'] == 'growth', 'gro_', 'fmt_'),
    index=db.index,
)

# Work on column names instead of positions 6 and 0, and prefix the whole
# column at once. IDs are converted with str() so numeric strain IDs no
# longer raise TypeError; missing IDs are left missing rather than turned
# into the text 'nan'.
strain = db['Strain']
db['Strain'] = (prefix + strain.astype(str)).where(strain.notna())

In [None]:
# create 'strain_dic' where all data is strain- and trait-specific
# Keys are the prefixed strain names; each value is the sub-frame of db
# holding every row of that strain.

# Only strains with at least one row labelled 'growth' or 'fmt' get an entry.
labelled = db['LabelTrait'].isin(['growth', 'fmt'])
wanted = set(db.loc[labelled, 'Strain'])

# One groupby pass replaces the old loop, which re-filtered the whole frame
# once per row (quadratic) through two identical branches. sort=False keeps
# strains in order of first appearance. Rows with a missing strain name are
# skipped by groupby.
strain_dic = {
    name: rows
    for name, rows in db.groupby('Strain', sort=False)
    if name in wanted
}

### Plotting data

In [None]:
## STEP 1
# Change the notebook's working directory; the image and PDF files written
# by the following cells use relative names and therefore land here.
# NOTE(review): hard-coded, user-specific Windows path that mixes single and
# doubled backslashes -- adjust before running on another machine.

%cd \Users\\aleja\\workspace\\actual\\prokaryotic_data\pdf_maker

In [None]:
# create a graph and save as ".png"
%matplotlib inline
from PIL import Image

def plot_tpc(strain_list):
    """Plot trait value against temperature for each strain and save images.

    For every name in ``strain_list`` the matching data frame is looked up in
    the notebook-level ``strain_dic`` and drawn as red dots. The figure is
    written to ``<name>.png`` in the current working directory and then
    re-saved as an RGB ``<name>.jpg``.

    Parameters
    ----------
    strain_list : iterable of str
        Keys of ``strain_dic`` to plot.

    Returns
    -------
    str
        The literal ``'Nais'`` once every strain has been processed.

    Raises
    ------
    KeyError
        If a name is not a key of ``strain_dic``.
    """
    for strain_name in strain_list:
        data_strain = strain_dic[strain_name]

        # Axis text comes from the strain's own first row. Reading row 1
        # failed for strains with a single measurement, and the unit used to
        # be read from the global `db` table instead of this strain's data.
        first_row = data_strain.iloc[0]
        y_label = str(first_row['Trait']) + " in " + str(first_row['TraitUnits'])

        png_path = strain_name + ".png"
        fig = plt.figure(num=None, figsize=(8, 6), dpi=300, edgecolor='k')
        try:
            ax = fig.add_subplot(111)
            ax.plot(data_strain['Temperature'], data_strain['TraitValue'], 'ro')
            ax.set_xlabel("Temperature")
            ax.set_ylabel(y_label)
            fig.savefig(png_path)
        finally:
            # Close each figure once saved: pyplot otherwise keeps every
            # 300-dpi figure alive for the whole loop. As a consequence the
            # figures are no longer left open for inline display.
            plt.close(fig)

        # Convert to RGB (JPEG has no alpha channel) and release the file
        # handle as soon as the copy is written.
        with Image.open(png_path) as im:
            im.convert('RGB').save(strain_name + '.jpg')

    return 'Nais'
    
    ###############################

# NOTE(review): `list_strain` is not defined in any cell visible here --
# presumably a list of strain_dic keys (e.g. list(strain_dic)); confirm.
plot_tpc(list_strain)

In [None]:
# create function to make pdf with image inside
from reportlab.pdfgen import canvas  

def make_pdf(strain_list):
    """Write a one-page PDF per strain containing a title and its TPC image.

    For every name in ``strain_list`` a file ``<name>.pdf`` is created in the
    current working directory. The page carries an upper-cased title built
    from the strain's trait label, a subtitle naming the trait, genus and
    species, a separating line, and the image ``<name>.jpg``, which must
    already exist.

    Parameters
    ----------
    strain_list : iterable of str
        Keys of the notebook-level ``strain_dic``.

    Returns
    -------
    str
        The literal ``'Nais'`` once every PDF has been saved.

    Raises
    ------
    KeyError
        If a name is not a key of ``strain_dic``.
    """
    for strain_name in strain_list:
        # Descriptive text comes from the strain's first row, addressed by
        # column name. Reading row 1 failed for strains that have only a
        # single measurement.
        first_row = strain_dic[strain_name].iloc[0]

        filename = strain_name + '.pdf'
        title = (first_row['LabelTrait'] + ' TPC').upper()
        subtitle = (
            "The temperature dependence of " + str(first_row['Trait'])
            + " in " + str(first_row['Genus']) + " " + str(first_row['Species'])
        )

        pdf = canvas.Canvas(filename)
        pdf.setTitle(filename)

        # insert title and subtitle in file
        pdf.drawCentredString(300, 770, title)
        pdf.drawCentredString(290, 720, subtitle)

        # add line separating
        pdf.line(30, 710, 550, 710)

        # add image, scaled down from 800x600 by a factor of 1.3
        im = strain_name + '.jpg'
        pdf.drawInlineImage(im, x=0, y=240,
                            width=round(800 / 1.3), height=round(600 / 1.3))
        pdf.save()

    return 'Nais'
    ######################################


# NOTE(review): `list_strain` is not defined in any cell visible here --
# presumably a list of strain_dic keys (e.g. list(strain_dic)); confirm.
make_pdf(list_strain)