# Amino acid properties

In [1]:
from Bio.SeqUtils.ProtParam import ProteinAnalysis
import pandas as pd

## Manually inserted data

In [2]:
# Hand-entered reference data, keyed by one-letter amino-acid code.
# Each value is a (volume, class) tuple. The unit and literature source of
# the volumes are not recorded in this notebook -- presumably cubic
# angstroms; TODO confirm and cite the source.
amino = {
    'A': (88.6, 'Non-polar, aliphatic'),    # alanine
    'R': (173.4, 'Positively charged'),     # arginine
    'N': (114.1, 'Polar, non-charged'),     # asparagine
    'D': (111.1, 'Negatively charged'),     # aspartate
    'C': (108.5, 'Polar, non-charged'),     # cysteine
    'Q': (143.8, 'Polar, non-charged'),     # glutamine
    'E': (138.4, 'Negatively charged'),     # glutamate
    'G': (60.1, 'Non-polar, aliphatic'),    # glycine
    'H': (153.2, 'Positively charged'),     # histidine
    'I': (166.7, 'Non-polar, aliphatic'),   # isoleucine
    'L': (166.7, 'Non-polar, aliphatic'),   # leucine
    'K': (168.6, 'Positively charged'),     # lysine
    # NOTE(review): methionine is often classed as non-polar -- confirm.
    'M': (162.9, 'Polar, non-charged'),     # methionine
    'F': (189.9, 'Aromatic'),               # phenylalanine
    'P': (112.7, 'Non-polar, aliphatic'),   # proline
    'S': (89.0, 'Polar, non-charged'),      # serine
    'T': (116.1, 'Polar, non-charged'),     # threonine
    'W': (227.8, 'Aromatic'),               # tryptophan
    # NOTE(review): the only entry labelled 'Polar, Aromatic'; it forms its
    # own class, distinct from the 'Aromatic' used for F and W -- confirm.
    'Y': (193.6, 'Polar, Aromatic'),        # tyrosine
    'V': (140.0, 'Non-polar, aliphatic'),   # valine
}

# Column names (and column order) of the property table built from `amino`.
props = [
    'class',
    'gravy',
    'isoelectric_point',
    'charge_at_pH',
    'volume',
]

## Data from Biopython

In [3]:
# Build the property table in a single pass.
# Each residue is analysed with Biopython as a one-letter sequence, the
# results are collected as one record per residue, and the DataFrame is
# constructed once at the end. This avoids chained assignment
# (df[col][row] = value), which pandas warns about and which no longer
# writes into the frame when Copy-on-Write is enabled.
rows = []
for aa, (volume, aa_class) in amino.items():
    analysis = ProteinAnalysis(aa)
    rows.append({
        'class': aa_class,
        'gravy': analysis.gravy(),
        'isoelectric_point': analysis.isoelectric_point(),
        'charge_at_pH': analysis.charge_at_pH(7),  # net charge evaluated at pH 7
        'volume': volume,
    })

# Index by residue code, in the same order as `amino`; `props` fixes the
# column order. Numeric columns get a float dtype here rather than `object`.
df = pd.DataFrame(rows, index=list(amino), columns=props)
df

Unnamed: 0,class,gravy,isoelectric_point,charge_at_pH,volume
A,"Non-polar, aliphatic",1.8,5.570017,-0.204125,88.6
R,Positively charged,-4.5,9.750021,0.760092,173.4
N,"Polar, non-charged",-3.5,5.525,-0.239898,114.1
D,Negatively charged,-3.5,4.299381,-1.235597,111.1
C,"Polar, non-charged",2.5,5.518123,-0.249799,108.5
Q,"Polar, non-charged",-3.5,5.525,-0.239898,143.8
E,Negatively charged,-3.5,4.599264,-1.157935,138.4
G,"Non-polar, aliphatic",-0.4,5.525,-0.239898,60.1
H,Positively charged,-3.2,6.741127,-0.152724,153.2
I,"Non-polar, aliphatic",4.5,5.525,-0.239898,166.7


In [4]:
# Display the table again; nothing has modified `df` since the previous cell.
df

Unnamed: 0,class,gravy,isoelectric_point,charge_at_pH,volume
A,"Non-polar, aliphatic",1.8,5.570017,-0.204125,88.6
R,Positively charged,-4.5,9.750021,0.760092,173.4
N,"Polar, non-charged",-3.5,5.525,-0.239898,114.1
D,Negatively charged,-3.5,4.299381,-1.235597,111.1
C,"Polar, non-charged",2.5,5.518123,-0.249799,108.5
Q,"Polar, non-charged",-3.5,5.525,-0.239898,143.8
E,Negatively charged,-3.5,4.599264,-1.157935,138.4
G,"Non-polar, aliphatic",-0.4,5.525,-0.239898,60.1
H,Positively charged,-3.2,6.741127,-0.152724,153.2
I,"Non-polar, aliphatic",4.5,5.525,-0.239898,166.7


## Saving the DataFrame

In [5]:
# Write the property table to a pickle file.
# The path is relative to the notebook's working directory; the ../data
# directory must already exist, because to_pickle does not create it.
aa_props_df_path = '../data/aa_props_dataframe.pkl'
df.to_pickle(aa_props_df_path)

Reload the pickled file to confirm it was saved correctly.

In [6]:
# Reload the file written by the save cell and verify that it round-trips to
# a frame identical to `df` (same values, dtypes, index and columns).
# The pickle was produced by this notebook, so loading it is safe; do not
# use pd.read_pickle on files from untrusted sources.
teste = pd.read_pickle(aa_props_df_path)
pd.testing.assert_frame_equal(teste, df)
teste

Unnamed: 0,class,gravy,isoelectric_point,charge_at_pH,volume
A,"Non-polar, aliphatic",1.8,5.570017,-0.204125,88.6
R,Positively charged,-4.5,9.750021,0.760092,173.4
N,"Polar, non-charged",-3.5,5.525,-0.239898,114.1
D,Negatively charged,-3.5,4.299381,-1.235597,111.1
C,"Polar, non-charged",2.5,5.518123,-0.249799,108.5
Q,"Polar, non-charged",-3.5,5.525,-0.239898,143.8
E,Negatively charged,-3.5,4.599264,-1.157935,138.4
G,"Non-polar, aliphatic",-0.4,5.525,-0.239898,60.1
H,Positively charged,-3.2,6.741127,-0.152724,153.2
I,"Non-polar, aliphatic",4.5,5.525,-0.239898,166.7
