In [1]:
import numpy
import os

####################################################################################################################

def open_xyz(xyz_filename):
    """
    Open an xyz file and separate the atom symbols from the coordinates.

    The first two lines of the file are skipped; every remaining row is read
    as text, its first column is taken as the symbol and the remaining
    columns are converted to floats.

    Inputs: path of the xyz file
    Return: two things: symbols (1-D array of strings) and
            coordinates (2-D array of floats, one row per atom)
    """
    xyz_file = numpy.genfromtxt(fname=xyz_filename, skip_header=2, dtype=str)
    # genfromtxt returns a 1-D array when the file holds a single atom row;
    # promote it to 2-D so the column slicing below works for any atom count.
    xyz_file = numpy.atleast_2d(xyz_file)
    symbols = xyz_file[:, 0]
    # Use the builtin float: the numpy.float alias was removed in NumPy 1.24.
    coordinates = xyz_file[:, 1:].astype(float)
    return symbols, coordinates

####################################################################################################################

def calculate_distance(atom1_coord, atom2_coord):
    """
    Euclidean distance between two atoms in 3D space.

    Inputs: two coordinate sequences; only indices 0, 1 and 2 (x, y, z) are read
    Return: the straight-line distance between the two points
    """
    # Per-axis differences, in x, y, z order.
    dx, dy, dz = (atom1_coord[axis] - atom2_coord[axis] for axis in range(3))
    return numpy.sqrt(dx ** 2 + dy ** 2 + dz ** 2)

####################################################################################################################

def bond_check(distance, min_val=0, max_val=1.5):
    """
    Decide whether a distance counts as a bond.

    A distance is a bond when it is strictly greater than min_val and
    less than or equal to max_val. Returns True or False.
    """
    within_bounds = distance > min_val and distance <= max_val
    return bool(within_bounds)
####################################################################################################################

In [362]:
import pandas as pd
# https://medium.com/dunder-data/selecting-subsets-of-data-in-pandas-6fcd0170be9c

# Load the sample metadata table, using the first CSV column as the row index.
meta_data = pd.read_csv("./data/r_metadata_AMV_200131.csv", index_col=0)
# Preview the first five rows.
meta_data.head(5)

Unnamed: 0_level_0,Rf.values,X,Sample 1,Sample 2,Sample 3,Sample 4,Kon A,Kon B,Kon C
Sample,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
sample input,,[ul],47.0,57.0,68.0,46.0,,,
PC,1.06,[pmol],50.0,50.0,50.0,50.0,50.0,,
SM,1.35,[pmol],50.0,50.0,50.0,50.0,50.0,,
PE,1.74,[pmol],25.0,25.0,25.0,25.0,25.0,,
PS,1.13,[pmol],25.0,25.0,25.0,25.0,25.0,,


In [186]:
# Look up the metadata row labelled 'Chol'.
# NOTE(review): the saved output of this cell uses column labels such as
# 'sample 9' and 'Kon.A', while the metadata preview earlier in the notebook
# shows 'Sample 1' and 'Kon A' -- the output looks stale; re-run to confirm.
meta_data.loc['Chol']

Rf.values       NaN
X            [pmol]
sample 9        NaN
sample 10       NaN
sample 11       NaN
sample 12       NaN
Kon.A           NaN
Kon.B            50
Kon.C           100
Name: Chol, dtype: object

In [189]:
# Select the 'sample 9' column for every row of the metadata table.
# NOTE(review): the metadata preview earlier in the notebook lists columns
# 'Sample 1' ... 'Sample 4', not 'sample 9' -- confirm this label still exists
# in the current CSV, otherwise this lookup raises KeyError on a fresh run.
meta_data.loc[:, 'sample 9']

Sample
sample input     47.0
PC               50.0
SM               50.0
PE               25.0
PS               25.0
PI               30.0
PG               10.0
PA               10.0
HexCer            5.0
Cer               5.0
Hex2Cer           5.0
DAG              25.0
TAG              24.0
CE               25.0
LPC               5.0
CA              100.0
Chol              NaN
plPE16-0          NaN
plPE18-0          NaN
plPE18-1          NaN
Name: sample 9, dtype: float64

In [358]:
# Path of the text file to load, built relative to the 'data' folder.
LV_file = os.path.join('data', '200131_XtrIL_AMV_SM.txt')

In [374]:
# First pass: skip one line and use the next as the header, keeping only the
# columns whose name does not contain 'Unnamed'; only the names are kept.
col_names = list(pd.read_table(LV_file, skiprows=1,
                             usecols = lambda column : 'Unnamed' not in column))
# Second pass: skip two lines, drop the listed bookkeeping columns and use the
# first remaining column as the row index.
lipid_data = pd.read_table(LV_file, skiprows=2, index_col=0, 
                        usecols = lambda column : column not in ['Sample ID', 'PIS m/z', 'Polarity', 'View Type', '(ScanName)'])

# Relabel the data columns with the names from the first pass (all but the first).
lipid_data.columns = col_names[1:]
lipid_data.head(5)

Unnamed: 0_level_0,Sample 1,Sample 2,Sample 3,Sample 4,Kon A
LipidName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IS SM 31:1;2,16918390.0,16480000.0,19617000.0,11470650.0,9783000.0
IS SM 35:1;2,24308110.0,20993120.0,20665120.0,16448730.0,18196670.0
IS SM 43:1;2,11466930.0,10470340.0,10359050.0,7902165.0,7947878.0
SM 34:2;2,383947.0,343446.1,480206.1,447483.5,0.0
SM 34:1;2,4298558.0,3181287.0,7392958.0,6834820.0,41125.0


In [408]:
# Show the row labels and the column labels of the loaded table.
print(lipid_data.index.values)
print(lipid_data.columns.values)

['IS SM 31:1;2' 'IS SM 35:1;2' 'IS SM 43:1;2' 'SM 34:2;2' 'SM 34:1;2'
 'SM 34:0;2' 'SM 36:2;2' 'SM 36:1;2' 'SM 36:0;2' 'SM 38:2;2' 'SM 38:1;2'
 'SM 38:0;2' 'SM 40:2;2' 'SM 40:1;2' 'SM 40:0;2' 'SM 42:3;2' 'SM 42:2;2'
 'SM 42:1;2' 'SM 42:0;2']
['Sample 1' 'Sample 2' 'Sample 3' 'Sample 4' 'Kon A']


In [430]:
def open_LVfile(LVfile):
    """
    Load a single tab-separated text file as a cleaned-up table.

    The file is read twice: once with the second line as header to collect the
    column names that do not contain 'Unnamed', and once with the third line as
    header to get the data, dropping the bookkeeping columns and using the
    first remaining column as the index. The data columns are then renamed
    with the collected names (all but the first).

    Inputs: path of the text file
    Return: the cleaned table as a DataFrame
    """
    dropped_columns = ['Sample ID', 'PIS m/z', 'Polarity', 'View Type', '(ScanName)']

    def is_named(column):
        return 'Unnamed' not in column

    def is_kept(column):
        return column not in dropped_columns

    header_frame = pd.read_table(LVfile, skiprows=1, usecols=is_named)
    lipid_data = pd.read_table(LVfile, skiprows=2, index_col=0, usecols=is_kept)
    # The first collected name belongs to the index column, so skip it.
    lipid_data.columns = list(header_frame)[1:]
    return lipid_data

In [448]:
# Load one file through open_LVfile and display the resulting table.
LV_file = os.path.join('data', '200131_XtrIL_AMV_SM.txt')
open_LVfile(LV_file)

Unnamed: 0_level_0,Sample 1,Sample 2,Sample 3,Sample 4,Kon A
LipidName,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
IS SM 31:1;2,16918390.0,16480000.0,19617000.0,11470650.0,9783000.0
IS SM 35:1;2,24308110.0,20993120.0,20665120.0,16448730.0,18196670.0
IS SM 43:1;2,11466930.0,10470340.0,10359050.0,7902165.0,7947878.0
SM 34:2;2,383947.0,343446.1,480206.1,447483.5,0.0
SM 34:1;2,4298558.0,3181287.0,7392958.0,6834820.0,41125.0
SM 34:0;2,942596.0,678063.5,422822.8,366594.9,102873.2
SM 36:2;2,241021.9,218398.1,85287.27,111209.9,32038.09
SM 36:1;2,572021.8,505689.2,94039.46,243262.0,22295.51
SM 36:0;2,0.0,0.0,0.0,0.0,10228.71
SM 38:2;2,20922.47,114711.9,66541.91,21313.24,9799.904


In [446]:
import glob 
# Collect every .txt file in the data folder. glob gives no ordering
# guarantee, so sort the paths to make the output order reproducible.
file_location = os.path.join('data', '*.txt')
filenames = sorted(glob.glob(file_location))

# Load each file and show its first row as a quick sanity check.
for filename in filenames:
    data = open_LVfile(filename)
    print(data.head(1))

           Sample 1  Sample 2  Sample 3  Sample 4    Kon A     Kon B     Kon C
LipidName                                                                     
Chol       30724625  16071750  35192875  34937125  2477500  18795250  34101000
               Sample 1    Sample 2      Sample 3    Sample 4         Kon A
LipidName                                                                  
IS Chol 9:0  16869875.0  15516250.0  1.517075e+07  14463125.0  1.236293e+07
                Sample 1   Sample 2   Sample 3   Sample 4      Kon A
LipidName                                                           
IS Cer 32:1;2  7649000.0  6535500.0  6837875.0  5810750.0  3859250.0
             Sample 1  Sample 2  Sample 3  Sample 4   Kon A
LipidName                                                  
IS DAG 34:0   3111875   3493625   3389500   1725750  900375
                   Sample 1  Sample 2  Sample 3  Sample 4   Kon A
LipidName                                                        
IS Hex2Cer 35:1;