In [2]:
import sys
import os
import pandas as pd
import numpy as np
from Bio import SeqIO
import re

In [3]:
dataset = 'SFR2015'

print('Loading raw data for', dataset, '...')
data = pd.read_csv('/Users/maryamkoddus/Documents/maryam-ko-QMUL-MSc-Project/01_input_data/raw_data/aab3138_table_s2.csv', header=0)
print('Raw data loaded.')
data

Loading raw data for SFR2015 ...
Raw data loaded.


Unnamed: 0,sequence,modsites,ref,charge,xcorr_max,dcn_max,ppm_min,localization_max,localization_min,HL_log2_ratio_avg_a,HL_log2_ratio_std_a,HL_log2_ratio_avg_b,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c,average,pvlaue
0,R.S#SSSLLAS#PGHISVK.E,S736:S743,sp|A0FGR8|ESYT2_HUMAN,23,4.04,0.32,4.73,0.93,,-0.78,0.00,-0.48,0.00,-0.52,0.69,-0.60,0.02
1,R.SSSS#LLAS#PGHISVK.E,S739:S743,sp|A0FGR8|ESYT2_HUMAN,23,5.72,0.44,4.35,0.99,,-0.05,0.32,-0.09,0.59,-0.87,0.16,-0.34,0.33
2,R.SSSSLLAS#PGHISVK.E,S743,sp|A0FGR8|ESYT2_HUMAN,23,5.71,0.49,4.29,1.00,,-0.01,0.26,0.68,0.00,0.08,0.46,0.25,0.37
3,R.SSSS#LLASPGHISVK.E,S739,sp|A0FGR8|ESYT2_HUMAN,3,4.50,0.42,4.01,0.94,0.25,-0.04,0.00,,,0.13,0.00,0.05,0.68
4,R.SS#SSLLAS#PGHISVK.E,S737:S743,sp|A0FGR8|ESYT2_HUMAN,23,4.42,0.43,5.05,0.95,,0.20,0.00,-0.48,0.49,,,-0.14,0.76
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23905,K.DMASLPSES#NEPK.R,S742,sp|Q9Y6Y8|S23IP_HUMAN,2,2.98,0.45,5.67,0.50,0.50,,,0.52,0.00,1.27,0.00,0.90,0.25
23906,K.DM*ASLPSES#NEPK.R,S742,sp|Q9Y6Y8|S23IP_HUMAN,23,2.47,0.28,4.57,0.99,,0.38,0.12,1.19,1.10,-0.15,0.17,0.47,0.35
23907,K.DM*ASLPS#ESNEPK.R,S740,sp|Q9Y6Y8|S23IP_HUMAN,2,3.47,0.40,3.78,1.00,0.93,0.26,0.00,2.29,0.00,0.02,0.00,0.86,0.36
23908,K.DM*ASLPSES#NEPKR.K,S742,sp|Q9Y6Y8|S23IP_HUMAN,23,3.96,0.44,4.89,0.93,0.50,1.56,0.00,-0.11,0.00,-0.30,0.00,0.38,0.58


In [4]:
data['Amino acid'] = data['modsites'].str.extract(r'([A-Z])')  # Extracts the amino acid letter
data['Position'] = data['modsites'].str.extract(r'(\d+)')  # Extracts the position number
data['Gene Name'] = data['ref'].str.split('|').str[2].str.split('_').str[0]  # Extracts gene name (before '_')
print(data[['sequence', 'modsites', 'ref', 'Amino acid', 'Position', 'Gene Name']])


                    sequence   modsites                    ref Amino acid  \
0      R.S#SSSLLAS#PGHISVK.E  S736:S743  sp|A0FGR8|ESYT2_HUMAN          S   
1      R.SSSS#LLAS#PGHISVK.E  S739:S743  sp|A0FGR8|ESYT2_HUMAN          S   
2       R.SSSSLLAS#PGHISVK.E       S743  sp|A0FGR8|ESYT2_HUMAN          S   
3       R.SSSS#LLASPGHISVK.E       S739  sp|A0FGR8|ESYT2_HUMAN          S   
4      R.SS#SSLLAS#PGHISVK.E  S737:S743  sp|A0FGR8|ESYT2_HUMAN          S   
...                      ...        ...                    ...        ...   
23905     K.DMASLPSES#NEPK.R       S742  sp|Q9Y6Y8|S23IP_HUMAN          S   
23906    K.DM*ASLPSES#NEPK.R       S742  sp|Q9Y6Y8|S23IP_HUMAN          S   
23907    K.DM*ASLPS#ESNEPK.R       S740  sp|Q9Y6Y8|S23IP_HUMAN          S   
23908   K.DM*ASLPSES#NEPKR.K       S742  sp|Q9Y6Y8|S23IP_HUMAN          S   
23909     K.DMAS#LPSESNEPK.R       S737  sp|Q9Y6Y8|S23IP_HUMAN          S   

      Position Gene Name  
0          736     ESYT2  
1          739     ES

In [5]:
# filter data to keep only those with localization probability >= 0.85
data = data[data['localization_max'] >= 0.85] 
data = data[data['localization_min'] >= 0.85] 


# Filtering out semi-colons from 'Amino acid', 'Positions within proteins', and 'Gene names' columns
data = data[~data['Amino acid'].str.contains(';', na=False)]
data = data[~data['Position'].str.contains(';', na=False)]
data = data[~data['Gene Name'].str.contains(';', na=False)]
data

Unnamed: 0,sequence,modsites,ref,charge,xcorr_max,dcn_max,ppm_min,localization_max,localization_min,HL_log2_ratio_avg_a,HL_log2_ratio_std_a,HL_log2_ratio_avg_b,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c,average,pvlaue,Amino acid,Position,Gene Name
10,R.SS#SSLLASPGHISVK.E,S737,sp|A0FGR8|ESYT2_HUMAN,23,3.04,0.22,5.26,0.87,0.87,,,,,-0.90,0.00,-0.90,,S,737,ESYT2
26,K.VTFQPPSS#IGCR.K,S562,sp|A0MZ66|SHOT1_HUMAN,2,2.52,0.34,5.46,0.94,0.93,,,-0.91,0.00,-0.01,0.00,-0.46,0.49,S,562,SHOT1
32,K.TLEAEFNSPS#PPT#PEPGEGPR.K,S534:T537,sp|A0MZ66|SHOT1_HUMAN,23,4.85,0.37,6.36,1.00,1.00,0.27,0.00,,,,,0.27,,S,534,SHOT1
37,R.GELEPQLGS#PQQKPK.R,S1707,sp|A3KMH1|VWA8_HUMAN,3,3.70,0.32,5.44,1.00,1.00,-0.32,0.00,-0.95,0.00,-0.39,0.00,-0.55,0.11,S,1707,VWA8
38,R.VVSNDDGS#ISYESR.S,S904,sp|A3KN83|SBNO1_HUMAN,2,4.24,0.41,6.00,1.00,1.00,0.11,0.00,0.47,0.00,0.69,0.00,0.42,0.13,S,904,SBNO1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23886,R.SVAVS#DEEEVEEEAERR.K,S743,sp|Q9Y6X9|MORC2_HUMAN,3,3.94,0.29,5.27,1.00,1.00,,,-1.37,0.00,,,-1.37,,S,743,MORC2
23901,K.QVVEAEKVVES#PDFSKDEDYLGK.V,S926,sp|Q9Y6Y8|S23IP_HUMAN,3,5.35,0.40,6.32,1.00,0.99,2.80,0.00,0.83,0.00,2.48,0.00,2.04,0.08,S,926,S23IP
23902,K.VVES#PDFSKDEDYLGK.V,S926,sp|Q9Y6Y8|S23IP_HUMAN,23,4.82,0.51,4.67,1.00,1.00,1.06,0.51,0.76,1.08,1.05,0.46,0.95,0.01,S,926,S23IP
23904,K.DMASLPS#ESNEPK.R,S740,sp|Q9Y6Y8|S23IP_HUMAN,2,3.15,0.39,6.39,0.94,0.86,,,0.52,0.00,1.27,0.00,0.90,0.25,S,740,S23IP


In [6]:
# filter data
data['sequence'] = data['sequence'].str.replace('_', '')
data['sequence'] = data['sequence'].str.replace(r'[#.]', '', regex=True)
data

Unnamed: 0,sequence,modsites,ref,charge,xcorr_max,dcn_max,ppm_min,localization_max,localization_min,HL_log2_ratio_avg_a,HL_log2_ratio_std_a,HL_log2_ratio_avg_b,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c,average,pvlaue,Amino acid,Position,Gene Name
10,RSSSSLLASPGHISVKE,S737,sp|A0FGR8|ESYT2_HUMAN,23,3.04,0.22,5.26,0.87,0.87,,,,,-0.90,0.00,-0.90,,S,737,ESYT2
26,KVTFQPPSSIGCRK,S562,sp|A0MZ66|SHOT1_HUMAN,2,2.52,0.34,5.46,0.94,0.93,,,-0.91,0.00,-0.01,0.00,-0.46,0.49,S,562,SHOT1
32,KTLEAEFNSPSPPTPEPGEGPRK,S534:T537,sp|A0MZ66|SHOT1_HUMAN,23,4.85,0.37,6.36,1.00,1.00,0.27,0.00,,,,,0.27,,S,534,SHOT1
37,RGELEPQLGSPQQKPKR,S1707,sp|A3KMH1|VWA8_HUMAN,3,3.70,0.32,5.44,1.00,1.00,-0.32,0.00,-0.95,0.00,-0.39,0.00,-0.55,0.11,S,1707,VWA8
38,RVVSNDDGSISYESRS,S904,sp|A3KN83|SBNO1_HUMAN,2,4.24,0.41,6.00,1.00,1.00,0.11,0.00,0.47,0.00,0.69,0.00,0.42,0.13,S,904,SBNO1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23886,RSVAVSDEEEVEEEAERRK,S743,sp|Q9Y6X9|MORC2_HUMAN,3,3.94,0.29,5.27,1.00,1.00,,,-1.37,0.00,,,-1.37,,S,743,MORC2
23901,KQVVEAEKVVESPDFSKDEDYLGKV,S926,sp|Q9Y6Y8|S23IP_HUMAN,3,5.35,0.40,6.32,1.00,0.99,2.80,0.00,0.83,0.00,2.48,0.00,2.04,0.08,S,926,S23IP
23902,KVVESPDFSKDEDYLGKV,S926,sp|Q9Y6Y8|S23IP_HUMAN,23,4.82,0.51,4.67,1.00,1.00,1.06,0.51,0.76,1.08,1.05,0.46,0.95,0.01,S,926,S23IP
23904,KDMASLPSESNEPKR,S740,sp|Q9Y6Y8|S23IP_HUMAN,2,3.15,0.39,6.39,0.94,0.86,,,0.52,0.00,1.27,0.00,0.90,0.25,S,740,S23IP


In [7]:
def match_seq_to_genename(dataset, seq_column):
    '''
    Maps amino acid sequences to gene names using the loaded fasta file.
    
    args:
    =====
    dataset: <pd.Dataframe> with a column of amino acid sequences
    seq_column: <str> column name containing amino acid sequences
    
    out:
    ====
    dataset: <pd.Dataframe> with an additional column containing gene names
    '''    

    fasta_sequence = list(SeqIO.parse(open(f'/Users/maryamkoddus/Documents/maryam-ko-QMUL-MSc-Project/01_input_data/raw_data/UP000005640_9606.fasta'), "fasta"))
    
    
    gene_dict = {}
    
    # iterate over rows in seq_column
    for i in dataset[seq_column]:
        print(i)
        i_str = str(i)
        for seq_record in fasta_sequence:
            matches = re.findall(i_str, str(seq_record.seq))
            if matches:
                print(f"Match found for sequence: {seq_record}")
                gene_name_match = re.search(r"GN=(\w+)", seq_record.description)
                print('Gene name match:', gene_name_match)
                # gene_name_match = re.search("GN=(\w+)", seq_record.description)
                if gene_name_match:
                    gene_name = gene_name_match.group(1)
                    gene_dict[i] = gene_name
                    print(f"Match found: {i_str} -> {gene_name}")
                else: 
                    print(f"No gene name found in description for sequence: {i_str}")
    
    # map sequences to gene names           
    dataset['GeneName'] = dataset[seq_column].map(gene_dict) 
    print('Amino acid sequences matched to gene names.')
    return dataset 

In [8]:
data = match_seq_to_genename(data, 'sequence')

RSSSSLLASPGHISVKE
Match found for sequence: ID: sp|A0FGR8|ESYT2_HUMAN
Name: sp|A0FGR8|ESYT2_HUMAN
Description: sp|A0FGR8|ESYT2_HUMAN Extended synaptotagmin-2 OS=Homo sapiens OX=9606 GN=ESYT2 PE=1 SV=1
Number of features: 0
Seq('MTANRDAALSSHRHPGCAQRPRTPTFASSSQRRSAFGFDDGNFPGLGERSHAPG...AMT')
Gene name match: <re.Match object; span=(71, 79), match='GN=ESYT2'>
Match found: RSSSSLLASPGHISVKE -> ESYT2
KVTFQPPSSIGCRK
Match found for sequence: ID: sp|A0MZ66|SHOT1_HUMAN
Name: sp|A0MZ66|SHOT1_HUMAN
Description: sp|A0MZ66|SHOT1_HUMAN Shootin-1 OS=Homo sapiens OX=9606 GN=SHTN1 PE=1 SV=4
Number of features: 0
Seq('MNSSDEEKQLQLITSLKEQAIGEYEDLRAENQKTKEKCDKIRQERDEAVKKLEE...SNC')
Gene name match: <re.Match object; span=(56, 64), match='GN=SHTN1'>
Match found: KVTFQPPSSIGCRK -> SHTN1
KTLEAEFNSPSPPTPEPGEGPRK
Match found for sequence: ID: sp|A0MZ66|SHOT1_HUMAN
Name: sp|A0MZ66|SHOT1_HUMAN
Description: sp|A0MZ66|SHOT1_HUMAN Shootin-1 OS=Homo sapiens OX=9606 GN=SHTN1 PE=1 SV=4
Number of features: 0
Seq('MNSS

In [9]:
data['Phosphosite'] = data['Amino acid'].astype(str) + '(' + data['Position'].astype(str) + ')'
data

Unnamed: 0,sequence,modsites,ref,charge,xcorr_max,dcn_max,ppm_min,localization_max,localization_min,HL_log2_ratio_avg_a,...,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c,average,pvlaue,Amino acid,Position,Gene Name,GeneName,Phosphosite
10,RSSSSLLASPGHISVKE,S737,sp|A0FGR8|ESYT2_HUMAN,23,3.04,0.22,5.26,0.87,0.87,,...,,-0.90,0.00,-0.90,,S,737,ESYT2,ESYT2,S(737)
26,KVTFQPPSSIGCRK,S562,sp|A0MZ66|SHOT1_HUMAN,2,2.52,0.34,5.46,0.94,0.93,,...,0.00,-0.01,0.00,-0.46,0.49,S,562,SHOT1,SHTN1,S(562)
32,KTLEAEFNSPSPPTPEPGEGPRK,S534:T537,sp|A0MZ66|SHOT1_HUMAN,23,4.85,0.37,6.36,1.00,1.00,0.27,...,,,,0.27,,S,534,SHOT1,SHTN1,S(534)
37,RGELEPQLGSPQQKPKR,S1707,sp|A3KMH1|VWA8_HUMAN,3,3.70,0.32,5.44,1.00,1.00,-0.32,...,0.00,-0.39,0.00,-0.55,0.11,S,1707,VWA8,VWA8,S(1707)
38,RVVSNDDGSISYESRS,S904,sp|A3KN83|SBNO1_HUMAN,2,4.24,0.41,6.00,1.00,1.00,0.11,...,0.00,0.69,0.00,0.42,0.13,S,904,SBNO1,SBNO1,S(904)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23886,RSVAVSDEEEVEEEAERRK,S743,sp|Q9Y6X9|MORC2_HUMAN,3,3.94,0.29,5.27,1.00,1.00,,...,0.00,,,-1.37,,S,743,MORC2,MORC2,S(743)
23901,KQVVEAEKVVESPDFSKDEDYLGKV,S926,sp|Q9Y6Y8|S23IP_HUMAN,3,5.35,0.40,6.32,1.00,0.99,2.80,...,0.00,2.48,0.00,2.04,0.08,S,926,S23IP,SEC23IP,S(926)
23902,KVVESPDFSKDEDYLGKV,S926,sp|Q9Y6Y8|S23IP_HUMAN,23,4.82,0.51,4.67,1.00,1.00,1.06,...,1.08,1.05,0.46,0.95,0.01,S,926,S23IP,SEC23IP,S(926)
23904,KDMASLPSESNEPKR,S740,sp|Q9Y6Y8|S23IP_HUMAN,2,3.15,0.39,6.39,0.94,0.86,,...,0.00,1.27,0.00,0.90,0.25,S,740,S23IP,SEC23IP,S(740)


In [10]:
print(data.columns)

Index(['sequence', 'modsites', 'ref', 'charge', 'xcorr_max', 'dcn_max',
       'ppm_min', 'localization_max', 'localization_min',
       'HL_log2_ratio_avg_a', 'HL_log2_ratio_std_a', 'HL_log2_ratio_avg_b',
       'HL_log2_ratio_std_b', 'HL_log2_ratio_avg_c', 'HL_log2_ratio_std_c',
       'average', 'pvlaue', 'Amino acid', 'Position', 'Gene Name', 'GeneName',
       'Phosphosite'],
      dtype='object')


In [11]:
print(data.isna().sum())

sequence                  0
modsites                  0
ref                       0
charge                    0
xcorr_max                 0
dcn_max                   0
ppm_min                   0
localization_max          0
localization_min          0
HL_log2_ratio_avg_a    1617
HL_log2_ratio_std_a    1617
HL_log2_ratio_avg_b    1414
HL_log2_ratio_std_b    1414
HL_log2_ratio_avg_c    1297
HL_log2_ratio_std_c    1297
average                   0
pvlaue                 1493
Amino acid                0
Position                  0
Gene Name                 0
GeneName                280
Phosphosite               0
dtype: int64


In [12]:
# Keep only 'Phosphosite' and ratio columns
keepcols = ['Phosphosite'] + ['GeneName'] + [col for col in data.columns if 'HL_log2_ratio_avg_' in col or 'HL_log2_ratio_std_' in col]
data = data[keepcols]
data

Unnamed: 0,Phosphosite,GeneName,HL_log2_ratio_avg_a,HL_log2_ratio_std_a,HL_log2_ratio_avg_b,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c
10,S(737),ESYT2,,,,,-0.90,0.00
26,S(562),SHTN1,,,-0.91,0.00,-0.01,0.00
32,S(534),SHTN1,0.27,0.00,,,,
37,S(1707),VWA8,-0.32,0.00,-0.95,0.00,-0.39,0.00
38,S(904),SBNO1,0.11,0.00,0.47,0.00,0.69,0.00
...,...,...,...,...,...,...,...,...
23886,S(743),MORC2,,,-1.37,0.00,,
23901,S(926),SEC23IP,2.80,0.00,0.83,0.00,2.48,0.00
23902,S(926),SEC23IP,1.06,0.51,0.76,1.08,1.05,0.46
23904,S(740),SEC23IP,,,0.52,0.00,1.27,0.00


In [13]:
def create_phos_ID(dataset):
    '''
    Concatenates GeneName and Phosphosite columns.
    
    args:
    =====
    dataset: <pd.Dataframe> with columns 'GeneName' and 'Phosphosite'
    
    out:
    ====
    dataset: <pd.Dataframe> with 'phosphosite_ID' column and 'GeneName' + 'Phosphosite' columns dropped
    '''
    dataset.loc[:, 'phosphosite_ID'] = dataset['GeneName'].astype(str) + '_' + dataset['Phosphosite'].astype(str)
    dataset = dataset.drop(columns=['Phosphosite', 'GeneName'])
    print('Phosphosite IDs created.')
    return dataset

data = create_phos_ID(data) # call function to create phosphosite_ID column

print('Phosphosite IDs created.')

Phosphosite IDs created.
Phosphosite IDs created.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset.loc[:, 'phosphosite_ID'] = dataset['GeneName'].astype(str) + '_' + dataset['Phosphosite'].astype(str)


In [14]:
def clean_phosID_col(data):
    data = data[~data.phosphosite_ID.str.contains('nan', case=False, na=False)]
    data = data[~data.phosphosite_ID.str.contains(';', case=False, na=False)]
    data = data[~data.phosphosite_ID.str.contains('-', case=False, na=False)]

    # Add this line to remove decimals from phosphosite_ID (e.g., S123.0 -> S123)
    data['phosphosite_ID'] = data['phosphosite_ID'].apply(lambda x: re.sub(r'\((\d+)\.0+\)', r'(\1)', x))
    
    data_grouped = data.groupby(by='phosphosite_ID')
    
    if len(data) != len(data_grouped):
        numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()
        non_numeric_cols = data.columns.difference(numeric_cols + ['phosphosite_ID']).tolist()
        data_numeric = data_grouped[numeric_cols].mean()
        data_categorical = data_grouped[non_numeric_cols].first().reset_index()
        
        # Merge numeric and non-numeric parts
        data = pd.merge(data_categorical, data_numeric, on='phosphosite_ID')
        print('Phosphosites with multiple measurements have been averaged')
    else:
        print('There are no phosphosites with multiple measurements')

    # Replace inf values with NaNs
    data = data.replace([np.inf, -np.inf], np.nan)
    
    # Ensure phosphosite_ID is first column
    if data.columns[0] != 'phosphosite_ID':
        phosphosite_ID = data.pop('phosphosite_ID')
        data.insert(0, 'phosphosite_ID', phosphosite_ID)

    return data

In [15]:
data = clean_phosID_col(data)
print("After cleaning phosphosite_ID column:")
data

Phosphosites with multiple measurements have been averaged
After cleaning phosphosite_ID column:


Unnamed: 0,phosphosite_ID,HL_log2_ratio_avg_a,HL_log2_ratio_std_a,HL_log2_ratio_avg_b,HL_log2_ratio_std_b,HL_log2_ratio_avg_c,HL_log2_ratio_std_c
0,AAAS_T(57),1.170,0.00,0.170,0.00,0.370,0.000
1,AAK1_S(14),-1.160,0.00,,,,
2,AAK1_T(389),-2.950,0.00,,,,
3,AAK1_T(620),-1.510,0.00,,,,
4,AARSD1_S(174),0.880,0.35,0.810,0.00,0.750,0.640
...,...,...,...,...,...,...,...
4079,ZYX_S(169),-0.490,0.00,-0.630,0.00,,
4080,ZYX_S(267),-0.085,0.04,2.255,0.00,-0.705,0.295
4081,ZYX_S(281),0.600,1.16,-0.100,0.16,0.840,0.910
4082,ZYX_T(179),-1.620,0.74,0.030,0.48,0.430,0.210


In [16]:
data.to_csv(f'/Users/maryamkoddus/Documents/maryam-ko-QMUL-MSc-Project/01_input_data/PreprocessedDatasets/SFR2015.csv', index=False)


print(dataset, 'has been saved to CSV successfully!', data)

SFR2015 has been saved to CSV successfully!      phosphosite_ID  HL_log2_ratio_avg_a  HL_log2_ratio_std_a  \
0        AAAS_T(57)                1.170                 0.00   
1        AAK1_S(14)               -1.160                 0.00   
2       AAK1_T(389)               -2.950                 0.00   
3       AAK1_T(620)               -1.510                 0.00   
4     AARSD1_S(174)                0.880                 0.35   
...             ...                  ...                  ...   
4079     ZYX_S(169)               -0.490                 0.00   
4080     ZYX_S(267)               -0.085                 0.04   
4081     ZYX_S(281)                0.600                 1.16   
4082     ZYX_T(179)               -1.620                 0.74   
4083     ZYX_T(306)               -2.130                 0.00   

      HL_log2_ratio_avg_b  HL_log2_ratio_std_b  HL_log2_ratio_avg_c  \
0                   0.170                 0.00                0.370   
1                     NaN        