# Spectral Data Transformation

In [17]:
# Data transformation pipeline for raw mass spectrometry data
#
# Pseudocode:
#
# 1. Read datafile
# 2. Modify column headings
# 3. Clean and normalize data
# 4. Add additional columns
# 5. Save file
#
#


In [18]:
# Load libraries

import pandas as pd
import numpy as np
from array import *


In [25]:
# Load raw data
#
# Source CSV and the destination for the transformed copy.
inFilename = '/datasets/spectral_data-csv/spectral_data.csv'
outFilename = '/datasets/spectral_data-csv/spectral_data_mod.csv'

# Parse the source CSV into a DataFrame using pandas' default parsing options.
rawFile = pd.read_csv(filepath_or_buffer=inFilename)

In [20]:
# Modify column headings
#
# Positional rename: the names below replace the headers read from the CSV
# in order, so the list must contain exactly one entry per column of rawFile.

rawFile.columns = [
    "record",
    "response_time",
    "mass_charge",
    "response_rate",
    "response_width",
    "response_group",
]

In [21]:
# Clean data
#
# Build a new frame in which every missing value (NaN) is replaced by 0;
# rawFile itself is left unchanged.

modFile = rawFile.replace(to_replace=np.nan, value=0)

In [22]:
# Create normalized columns
#
# Each response column is divided by its own column maximum, so the maximum
# maps to 1 whenever it is non-zero. The maximum is computed once per column
# and the division is vectorized, rather than calling max() again for every
# row inside a Python-level loop (which is quadratic in the number of rows).
# NOTE(review): a column whose maximum is 0 (e.g. every value missing and
# filled with 0 by the cleaning step) produces NaN/inf here — confirm
# whether that case needs explicit handling.

modFile['norm_resp_rate'] = modFile['response_rate'] / modFile['response_rate'].max()
modFile['norm_resp_width'] = modFile['response_width'] / modFile['response_width'].max()


In [23]:
# Add response half-width intervals
#
# The interval is centred on response_time and extends half of
# response_width to either side.

half_width = modFile['response_width'] / 2
modFile['response_low'] = modFile['response_time'] - half_width
modFile['response_high'] = modFile['response_time'] + half_width

In [26]:
# Save the transformed data to the output CSV.
# NOTE(review): with the default index=True the DataFrame index is written
# as an additional, unnamed first column — confirm that this is intended.
modFile.to_csv(path_or_buf=outFilename)