# Data Preprocessing

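This notebook walks through the preprocessing of the mass spectrometry data: loading the raw CSV, cleaning it, applying z-score normalization, filtering by a threshold, and saving the processed result to a new CSV file.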

In [1]:
# Import necessary libraries
import pandas as pd
from src.data.load_data import load_csv
from src.data.preprocess_data import clean_data, normalize_data, filter_data

## Load Data

In [2]:
# Load CSV data through the project's load_csv helper.
# The path is relative, so it presumably resolves against the kernel's
# working directory (unless load_csv rewrites it -- confirm).
csv_path = '../data/raw/sample.csv'
df_csv = load_csv(csv_path)
# Last expression of the cell: the notebook displays its value as a preview
# (presumably the first rows of a pandas DataFrame -- confirm against load_csv).
df_csv.head()

## Clean Data

In [3]:
# Clean the data by handling missing values and duplicates.
# How each case is handled is decided inside clean_data
# (imported from src.data.preprocess_data); it is not visible here.
# NOTE(review): whether clean_data also modifies df_csv in place cannot be
# told from this notebook -- confirm before reusing df_csv later.
df_clean = clean_data(df_csv)
# Preview of the cleaned result, displayed as the cell's output.
df_clean.head()

## Normalize Data

In [4]:
# Normalize the cleaned data using z-score normalization, selected through
# the `method` argument of normalize_data.
# NOTE(review): which columns are normalized, and along which axis, is
# decided inside normalize_data -- confirm against its implementation.
df_normalized = normalize_data(df_clean, method='z-score')
# Preview of the normalized result, displayed as the cell's output.
df_normalized.head()

## Filter Data

In [5]:
# Filter the data by removing rows with values below a threshold.
# NOTE(review): the input is the normalized frame, so the 0.0 threshold is
# presumably compared against normalized values rather than raw ones --
# confirm that this is intended and how filter_data treats rows where only
# some values fall below the threshold.
df_filtered = filter_data(df_normalized, threshold=0.0)
# Preview of the filtered result, displayed as the cell's output.
df_filtered.head()

## Save Processed Data

In [6]:
# Save the processed data to a new CSV file.
from pathlib import Path

# Relative path: resolved against the kernel's working directory.
processed_path = '../data/processed/processed_sample.csv'
# Writing fails when the target folder is missing (e.g. on a fresh checkout),
# so create it first; this is a no-op when the folder already exists.
Path(processed_path).parent.mkdir(parents=True, exist_ok=True)
# index=False keeps the row index out of the written file
# (assumes df_filtered is a pandas DataFrame -- confirm against filter_data).
df_filtered.to_csv(processed_path, index=False)