In [None]:
# the ! before our command lets us run things outside of python
!pip install pandas matplotlib
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# First look at the raw file: with header=None no line is treated as column names,
# so pandas numbers the columns itself and every line of the CSV shows up as data.
csv_file = 'SEC-chromatogram.csv'
sec_data = pd.read_csv(csv_file, header=None)
print(sec_data)

In [None]:
# Second look: header=0 takes the column names from the first line of the file,
# which replaces the automatic 0, 1, ... column numbering.
csv_file = 'SEC-chromatogram.csv'
sec_data = pd.read_csv(csv_file, header=0)
print(sec_data)

In [None]:
# Final load: the column names we want (ml and mAU) are on the third line of the file.
# Remember: Python is a 0 indexed programming language, so the third line is header=2!
csv_file = 'SEC-chromatogram.csv'
sec_data = pd.read_csv(csv_file, header=2)
print(sec_data)

In [None]:
### Create a figure for the chromatogram ###

import matplotlib.pyplot as plt

## Figure 1: show all the data
fig_full, ax_full = plt.subplots(figsize=(10, 10))   # Size of the figure
ax_full.plot(sec_data['ml'], sec_data['mAU'])         # Volume on x, absorbance on y

# Titles and labels
ax_full.set_title('SEC Chromatogram')
ax_full.set_xlabel('Elution Volume (mL)')
ax_full.set_ylabel('A280 (mAU)')

# Axis ranges
ax_full.set_xlim(0, 400)
ax_full.set_ylim(0, 1000)

# Show your figure
plt.show()

## Figure 2: zoom in on a specific region to better see the peaks

### Try editing these values ###
X_START = 190
X_END = 255

Y_START = 0
Y_END = 600
#################################

# Same curve as above; only the visible window (and the title) changes
fig_zoom, ax_zoom = plt.subplots(figsize=(10, 10))
ax_zoom.plot(sec_data['ml'], sec_data['mAU'])
ax_zoom.set_title(f'SEC Chromatogram (Zoomed to {X_START}-{X_END})')
ax_zoom.set_xlabel('Elution Volume (mL)')
ax_zoom.set_ylabel('A280 (mAU)')
ax_zoom.set_xlim(X_START, X_END)
ax_zoom.set_ylim(Y_START, Y_END)
plt.show()

In [None]:
### Find the maxima of the largest 2 peaks

# Peak 1 lives between x=190 and x=220 (both ends included)
peak1_start, peak1_end = 190, 220
volume = sec_data['ml']
peak1_data = sec_data[(volume >= peak1_start) & (volume <= peak1_end)]

# Tallest absorbance inside the window...
peak1_max_mau = peak1_data['mAU'].max()

# ...and the volume on the same row: idxmax() gives the row label of the tallest point
peak1_top_row = peak1_data['mAU'].idxmax()
peak1_max_ml = sec_data.loc[peak1_top_row, 'ml']

# Print the results
print(f"Peak 1 maximum: {peak1_max_mau} mAU at {peak1_max_ml} mL")


In [None]:
# Try to find the maximum of the second peak
# Exercise: complete the line below with a filter on sec_data['ml'], following the
# same pattern used for peak1_data but with the start and end volumes of peak 2.
# This cell stops with a SyntaxError until the right-hand side is filled in.
peak2_data = 
# Tallest absorbance value inside the selected window
peak2_max_mau = peak2_data['mAU'].max()
# idxmax() gives the row label of that tallest point; use it to look up its volume
peak2_max_ml = sec_data.loc[peak2_data['mAU'].idxmax(), 'ml']

print(f"Peak 2 maximum: {peak2_max_mau} mAU at {peak2_max_ml} mL")

In [None]:
### Calculate peak areas to estimate the protein concentration
import numpy as np
import matplotlib.pyplot as plt

# NumPy 2.0 renamed its trapezoidal-rule integrator from np.trapz to np.trapezoid.
# Older installs only have np.trapz, so pick whichever one this NumPy provides
# instead of failing with an AttributeError.
trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

# Area under each peak: integrate absorbance (mAU) over elution volume (mL).
# First argument is the y values, second is the x values they were measured at.
peak1_area = trapezoid(peak1_data['mAU'], peak1_data['ml'])
peak2_area = trapezoid(peak2_data['mAU'], peak2_data['ml'])

# Print the results (:.2f rounds to 2 decimal places)
print(f"Peak 1 area: {peak1_area:.2f} mAU·mL")
print(f"Peak 2 area: {peak2_area:.2f} mAU·mL")


In [None]:
# Plot the chromatogram with shaded peak areas
fig, ax = plt.subplots(figsize=(10, 10))
ax.plot(sec_data['ml'], sec_data['mAU'], label='SEC Chromatogram')

# Each entry pairs a peak's data with the text shown for it in the legend.
# :.2f prints the peak area in the legend to 2 decimal places
shaded_peaks = (
    (peak1_data, f'Peak 1 ({peak1_area:.2f} mAU·mL)'),
    (peak2_data, f'Peak 2 ({peak2_area:.2f} mAU·mL)'),
)

# fill_between shades the region under the curve for each peak window.
# alpha is the transparency of the shaded areas
for window, legend_text in shaded_peaks:
    ax.fill_between(window['ml'], window['mAU'], alpha=0.3, label=legend_text)

ax.set_title('SEC Chromatogram with Peak Areas')
ax.set_xlabel('Elution Volume (mL)')
ax.set_ylabel('A280 (mAU)')
ax.set_xlim(100, 360)
ax.set_ylim(0, 600)

ax.legend()
plt.show()


### Extension Task

In [None]:
### EXTENSION TASK ###

# What if you wanted to add more than 2 peaks?
# Try adding start and end x values for the other chromatogram peaks.
# Each entry is a (start, end) pair of elution volumes in mL; the first entry is
# the same 190-220 window that was used for peak 1.
# Fill in every empty (,) pair, or delete the ones you don't need: this cell
# stops with a SyntaxError while any pair is left empty.

peaks = [
    (190,220),
    (,),
    (,),
    (,),
    (,)
]

# We can iterate over each peak we've specified using a for loop
# This means you can specify as many peaks as you like!
for peak in peaks:
    peak_start, peak_end = peak[0], peak[1]

    # Keep only the rows whose volume falls inside this window (both ends included)
    peak_data = sec_data[(sec_data['ml'] >= peak_start) & (sec_data['ml'] <= peak_end)]

    # A window that contains no data points has no maximum: idxmax() would raise a
    # ValueError and stop the whole loop, so report it and carry on with the next peak.
    if peak_data.empty:
        print(f"Peak between {peak_start} mL and {peak_end} mL: no data points in this range")
        continue

    # Tallest absorbance in the window, and the volume on that same row
    peak_max_mau = peak_data['mAU'].max()
    peak_max_ml = sec_data.loc[peak_data['mAU'].idxmax(), 'ml']
    print(f"Peak between {peak[0]} mL and {peak[1]} mL maximum: {peak_max_mau} mAU at {peak_max_ml} mL")


In [None]:
# Integrating the raw signal with the trapezoidal rule measures the area all the way down to y = 0.
# This does not account for baseline changes, which is critical for accurate peak area calculations in chromatography.
# How could you modify the code to account for baseline correction?
# One answer (below): draw a straight baseline from the first to the last point of each
# peak window and integrate only the signal above that line.

# NumPy 2.0 renamed np.trapz to np.trapezoid; use whichever one this install provides.
trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz

plt.figure(figsize=(10, 6))
plt.plot(sec_data['ml'], sec_data['mAU'])

# start=1 so the legend counts peaks from 1 rather than 0
for n, peak in enumerate(peaks, start=1):
    # Select data for the current peak
    peak_data = sec_data[(sec_data['ml'] >= peak[0]) & (sec_data['ml'] <= peak[1])]
    volume = peak_data['ml']
    signal = peak_data['mAU']

    # A straight baseline needs two end points at different volumes; skip windows
    # that are empty or collapse to a single volume instead of crashing.
    if len(peak_data) < 2 or volume.iloc[0] == volume.iloc[-1]:
        print(f"Peak {n} ({peak[0]}-{peak[1]} mL): not enough data points for a baseline, skipped")
        continue

    # Baseline correction using linear interpolation between the start and end of the peak.
    # The line is evaluated at the measured volumes, so it stays straight in mL
    # even if the data points are not evenly spaced.
    baseline_start = signal.iloc[0]
    baseline_end = signal.iloc[-1]
    fraction_across = (volume - volume.iloc[0]) / (volume.iloc[-1] - volume.iloc[0])
    baseline = baseline_start + (baseline_end - baseline_start) * fraction_across

    # Calculate baseline-corrected area: only the signal above the baseline counts
    peak_corrected = signal - baseline
    peak_area = trapezoid(peak_corrected, volume)

    # Add shaded area (between the baseline and the curve) to the plot
    plt.fill_between(
        volume,
        baseline,
        signal,
        alpha=0.3,
        label=f'Peak {n} ({peak[0]}-{peak[1]} mL) = {peak_area:.2f} mAU·mL'
    )

plt.xlabel('Elution Volume (mL)')
plt.ylabel('A280 (mAU)')
plt.title('SEC Chromatogram with Peak Areas')
plt.xlim(100, 360)
plt.ylim(0, 600)
plt.legend()
plt.show()