<a href="https://colab.research.google.com/github/jwasswa2023/ChloroFinder/blob/main/RSCRIPT_FOR_PATROON.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Script automatically generated on Sat Aug  2 08:10:23 2025

library(patRoon)

# -------------------------
# initialization
# -------------------------

# Project directory: it holds analyses.csv and, because the script switches
# into it, receives every output written below through relative paths.
# The original location stays the default; set the PATROON_WORK_PATH
# environment variable to run the script on another machine without editing it.
workPath <- Sys.getenv("PATROON_WORK_PATH", unset = "")
if (!nzchar(workPath)) {
  workPath <- "/Users/josephwasswa/Desktop/pATROON"
}
if (!dir.exists(workPath)) {
  stop("Project directory not found: ", workPath, call. = FALSE)
}
setwd(workPath)

# Load analysis table, reporting which directory was searched if it is absent
if (!file.exists("analyses.csv")) {
  stop("analyses.csv not found in ", workPath, call. = FALSE)
}
anaInfo <- read.csv("analyses.csv")

# -------------------------
# features
# -------------------------

# Tell patRoon where the OpenMS binaries of this installation live
options(patRoon.path.OpenMS = "/Applications/OpenMS-3.4.1/bin")

# Run OpenMS feature finding on every analysis listed in anaInfo
# NOTE: see the reference manual for many more options
fList <- findFeatures(
  anaInfo, "openms",
  noiseThrInt = 1000,
  chromSNR = 3,
  chromFWHM = 10,
  minFWHM = 1,
  maxFWHM = 30
)

# Group the features between analyses (OpenMS, retention alignment switched on)
fGroups <- groupFeatures(fList, "openms", rtalign = TRUE)

# Rule based clean-up of the feature groups: intensity thresholds, replicate
# abundance and RSD limits, and blank handling; no retention or m/z range is
# applied (both left NULL)
fGroups <- filter(
  fGroups,
  preAbsMinIntensity = 300,
  absMinIntensity = 300000,
  relMinReplicateAbundance = 1,
  maxReplicateIntRSD = 0.75,
  blankThreshold = 5,
  removeBlanks = TRUE,
  retentionRange = NULL,
  mzRange = NULL
)
###fGroups <- fGroups[, 1:25]


# -------------------------
# annotation
# -------------------------

# Location of the MetFrag command line jar used for compound annotation
options(patRoon.path.MetFragCL = "/Users/josephwasswa/Downloads/MetFragCommandLine-2.6.6.jar")

# Build MS peak lists; the same averaging parameters are used for features
# and for feature groups
avgMSListParams <- getDefAvgPListParams(clusterMzWindow = 0.005)
mslists <- generateMSPeakLists(
  fGroups, "mzr",
  maxMSRtWindow = 5,
  precursorMzWindow = 4,
  avgFeatParams = avgMSListParams,
  avgFGroupParams = avgMSListParams
)

# Rule based filtering of the peak lists: only MS/MS thresholds are set here,
# the MS level ones are left NULL. Tweak as needed; see the manual.
mslists <- filter(
  mslists,
  absMSIntThr = NULL,
  absMSMSIntThr = 1000,
  relMSIntThr = NULL,
  relMSMSIntThr = 0.2,
  topMSPeaks = NULL,
  topMSMSPeaks = 15
)

# Formula candidates from GenForm
formulas <- generateFormulas(
  fGroups, mslists, "genform",
  relMzDev = 5,
  adduct = "[M+H]+",
  elements = "CHNOPCl",
  oc = FALSE,
  calculateFeatures = TRUE,
  featThresholdAnn = 0.75
)

# Compound structure candidates from MetFrag against PubChem
compounds <- generateCompounds(
  fGroups, mslists, "metfrag",
  dbRelMzDev = 5,
  fragRelMzDev = 5,
  fragAbsMzDev = 0.002,
  adduct = "[M+H]+",
  database = "pubchem",
  maxCandidatesToStop = 500
)

# Fold the formula results into the compound scoring
compounds <- addFormulaScoring(compounds, formulas, updateScore = TRUE)



# -------------------------
# reporting
# -------------------------

# CSV output of the feature groups with formula and compound annotations,
# written under "report" (relative to the working directory)
reportCSV(
  fGroups,
  path = "report",
  formulas = formulas,
  compounds = compounds,
  components = NULL
)

# HTML report written to the same "report" path, with the listed plot types;
# it is not self-contained and is opened once generated
reportHTML(
  fGroups,
  path = "report",
  formulas = formulas,
  compounds = compounds,
  MSPeakLists = mslists,
  components = NULL,
  reportPlots = c("chord", "venn", "upset", "eics", "formulas"),
  selfContained = FALSE,
  openReport = TRUE
)

# -------------------------
# reporting via report() and report.yml (currently commented out)
# -------------------------

# Advanced report settings can be edited in the report.yml file.
####report(fGroups, MSPeakLists = mslists, formulas = formulas, compounds = compounds, components = NULL,
####settingsFile = "report.yml", openReport = TRUE)



# Export the compound candidates as flat CSV tables: one with the structure
# annotations only, and one with the feature group m/z and retention time
# merged in. Both files are written relative to the working directory.

# Step 1: Extract compound annotations FIRST
df_compounds <- as.data.frame(compounds)

# Step 2: Select relevant annotation columns.
# Indexing with a column name that is absent aborts with the unhelpful
# "undefined columns selected" (e.g. when the compounds result is empty), so
# check first: 'group' is mandatory because it is the merge key below, any
# other missing column is skipped with a warning.
wanted_cols <- c("group", "compoundName", "SMILES", "neutral_formula", "score")
selected_cols <- intersect(wanted_cols, names(df_compounds))
missing_cols <- setdiff(wanted_cols, selected_cols)
if (!"group" %in% selected_cols) {
  stop("Compound table has no 'group' column; were any compound candidates ",
       "generated?", call. = FALSE)
}
if (length(missing_cols) > 0) {
  warning("Compound annotation columns not available and skipped: ",
          paste(missing_cols, collapse = ", "), call. = FALSE)
}
# drop = FALSE keeps a data.frame even if only 'group' is left
df_selected <- df_compounds[, selected_cols, drop = FALSE]

# Step 3: Save SMILES + formula info only
write.csv(df_selected, "metfrag_structures_smiles_formulas.csv", row.names = FALSE)

# Step 4: Extract feature group info (mz and retention time)
df_fgroups <- as.data.frame(fGroups)
fgroup_cols <- c("group", "mz", "ret")
missing_fgroup_cols <- setdiff(fgroup_cols, names(df_fgroups))
if (length(missing_fgroup_cols) > 0) {
  stop("Feature group table lacks required column(s): ",
       paste(missing_fgroup_cols, collapse = ", "), call. = FALSE)
}
df_fgroups_subset <- df_fgroups[, fgroup_cols, drop = FALSE]

# Step 5: Merge with m/z and RT (only groups present in both tables are kept)
df_merged <- merge(df_selected, df_fgroups_subset, by = "group")

# Step 6: Optional: Convert RT to minutes
df_merged$ret_min <- df_merged$ret / 60

# Step 7: Save full table
write.csv(df_merged, "metfrag_structures_with_mz_rt.csv", row.names = FALSE)

# Step 8: Preview result
head(df_merged)