diff --git a/playground.fsx b/playground.fsx
index aa85dbf..ea6f5c6 100644
--- a/playground.fsx
+++ b/playground.fsx
@@ -7,12 +7,9 @@
//File.Copy(dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO.dll", dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO_Copy.dll", true)
//File.Copy(@"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll", @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel_Copy.dll", true)
-#r "nuget: DocumentFormat.OpenXml"
#r "nuget: FSharpAux"
#r "nuget: FsOboParser"
-#r "nuget: FsSpreadsheet, 3.1.1"
-#r "nuget: FsSpreadsheet.ExcelIO, 3.1.1"
-#r "nuget: FSharp.FGL"
+#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0"
#r "nuget: FSharp.FGL.ArrayAdjacencyGraph"
open DocumentFormat.OpenXml
@@ -45,6 +42,32 @@ open FsOboParser
open ControlledVocabulary
open type ControlledVocabulary.ParamBase
open ARCTokenization
+open ARCTokenization.StructuralOntology
+
+System.IO.Directory.GetCurrentDirectory()
+let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = System.IO.Directory.GetCurrentDirectory() + "/tests/ARCTokenization.Tests/Fixtures/correct/investigation_simple.xlsx")
+
+let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx")
+let actual = ParamBasedParsers.parseIsaMetadataSheetFromCvp "assay_simple.xlsx" Assay.parseMetadataSheetFromFile [fakePath] |> Seq.head
+actual.Length
+let exp =
+ ARCMock.AssayMetadataTokens(
+ Assay_File_Name = [@"measurement1\isa.assay.xlsx"],
+ Assay_Performer_First_Name = ["Oliver"; "Marius"],
+ Assay_Performer_Last_Name = ["Maus"; "Katz"],
+ Assay_Performer_Mid_Initials = [""; "G."],
+ Assay_Performer_Email = ["maus@nfdi4plants.org"],
+ Assay_Performer_Affiliation = ["RPTU University of Kaiserslautern"],
+ Assay_Performer_Roles = ["research assistant"],
+ Assay_Performer_Roles_Term_Accession_Number = ["http://purl.org/spar/scoro/research-assistant"],
+ Assay_Performer_Roles_Term_Source_REF = ["scoro"]
+ )
+ |> List.concat
+exp.Length
+actual |> List.fold (fun acc ip -> $"{acc}\n{ip.Name}") "" |> printfn "%s"
+exp |> List.iter (fun ip -> printfn $"{ip.Name}")
+for i = 0 to 33 do
+ printfn $"{List.tryItem i actual |> Option.map (fun x -> x.Name) |> Option.defaultValue System.String.Empty}\t{List.tryItem i exp |> Option.map (fun x -> x.Name) |> Option.defaultValue System.String.Empty}"
let testAccession1 = "TO:00000001"
let testName1 = "Test"
diff --git a/src/ARCTokenization/ARCTokenization.fsproj b/src/ARCTokenization/ARCTokenization.fsproj
index 8810615..5746aac 100644
--- a/src/ARCTokenization/ARCTokenization.fsproj
+++ b/src/ARCTokenization/ARCTokenization.fsproj
@@ -33,6 +33,7 @@
+
diff --git a/src/ARCTokenization/Globals.fs b/src/ARCTokenization/Globals.fs
new file mode 100644
index 0000000..081103c
--- /dev/null
+++ b/src/ARCTokenization/Globals.fs
@@ -0,0 +1,15 @@
+module Globals
+
+open ARCtrl
+open ARCtrl.ISA
+
+let [] INVESTIGATION_FILE_NAME = Path.InvestigationFileName
+let [] INVESTIGATION_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcInvestigation.metaDataSheetName
+
+let [] STUDY_FILE_NAME = Path.StudyFileName
+let [] STUDY_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcStudy.metaDataSheetName
+let [] STUDY_OBSOLETE_METADATA_SHEET_NAME= ARCtrl.ISA.Spreadsheet.ArcStudy.obsoleteMetaDataSheetName
+
+let [] ASSAY_FILE_NAME = Path.AssayFileName
+let [] ASSAY_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcAssay.metaDataSheetName
+let [] ASSAY_OBSOLETE_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcAssay.obsoleteMetaDataSheetName
diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs
index 9638df1..80aa336 100644
--- a/src/ARCTokenization/TopLevelParsers.fs
+++ b/src/ARCTokenization/TopLevelParsers.fs
@@ -5,6 +5,59 @@ open FSharpAux
open FsSpreadsheet
open FsSpreadsheet.ExcelIO
+module internal ISA =
+
+ open System.IO
+
+    /// Tries to parse the metadata sheet of the file referenced by a single token.
+    /// Returns `Some` parsed params only when the file name of the string value stored in the token equals `isaFileName`
+    /// and `isaMdsParsingF` does not throw; every other case yields `None`.
+    let tryParseMetadataSheetFromToken (isaFileName: string) (isaMdsParsingF: string -> IParam list) (absFileToken: IParam) =
+        let tokenPath = Param.getValueAsString absFileToken
+        let hasExpectedFileName = Path.GetFileName tokenPath = isaFileName
+        if not hasExpectedFileName then
+            None
+        else
+            try isaMdsParsingF tokenPath |> Some
+            with _ -> None
+
+    /// Lazily collects the parse results of all tokens for which `tryParseMetadataSheetFromToken` returns `Some`; all other tokens are skipped.
+    let parseMetadataSheetsFromTokens (isaFileName: string) (isaMdsParsingF: string -> IParam list) (absFileTokens: #IParam seq) =
+        Seq.choose (fun token -> tryParseMetadataSheetFromToken isaFileName isaMdsParsingF token) absFileTokens
+
+ //type löl =
+
+
+ // static member parseStudyMetadataSheetFromCvp absFileTokens =
+ // parseMetadataSheetsFromCvps "isa.study.xlsx" ARCTokenization.Study.parseMetadataSheetfromFile absFileTokens
+
+ // static member parseAssayMetadataSheetFromCvp absFileTokens =
+ // parseMetadataSheetsFromCvps "isa.assay.xlsx" ARCTokenization.Assay.parseMetadataSheetFromFile absFileTokens
+
+ // static member tryParseIsaMetadataSheetFromCvp (isaFileName : string) isaMdsParsingF absFileTokens =
+ // absFileTokens
+ // |> Seq.choose (
+ // fun cvp ->
+ // let cvpStr = Param.getValueAsString cvp
+ // //printfn $"cvpStr: {cvpStr}"
+ // //if String.contains isaFileName cvpStr then
+ // if isaFileName = Path.GetFileName cvpStr then
+ // try Some (isaMdsParsingF cvpStr)
+ // with _ -> None
+ // else None
+ // )
+
+ // static member tryParseInvestigationMetadataSheetFromCvp (absFileTokens : #IParam seq) =
+ // try ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.investigation.xlsx" ARCTokenization.Investigation.parseMetadataSheetFromFile absFileTokens
+ // |> Seq.concat
+ // with _ -> Seq.empty
+
+ // static member tryParseStudyMetadataSheetFromCvp (absFileTokens : #IParam seq) =
+ // ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.study.xlsx" ARCTokenization.Study.parseMetadataSheetfromFile absFileTokens
+
+ // static member tryParseAssayMetadataSheetFromCvp (absFileTokens : #IParam seq) =
+ // ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.assay.xlsx" ARCTokenization.Assay.parseMetadataSheetFromFile absFileTokens
+
type FileSystem =
///
@@ -54,68 +107,143 @@ type FileSystem =
type Investigation =
///
- /// Parses the metadata sheet from an ISA Investigation XLSX file as a row-based 2D list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Investigation XLSX file as a row-based 2D list of `IParam`s.
///
/// The path to the investigation xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook
static member parseMetadataRowsFromFile(
- path: string,
?UseLastSheetOnIncorrectName: bool
) =
-
- let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
+ fun (path: string) ->
+ let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
- FsWorkbook.fromXlsxFile path
- |> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey)
+ FsWorkbook.fromXlsxFile path
+ |> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey)
///
- /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Investigation XLSX file at a given path as a flat list of `IParam`s.
///
- /// The path to the study xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook
static member parseMetadataSheetFromFile(
- path: string,
?UseLastSheetOnIncorrectName: bool
) =
- Investigation.parseMetadataRowsFromFile(
- path = path,
- ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
- )
- |> List.concat
+ fun (path: string) ->
+ path
+ |> Investigation.parseMetadataRowsFromFile(
+ ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
+ )
+ |> List.concat
+
+ ///
+ /// Returns a function that returns Some flat IParam list representing the investigation metadata if the given token contains a filepath with the standard investigation file name ("isa.investigation.xlsx") or None otherwise.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook
+ /// The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default)
+    static member tryParseMetadataSheetFromToken(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the investigation file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.INVESTIGATION_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Investigation.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (token: #IParam) ->
+            ISA.tryParseMetadataSheetFromToken expectedFileName parseSheet token
+
+
+ ///
+ /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard investigation file name ("isa.investigation.xlsx")
+ /// in a given collection of tokens as a 2D list containing the individual Investigation metadata as a flat list of `IParam`s.
+ ///
+ /// if no tokens contain such a file path, the result will be an empty list.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook
+ /// The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default)
+    static member parseMetadataSheetsFromTokens(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the investigation file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.INVESTIGATION_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Investigation.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (tokens: #seq<#IParam>) ->
+            ISA.parseMetadataSheetsFromTokens expectedFileName parseSheet tokens
+            |> List.ofSeq
type Study =
///
- /// Parses the metadata sheet from an ISA Study XLSX file as a row-based 2D list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Study XLSX file as a row-based 2D list of `IParam`s.
///
- /// The path to the study xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook
static member parseMetadataRowsFromFile(
- path: string,
?UseLastSheetOnIncorrectName: bool
) =
+ fun (path: string) ->
- let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
+ let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
- FsWorkbook.fromXlsxFile path
- |> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey)
+ FsWorkbook.fromXlsxFile path
+ |> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey)
///
- /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Study XLSX file at a given path as a flat list of `IParam`s.
///
- /// The path to the study xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
- static member parseMetadataSheetfromFile(
- path: string,
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook
+ static member parseMetadataSheetFromFile(
?UseLastSheetOnIncorrectName: bool
) =
- Study.parseMetadataRowsFromFile(
- path = path,
- ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
- )
- |> List.concat
+ fun (path: string) ->
+ path
+ |> Study.parseMetadataRowsFromFile(
+ ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
+ )
+ |> List.concat
+
+ ///
+ /// Returns a function that returns Some flat IParam list representing the study metadata if the given token contains a filepath with the standard study file name ("isa.study.xlsx") or None otherwise.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook
+ /// The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default)
+    static member tryParseMetadataSheetFromToken(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the study file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.STUDY_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Study.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (token: #IParam) ->
+            ISA.tryParseMetadataSheetFromToken expectedFileName parseSheet token
+
+
+ ///
+ /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard study file name ("isa.study.xlsx")
+ /// in a given collection of tokens as a 2D list containing the individual study metadata as a flat list of `IParam`s.
+ ///
+ /// if no tokens contain such a file path, the result will be an empty list.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook
+ /// The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default)
+    static member parseMetadataSheetsFromTokens(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the study file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.STUDY_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Study.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (tokens: #seq<#IParam>) ->
+            ISA.parseMetadataSheetsFromTokens expectedFileName parseSheet tokens
+            |> List.ofSeq
+
///
/// Parses all annotation tables from an ISA Study XLSX file as a
@@ -141,35 +269,72 @@ type Study =
type Assay =
///
- /// Parses the metadata sheet from an ISA Assay XLSX file as a row-based 2D list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Assay XLSX file as a row-based 2D list of `IParam`s.
///
- /// The path to the assay xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook
static member parseMetadataRowsFromFile(
- path: string,
?UseLastSheetOnIncorrectName: bool
) =
+ fun (path: string) ->
- let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
+ let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false
- FsWorkbook.fromXlsxFile path
- |> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName
- |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey)
+ FsWorkbook.fromXlsxFile path
+ |> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName
+ |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey)
///
- /// Parses the metadata sheet from an ISA Assay XLSX file as a flat list of `IParam`s.
+ /// Returns a function that parses the metadata sheet from an ISA Assay XLSX file at a given path as a flat list of `IParam`s.
///
- /// The path to the assay xlsx file
- /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook
static member parseMetadataSheetFromFile(
- path: string,
?UseLastSheetOnIncorrectName: bool
) =
- Assay.parseMetadataRowsFromFile(
- path = path,
- ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
- )
- |> List.concat
+ fun (path: string) ->
+ path
+ |> Assay.parseMetadataRowsFromFile(
+ ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName
+ )
+ |> List.concat
+
+ ///
+ /// Returns a function that returns Some flat IParam list representing the assay metadata if the given token contains a filepath with the standard assay file name ("isa.assay.xlsx") or None otherwise.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook
+ /// The name of the assay file, note that this should not be set if the file follows spec (as "isa.assay.xlsx" is the default)
+    static member tryParseMetadataSheetFromToken(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the assay file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.ASSAY_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Assay.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (token: #IParam) ->
+            ISA.tryParseMetadataSheetFromToken expectedFileName parseSheet token
+
+
+ ///
+ /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard assay file name ("isa.assay.xlsx")
+ /// in a given collection of tokens as a 2D list containing the individual assay metadata as a flat list of `IParam`s.
+ ///
+ /// if no tokens contain such a file path, the result will be an empty list.
+ ///
+ /// Whether or not to try to parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook
+ /// The name of the assay file, note that this should not be set if the file follows spec (as "isa.assay.xlsx" is the default)
+    static member parseMetadataSheetsFromTokens(
+        ?UseLastSheetOnIncorrectName: bool,
+        ?FileName: string
+    ) =
+        // fall back to the assay file name defined in Globals when the caller supplies none
+        let expectedFileName = defaultArg FileName Globals.ASSAY_FILE_NAME
+        // the sheet parser only captures the optional flag, so it is created once up front
+        let parseSheet = Assay.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)
+
+        fun (tokens: #seq<#IParam>) ->
+            ISA.parseMetadataSheetsFromTokens expectedFileName parseSheet tokens
+            |> List.ofSeq
///
/// Parses all annotation tables from an ISA Assay XLSX file as a
diff --git a/src/ARCTokenization/Workbook.fs b/src/ARCTokenization/Workbook.fs
index fa7d553..d99e36d 100644
--- a/src/ARCTokenization/Workbook.fs
+++ b/src/ARCTokenization/Workbook.fs
@@ -9,7 +9,7 @@ module Workbook =
let getInvestigationMetadataSheet (useLastSheetOnIncorrectName: bool) investigation =
try
- FsWorkbook.getWorksheetByName "isa_investigation" investigation
+ FsWorkbook.getWorksheetByName Globals.INVESTIGATION_METADATA_SHEET_NAME investigation
with _ ->
if useLastSheetOnIncorrectName then
FsWorkbook.getWorksheets investigation
@@ -19,22 +19,22 @@ module Workbook =
let getStudyMetadataSheet (useLastSheetOnIncorrectName: bool) study =
try
- FsWorkbook.tryGetWorksheetByName "Study" study
- |> Option.defaultValue (FsWorkbook.getWorksheetByName "isa_study" study)
+ FsWorkbook.tryGetWorksheetByName Globals.STUDY_OBSOLETE_METADATA_SHEET_NAME study
+ |> Option.defaultWith (fun () -> FsWorkbook.getWorksheetByName Globals.STUDY_METADATA_SHEET_NAME study) // fallback is deferred: an eagerly evaluated lookup would throw for workbooks that only hold the obsolete sheet
with _ ->
if useLastSheetOnIncorrectName then
FsWorkbook.getWorksheets study
|> Seq.last
else
- failwith "No worksheet named 'Study' or 'isa_study' found in the workbook"
+ failwith $"No worksheet named {Globals.STUDY_OBSOLETE_METADATA_SHEET_NAME} or {Globals.STUDY_METADATA_SHEET_NAME} found in the workbook"
let getAssayMetadataSheet (useLastSheetOnIncorrectName: bool) assay =
try
- FsWorkbook.tryGetWorksheetByName "Assay" assay
- |> Option.defaultValue (FsWorkbook.getWorksheetByName "isa_assay" assay)
+ FsWorkbook.tryGetWorksheetByName Globals.ASSAY_OBSOLETE_METADATA_SHEET_NAME assay
+ |> Option.defaultWith (fun () -> FsWorkbook.getWorksheetByName Globals.ASSAY_METADATA_SHEET_NAME assay) // fallback is deferred: an eagerly evaluated lookup would throw for workbooks that only hold the obsolete sheet
with _ ->
if useLastSheetOnIncorrectName then
FsWorkbook.getWorksheets assay
|> Seq.last
else
- failwith "No worksheet named 'Assay' or 'isa_assay' found in the workbook"
\ No newline at end of file
+ failwith $"No worksheet named {Globals.ASSAY_OBSOLETE_METADATA_SHEET_NAME} or {Globals.ASSAY_METADATA_SHEET_NAME} found in the workbook"
\ No newline at end of file
diff --git a/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx b/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx
new file mode 100644
index 0000000..d3ad4bc
Binary files /dev/null and b/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx differ
diff --git a/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx b/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx
new file mode 100644
index 0000000..7dd9d68
Binary files /dev/null and b/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx differ
diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs
index f2482ad..d0b78f7 100644
--- a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs
+++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs
@@ -12,4 +12,33 @@ module AssayMetadata =
let allExpectedMetadataTermsEmpty =
Terms.AssayMetadata.nonObsoleteNonRootCvTerms
- |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []))
\ No newline at end of file
+ |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []))
+
+ open ARCTokenization.StructuralOntology
+
+ let allExpectedMetadataTermsFull =
+ ARCMock.AssayMetadataTokens(
+ Assay_File_Name = [@"measurement1\isa.assay.xlsx"],
+ Assay_Performer_First_Name = ["Oliver"; "Marius"],
+ Assay_Performer_Last_Name = ["Maus"; "Katz"],
+ Assay_Performer_Mid_Initials = [""; "G."],
+ Assay_Performer_Email = ["maus@nfdi4plants.org"],
+ Assay_Performer_Affiliation = ["RPTU University of Kaiserslautern"],
+ Assay_Performer_Roles = ["research assistant"],
+ Assay_Performer_Roles_Term_Accession_Number = ["http://purl.org/spar/scoro/research-assistant"],
+ Assay_Performer_Roles_Term_Source_REF = ["scoro"]
+ )
+ |> List.concat // use flat list
+
+ []
+    let ``Simple assay is parsed from filepath CvParam with all structural ontology terms in order`` () =
+        let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/assay_simple.xlsx")
+        let actual =
+            [fakePath]
+            |> Assay.parseMetadataSheetsFromTokens(
+                FileName = "assay_simple.xlsx" // the fixture does not use the default assay file name, so it is overridden here
+            )
+            |> Seq.head // a single token is passed in; an empty result (fixture not parsed) fails the test here
+        Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) ->
+            CvParam.structuralEquality (expected) (actual :?> CvParam)
+        ))
\ No newline at end of file
diff --git a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs
index 6fb8120..e198d0a 100644
--- a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs
+++ b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs
@@ -10,8 +10,8 @@ module InvestigationMetadata =
open TestUtils
- let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile "Fixtures/incorrect/investigation_empty.xlsx"
- let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile "Fixtures/correct/investigation_simple.xlsx"
+ let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile() "Fixtures/incorrect/investigation_empty.xlsx"
+ let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile() "Fixtures/correct/investigation_simple.xlsx"
let allExpectedMetadataTermsEmpty =
Terms.InvestigationMetadata.nonObsoleteNonRootCvTerms
@@ -55,4 +55,20 @@ module InvestigationMetadata =
let ``Simple investigation is parsed with all structural ontology terms in order`` () =
Assert.All((List.zip allExpectedMetadataTermsFull parsedInvestigationMetadataSimple), (fun (expected,actual) ->
CvParam.structuralEquality (expected) (actual :?> CvParam)
+ ))
+
+ open ARCTokenization.StructuralOntology
+
+ []
+ let ``Simple investigation is parsed from filepath CvParam with all structural ontology terms in order`` () =
+ let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/investigation_simple.xlsx")
+ let actual =
+ [fakePath]
+ |> Investigation.parseMetadataSheetsFromTokens(
+ FileName = "investigation_simple.xlsx"
+ )
+ |> Seq.head
+
+ Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) ->
+ CvParam.structuralEquality (expected) (actual :?> CvParam)
))
\ No newline at end of file
diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs
index 287cbf7..3404ab4 100644
--- a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs
+++ b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs
@@ -12,4 +12,28 @@ module StudyMetadata =
let allExpectedMetadataTermsEmpty =
Terms.StudyMetadata.nonObsoleteNonRootCvTerms
- |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []))
\ No newline at end of file
+ |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), []))
+
+ let allExpectedMetadataTermsFull =
+ ARCMock.StudyMetadataTokens(
+ Study_Identifier = ["experiment1_material"],
+ Study_Title = ["Prototype for experimental data"],
+ Study_Description = ["In this a devised study to have an exemplary experimental material description."],
+ Study_File_Name = [@"experiment1_material\isa.study.xlsx"]
+ )
+ |> List.concat // use flat list
+
+ open ARCTokenization.StructuralOntology
+
+ []
+ let ``Simple study is parsed from filepath CvParam with all structural ontology terms in order`` () =
+ let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/study_simple.xlsx")
+ let actual =
+ [fakePath]
+ |> Study.parseMetadataSheetsFromTokens(
+ FileName = "study_simple.xlsx"
+ )
+ |> Seq.head
+ Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) ->
+ CvParam.structuralEquality (expected) (actual :?> CvParam)
+ ))
\ No newline at end of file