diff --git a/playground.fsx b/playground.fsx index aa85dbf..ea6f5c6 100644 --- a/playground.fsx +++ b/playground.fsx @@ -7,12 +7,9 @@ //File.Copy(dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO.dll", dllBasePath + "/FsSpreadsheet.ExcelIO/bin/Debug/netstandard2.0/FsSpreadsheet.ExcelIO_Copy.dll", true) //File.Copy(@"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel.dll", @"C:\Repos\nfdi4plants\ArcGraphModel\src\ArcGraphModel\bin\Debug\net6.0\ArcGraphModel_Copy.dll", true) -#r "nuget: DocumentFormat.OpenXml" #r "nuget: FSharpAux" #r "nuget: FsOboParser" -#r "nuget: FsSpreadsheet, 3.1.1" -#r "nuget: FsSpreadsheet.ExcelIO, 3.1.1" -#r "nuget: FSharp.FGL" +#r "nuget: FsSpreadsheet.ExcelIO, 4.1.0" #r "nuget: FSharp.FGL.ArrayAdjacencyGraph" open DocumentFormat.OpenXml @@ -45,6 +42,32 @@ open FsOboParser open ControlledVocabulary open type ControlledVocabulary.ParamBase open ARCTokenization +open ARCTokenization.StructuralOntology + +System.IO.Directory.GetCurrentDirectory() +let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = System.IO.Directory.GetCurrentDirectory() + "/tests/ARCTokenization.Tests/Fixtures/correct/investigation_simple.xlsx") + +let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx") +let actual = ParamBasedParsers.parseIsaMetadataSheetFromCvp "assay_simple.xlsx" Assay.parseMetadataSheetFromFile [fakePath] |> Seq.head +actual.Length +let exp = + ARCMock.AssayMetadataTokens( + Assay_File_Name = [@"measurement1\isa.assay.xlsx"], + Assay_Performer_First_Name = ["Oliver"; "Marius"], + Assay_Performer_Last_Name = ["Maus"; "Katz"], + Assay_Performer_Mid_Initials = [""; "G."], + Assay_Performer_Email = ["maus@nfdi4plants.org"], + Assay_Performer_Affiliation = ["RPTU University of Kaiserslautern"], + Assay_Performer_Roles = ["research assistant"], + Assay_Performer_Roles_Term_Accession_Number = 
["http://purl.org/spar/scoro/research-assistant"], + Assay_Performer_Roles_Term_Source_REF = ["scoro"] + ) + |> List.concat +exp.Length +actual |> List.fold (fun acc ip -> $"{acc}\n{ip.Name}") "" |> printfn "%s" +exp |> List.iter (fun ip -> printfn $"{ip.Name}") +for i = 0 to 33 do + printfn $"{List.tryItem i actual |> Option.map (fun x -> x.Name) |> Option.defaultValue System.String.Empty}\t{List.tryItem i exp |> Option.map (fun x -> x.Name) |> Option.defaultValue System.String.Empty}" let testAccession1 = "TO:00000001" let testName1 = "Test" diff --git a/src/ARCTokenization/ARCTokenization.fsproj b/src/ARCTokenization/ARCTokenization.fsproj index 8810615..5746aac 100644 --- a/src/ARCTokenization/ARCTokenization.fsproj +++ b/src/ARCTokenization/ARCTokenization.fsproj @@ -33,6 +33,7 @@ + diff --git a/src/ARCTokenization/Globals.fs b/src/ARCTokenization/Globals.fs new file mode 100644 index 0000000..081103c --- /dev/null +++ b/src/ARCTokenization/Globals.fs @@ -0,0 +1,15 @@ +module Globals + +open ARCtrl +open ARCtrl.ISA + +let [] INVESTIGATION_FILE_NAME = Path.InvestigationFileName +let [] INVESTIGATION_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcInvestigation.metaDataSheetName + +let [] STUDY_FILE_NAME = Path.StudyFileName +let [] STUDY_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcStudy.metaDataSheetName +let [] STUDY_OBSOLETE_METADATA_SHEET_NAME= ARCtrl.ISA.Spreadsheet.ArcStudy.obsoleteMetaDataSheetName + +let [] ASSAY_FILE_NAME = Path.AssayFileName +let [] ASSAY_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcAssay.metaDataSheetName +let [] ASSAY_OBSOLETE_METADATA_SHEET_NAME = ARCtrl.ISA.Spreadsheet.ArcAssay.obsoleteMetaDataSheetName diff --git a/src/ARCTokenization/TopLevelParsers.fs b/src/ARCTokenization/TopLevelParsers.fs index 9638df1..80aa336 100644 --- a/src/ARCTokenization/TopLevelParsers.fs +++ b/src/ARCTokenization/TopLevelParsers.fs @@ -5,6 +5,59 @@ open FSharpAux open FsSpreadsheet open FsSpreadsheet.ExcelIO +module internal ISA = + + open 
System.IO + + let tryParseMetadataSheetFromToken (isaFileName: string) (isaMdsParsingF: string -> IParam list) (absFileToken: IParam) = + + let cvpStr = Param.getValueAsString absFileToken + //printfn $"cvpStr: {cvpStr}" + //if String.contains isaFileName cvpStr then + if Path.GetFileName cvpStr = isaFileName then + try + Some (isaMdsParsingF cvpStr) + with _ -> + None + else None + + let parseMetadataSheetsFromTokens (isaFileName: string) (isaMdsParsingF: string -> IParam list) (absFileTokens: #IParam seq) = + absFileTokens + |> Seq.choose (fun token -> tryParseMetadataSheetFromToken isaFileName isaMdsParsingF token) + + //type löl = + + + // static member parseStudyMetadataSheetFromCvp absFileTokens = + // parseMetadataSheetsFromCvps "isa.study.xlsx" ARCTokenization.Study.parseMetadataSheetfromFile absFileTokens + + // static member parseAssayMetadataSheetFromCvp absFileTokens = + // parseMetadataSheetsFromCvps "isa.assay.xlsx" ARCTokenization.Assay.parseMetadataSheetFromFile absFileTokens + + // static member tryParseIsaMetadataSheetFromCvp (isaFileName : string) isaMdsParsingF absFileTokens = + // absFileTokens + // |> Seq.choose ( + // fun cvp -> + // let cvpStr = Param.getValueAsString cvp + // //printfn $"cvpStr: {cvpStr}" + // //if String.contains isaFileName cvpStr then + // if isaFileName = Path.GetFileName cvpStr then + // try Some (isaMdsParsingF cvpStr) + // with _ -> None + // else None + // ) + + // static member tryParseInvestigationMetadataSheetFromCvp (absFileTokens : #IParam seq) = + // try ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.investigation.xlsx" ARCTokenization.Investigation.parseMetadataSheetFromFile absFileTokens + // |> Seq.concat + // with _ -> Seq.empty + + // static member tryParseStudyMetadataSheetFromCvp (absFileTokens : #IParam seq) = + // ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.study.xlsx" ARCTokenization.Study.parseMetadataSheetfromFile absFileTokens + + // static member 
tryParseAssayMetadataSheetFromCvp (absFileTokens : #IParam seq) = + // ParamBasedParsers.tryParseIsaMetadataSheetFromCvp "isa.assay.xlsx" ARCTokenization.Assay.parseMetadataSheetFromFile absFileTokens + type FileSystem = /// @@ -54,68 +107,143 @@ type FileSystem = type Investigation = /// - /// Parses the metadata sheet from an ISA Investigation XLSX file as a row-based 2D list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Investigation XLSX file as a row-based 2D list of `IParam`s. /// /// The path to the investigation xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook static member parseMetadataRowsFromFile( - path: string, ?UseLastSheetOnIncorrectName: bool ) = - - let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false + fun (path: string) -> + let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false - FsWorkbook.fromXlsxFile path - |> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey) + FsWorkbook.fromXlsxFile path + |> Workbook.getInvestigationMetadataSheet useLastSheetOnIncorrectName + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseInvestigationKey) /// - /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Investigation XLSX file at a given path as a flat list of `IParam`s. 
/// - /// The path to the study xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook static member parseMetadataSheetFromFile( - path: string, ?UseLastSheetOnIncorrectName: bool ) = - Investigation.parseMetadataRowsFromFile( - path = path, - ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName - ) - |> List.concat + fun (path: string) -> + path + |> Investigation.parseMetadataRowsFromFile( + ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName + ) + |> List.concat + + /// + /// Returns a function that returns Some flat IParam list representing the investigation metadata if the given token contains a filepath with the standard investigation file name ("isa.investigation.xlsx") or None otherwise. + /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook + /// The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default) + static member tryParseMetadataSheetFromToken( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.INVESTIGATION_FILE_NAME + + fun (token: #IParam) -> + ISA.tryParseMetadataSheetFromToken + fileName + (Investigation.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + token + + + /// + /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard investigation file name ("isa.investigation.xlsx") + /// in a given collection of tokens as a 2D list containing the individual Investigation metadata as a flat list of `IParam`s. 
+ /// + /// If no tokens contain such a file path, the result will be an empty list. + /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_investigation") in the workbook + /// The name of the investigation file, note that this should not be set if the file follows spec (as "isa.investigation.xlsx" is the default) + static member parseMetadataSheetsFromTokens( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.INVESTIGATION_FILE_NAME + + fun (tokens: #seq<#IParam>) -> + ISA.parseMetadataSheetsFromTokens + fileName + (Investigation.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + tokens + |> List.ofSeq type Study = /// - /// Parses the metadata sheet from an ISA Study XLSX file as a row-based 2D list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Study XLSX file as a row-based 2D list of `IParam`s. 
/// - /// The path to the study xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook static member parseMetadataRowsFromFile( - path: string, ?UseLastSheetOnIncorrectName: bool ) = + fun (path: string) -> - let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false + let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false - FsWorkbook.fromXlsxFile path - |> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey) + FsWorkbook.fromXlsxFile path + |> Workbook.getStudyMetadataSheet useLastSheetOnIncorrectName + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseStudyKey) /// - /// Parses the metadata sheet from an ISA Study XLSX file as a flat list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Study XLSX file at a given path as a flat list of `IParam`s. 
/// - /// The path to the study xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook - static member parseMetadataSheetfromFile( - path: string, + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook + static member parseMetadataSheetFromFile( ?UseLastSheetOnIncorrectName: bool ) = - Study.parseMetadataRowsFromFile( - path = path, - ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName - ) - |> List.concat + fun (path: string) -> + path + |> Study.parseMetadataRowsFromFile( + ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName + ) + |> List.concat + + /// + /// Returns a function that returns Some flat IParam list representing the study metadata if the given token contains a filepath with the standard study file name ("isa.study.xlsx") or None otherwise. + /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook + /// The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default) + static member tryParseMetadataSheetFromToken( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.STUDY_FILE_NAME + + fun (token: #IParam) -> + ISA.tryParseMetadataSheetFromToken + fileName + (Study.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + token + + + /// + /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard study file name ("isa.study.xlsx") + /// in a given collection of tokens as a 2D list containing the individual study metadata as a flat list of `IParam`s. + /// + /// If no tokens contain such a file path, the result will be an empty list. 
+ /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_study") in the workbook + /// The name of the study file, note that this should not be set if the file follows spec (as "isa.study.xlsx" is the default) + static member parseMetadataSheetsFromTokens( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.STUDY_FILE_NAME + + fun (tokens: #seq<#IParam>) -> + ISA.parseMetadataSheetsFromTokens + fileName + (Study.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + tokens + |> List.ofSeq + /// /// Parses all annotation tables from an ISA Study XLSX file as a @@ -141,35 +269,72 @@ type Study = type Assay = /// - /// Parses the metadata sheet from an ISA Assay XLSX file as a row-based 2D list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Assay XLSX file as a row-based 2D list of `IParam`s. /// - /// The path to the assay xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook static member parseMetadataRowsFromFile( - path: string, ?UseLastSheetOnIncorrectName: bool ) = + fun (path: string) -> - let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false + let useLastSheetOnIncorrectName = defaultArg UseLastSheetOnIncorrectName false - FsWorkbook.fromXlsxFile path - |> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName - |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey) + FsWorkbook.fromXlsxFile path + |> Workbook.getAssayMetadataSheet useLastSheetOnIncorrectName + |> Worksheet.parseRowsWith (Tokenization.convertMetadataTokens MetadataSheet.parseAssayKey) /// - /// Parses the 
metadata sheet from an ISA Assay XLSX file as a flat list of `IParam`s. + /// Returns a function that parses the metadata sheet from an ISA Assay XLSX file at a given path as a flat list of `IParam`s. /// - /// The path to the assay xlsx file - /// Wether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("Assay") in the workbook + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook static member parseMetadataSheetFromFile( - path: string, ?UseLastSheetOnIncorrectName: bool ) = - Assay.parseMetadataRowsFromFile( - path = path, - ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName - ) - |> List.concat + fun (path: string) -> + path + |> Assay.parseMetadataRowsFromFile( + ?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName + ) + |> List.concat + + /// + /// Returns a function that returns Some flat IParam list representing the assay metadata if the given token contains a filepath with the standard assay file name ("isa.assay.xlsx") or None otherwise. 
+ /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook + /// The name of the assay file, note that this should not be set if the file follows spec (as "isa.assay.xlsx" is the default) + static member tryParseMetadataSheetFromToken( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.ASSAY_FILE_NAME + + fun (token: #IParam) -> + ISA.tryParseMetadataSheetFromToken + fileName + (Assay.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + token + + + /// + /// Returns a function that parses all metadata sheets from all the tokens containing a filepath with the standard assay file name ("isa.assay.xlsx") + /// in a given collection of tokens as a 2D list containing the individual assay metadata as a flat list of `IParam`s. + /// + /// If no tokens contain such a file path, the result will be an empty list. + /// + /// Whether or not to try parse the last sheet as metadata sheet when there is no sheet with the correct name ("isa_assay") in the workbook + /// The name of the assay file, note that this should not be set if the file follows spec (as "isa.assay.xlsx" is the default) + static member parseMetadataSheetsFromTokens( + ?UseLastSheetOnIncorrectName: bool, + ?FileName: string + ) = + let fileName = defaultArg FileName Globals.ASSAY_FILE_NAME + + fun (tokens: #seq<#IParam>) -> + ISA.parseMetadataSheetsFromTokens + fileName + (Assay.parseMetadataSheetFromFile(?UseLastSheetOnIncorrectName = UseLastSheetOnIncorrectName)) + tokens + |> List.ofSeq /// /// Parses all annotation tables from an ISA Assay XLSX file as a diff --git a/src/ARCTokenization/Workbook.fs b/src/ARCTokenization/Workbook.fs index fa7d553..d99e36d 100644 --- a/src/ARCTokenization/Workbook.fs +++ b/src/ARCTokenization/Workbook.fs @@ -9,7 +9,7 @@ module Workbook = let getInvestigationMetadataSheet 
(useLastSheetOnIncorrectName: bool) investigation = try - FsWorkbook.getWorksheetByName "isa_investigation" investigation + FsWorkbook.getWorksheetByName Globals.INVESTIGATION_METADATA_SHEET_NAME investigation with _ -> if useLastSheetOnIncorrectName then FsWorkbook.getWorksheets investigation @@ -19,22 +19,22 @@ module Workbook = let getStudyMetadataSheet (useLastSheetOnIncorrectName: bool) study = try - FsWorkbook.tryGetWorksheetByName "Study" study - |> Option.defaultValue (FsWorkbook.getWorksheetByName "isa_study" study) + FsWorkbook.tryGetWorksheetByName Globals.STUDY_OBSOLETE_METADATA_SHEET_NAME study + |> Option.defaultValue (FsWorkbook.getWorksheetByName Globals.STUDY_METADATA_SHEET_NAME study) with _ -> if useLastSheetOnIncorrectName then FsWorkbook.getWorksheets study |> Seq.last else - failwith "No worksheet named 'Study' or 'isa_study' found in the workbook" + failwith $"No worksheet named {Globals.STUDY_OBSOLETE_METADATA_SHEET_NAME} or {Globals.STUDY_METADATA_SHEET_NAME} found in the workbook" let getAssayMetadataSheet (useLastSheetOnIncorrectName: bool) assay = try - FsWorkbook.tryGetWorksheetByName "Assay" assay - |> Option.defaultValue (FsWorkbook.getWorksheetByName "isa_assay" assay) + FsWorkbook.tryGetWorksheetByName Globals.ASSAY_OBSOLETE_METADATA_SHEET_NAME assay + |> Option.defaultValue (FsWorkbook.getWorksheetByName Globals.ASSAY_METADATA_SHEET_NAME assay) with _ -> if useLastSheetOnIncorrectName then FsWorkbook.getWorksheets assay |> Seq.last else - failwith "No worksheet named 'Assay' or 'isa_assay' found in the workbook" \ No newline at end of file + failwith $"No worksheet named {Globals.ASSAY_OBSOLETE_METADATA_SHEET_NAME} or {Globals.ASSAY_METADATA_SHEET_NAME} found in the workbook" \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx b/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx new file mode 100644 index 0000000..d3ad4bc Binary files /dev/null and 
b/tests/ARCTokenization.Tests/Fixtures/correct/assay_simple.xlsx differ diff --git a/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx b/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx new file mode 100644 index 0000000..7dd9d68 Binary files /dev/null and b/tests/ARCTokenization.Tests/Fixtures/correct/study_simple.xlsx differ diff --git a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs index f2482ad..d0b78f7 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/AssayMetadata.fs @@ -12,4 +12,33 @@ module AssayMetadata = let allExpectedMetadataTermsEmpty = Terms.AssayMetadata.nonObsoleteNonRootCvTerms - |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file + |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) + + open ARCTokenization.StructuralOntology + + let allExpectedMetadataTermsFull = + ARCMock.AssayMetadataTokens( + Assay_File_Name = [@"measurement1\isa.assay.xlsx"], + Assay_Performer_First_Name = ["Oliver"; "Marius"], + Assay_Performer_Last_Name = ["Maus"; "Katz"], + Assay_Performer_Mid_Initials = [""; "G."], + Assay_Performer_Email = ["maus@nfdi4plants.org"], + Assay_Performer_Affiliation = ["RPTU University of Kaiserslautern"], + Assay_Performer_Roles = ["research assistant"], + Assay_Performer_Roles_Term_Accession_Number = ["http://purl.org/spar/scoro/research-assistant"], + Assay_Performer_Roles_Term_Source_REF = ["scoro"] + ) + |> List.concat // use flat list + + [] + let ``Simple study is parsed from filepath CvParam with all structural ontology terms in order`` () = + let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/assay_simple.xlsx") + 
let actual = + [fakePath] + |> Assay.parseMetadataSheetsFromTokens( + FileName = "assay_simple.xlsx" + ) + |> Seq.head + Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) -> + CvParam.structuralEquality (expected) (actual :?> CvParam) + )) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs index 6fb8120..e198d0a 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/InvestigationMetadata.fs @@ -10,8 +10,8 @@ module InvestigationMetadata = open TestUtils - let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile "Fixtures/incorrect/investigation_empty.xlsx" - let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile "Fixtures/correct/investigation_simple.xlsx" + let parsedInvestigationMetadataEmpty = Investigation.parseMetadataSheetFromFile() "Fixtures/incorrect/investigation_empty.xlsx" + let parsedInvestigationMetadataSimple = Investigation.parseMetadataSheetFromFile() "Fixtures/correct/investigation_simple.xlsx" let allExpectedMetadataTermsEmpty = Terms.InvestigationMetadata.nonObsoleteNonRootCvTerms @@ -55,4 +55,20 @@ module InvestigationMetadata = let ``Simple investigation is parsed with all structural ontology terms in order`` () = Assert.All((List.zip allExpectedMetadataTermsFull parsedInvestigationMetadataSimple), (fun (expected,actual) -> CvParam.structuralEquality (expected) (actual :?> CvParam) + )) + + open ARCTokenization.StructuralOntology + + [] + let ``Simple investigation is parsed from filepath CvParam with all structural ontology terms in order`` () = + let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/investigation_simple.xlsx") + let actual = + [fakePath] + |> Investigation.parseMetadataSheetsFromTokens( + FileName = 
"investigation_simple.xlsx" + ) + |> Seq.head + + Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) -> + CvParam.structuralEquality (expected) (actual :?> CvParam) )) \ No newline at end of file diff --git a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs index 287cbf7..3404ab4 100644 --- a/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs +++ b/tests/ARCTokenization.Tests/IntegrationTests/StudyMetadata.fs @@ -12,4 +12,28 @@ module StudyMetadata = let allExpectedMetadataTermsEmpty = Terms.StudyMetadata.nonObsoleteNonRootCvTerms - |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) \ No newline at end of file + |> List.map (fun p -> CvParam(p, ParamValue.CvValue (CvTerm.create(accession = "AGMO:00000001", name = "Metadata Section Key", ref = "AGMO")), [])) + + let allExpectedMetadataTermsFull = + ARCMock.StudyMetadataTokens( + Study_Identifier = ["experiment1_material"], + Study_Title = ["Prototype for experimental data"], + Study_Description = ["In this a devised study to have an exemplary experimental material description."], + Study_File_Name = [@"experiment1_material\isa.study.xlsx"] + ) + |> List.concat // use flat list + + open ARCTokenization.StructuralOntology + + [] + let ``Simple study is parsed from filepath CvParam with all structural ontology terms in order`` () = + let fakePath = CvParam(cvTerm = AFSO.``File Path``, v = "Fixtures/correct/study_simple.xlsx") + let actual = + [fakePath] + |> Study.parseMetadataSheetsFromTokens( + FileName = "study_simple.xlsx" + ) + |> Seq.head + Assert.All((List.zip allExpectedMetadataTermsFull actual), (fun (expected,actual) -> + CvParam.structuralEquality (expected) (actual :?> CvParam) + )) \ No newline at end of file