In [None]:
#r "nuget: Deedle.Interactive"
#r "nuget: FSharpAux"

In [None]:
/// One publication, as converted from a row of the publications table in this notebook.
type Publication = {
    /// Publication type; filled from the "Typ" column.
    PubType   : string
    /// Publisher; filled from the "Publisher" column (emitted as the BibTeX `journal` field).
    Publisher : string
    /// Title; filled from the "Titel" column.
    Title     : string
    /// Publication year; filled from the "Jahr" column.
    Year      : int
    /// Author names joined with " and "; built from the " | "-separated "Autoren" column.
    Authors   : string
    /// Identifier or link; filled from the "DOI/PID/URL" column.
    DOI       : string
    /// True when the "Open Access" column holds exactly "open access".
    OpenAccess: bool
}

In [None]:
open Deedle
// Load the tab-separated publication table; the first line holds the column names.
let df =
    Frame.ReadCsv(
        @"pubs.txt",
        hasHeaders = true,
        separators = "\t"
    )

// Show the loaded frame as the cell output.
df

In [None]:
// Convert every row of the frame into a Publication record.
let pubs =
    df
    |> Frame.mapRows (fun _ row ->
        // Shorthand for reading a column of the current row as text.
        let text (column: string) = row.GetAs<string>(column)
        {
            PubType    = text "Typ"
            Publisher  = text "Publisher"
            Title      = text "Titel"
            Year       = row.GetAs<int>("Jahr")
            // Authors are " | "-separated in the table and re-joined with " and ".
            Authors    =
                (text "Autoren").Split(" | ", StringSplitOptions.TrimEntries)
                |> String.concat " and "
            DOI        = text "DOI/PID/URL"
            // Only the exact cell text "open access" counts as open access.
            OpenAccess = (text "Open Access" = "open access")
        })
    |> Series.values
    |> Seq.toArray

// Show the converted records as the cell output.
pubs


In [None]:
/// Maps a raw publication type label to a short category name.
/// "Journal Article" and "Conference Paper" are shortened, "Book", "Book Chapter"
/// and "Thesis" are returned unchanged, and every other input yields "Other".
let cleanType (t: string) =
    match t with
    | "Journal Article" -> "Journal"
    | "Conference Paper" -> "Conference"
    // These labels are already in their final form.
    | "Book" | "Book Chapter" | "Thesis" -> t
    | _ -> "Other"


In [None]:
open FSharpAux

/// Renders a publication as a single BibTeX entry.
///
/// `index` is used as the citation key (via `ToString()`). Spaces in `pub.PubType`
/// are replaced with underscores to form the entry type. A trailing
/// `note = {open access}` field is emitted when `pub.OpenAccess` is true.
///
/// The entry is assembled directly instead of substituting "[index]" and "[[note]]"
/// placeholders after the fact, so field values that happen to contain those tokens
/// are written out unchanged.
let formatBibTex index (pub: Publication) = 
    let newline = System.Environment.NewLine
    // "@<type>{<key>," - built by concatenation to keep the literal brace out of the template.
    let header = "@" + pub.PubType.Replace(" ", "_") + "{" + index.ToString() + ","
    // The separating comma belongs to the note so that `year` stays the last field otherwise.
    let note =
        if pub.OpenAccess then "," + newline + "note = {open access}" else ""
    let body = $$"""{{header}}
author = {{{pub.Authors}}},
title = {{{pub.Title}}},
journal = {{{pub.Publisher}}},
doi = {{{pub.DOI}}},
year = {{{pub.Year}}}{{note}}"""
    body + newline + "}"

// Write one BibTeX file per publication. Files are named "<index>.bib": the row index
// doubles as the citation key, and a title-derived name would not be URL-safe.
pubs
|> Array.iteri (fun i p ->
    File.WriteAllText(
        $"""C:\Users\schne\source\repos\nfdi4plants\nfdi4plants.github.io\src\content\publications\{i}.bib""", 
        formatBibTex i p
    )
)

In [None]:
open System
open System.Text
open System.Text.RegularExpressions

/// Regex-based, best-effort parser for a single BibTeX entry.
///
/// Returns a map from attribute name to attribute value (both trimmed), plus a "type"
/// key holding the entry type. The citation id is matched but not returned.
/// If the text does not match the entry pattern, the map only contains "type" -> "".
let parseBibtexStringDirty (bibtex:string) =
    // Entry layout: "@<type>{<id>,<body>}"; anything before the '@' is skipped.
    let entryPattern = @".*@(?<type>[^{]+){(?<id>[^,]*),(?<body>.+)}"
    // One "<attribute> = {<value>}" pair followed by ',' or end of input;
    // the balancing group allows nested braces inside <value>.
    let attributePattern = @"(?<attribute>[^{}]*)\s*=\s*\{(?<value>(?:[^{}]|(?<open>\{)|(?<-open>\}))*(?(open)(?!)))\}(,|$)"

    // Strip line breaks and tabs so the whole entry sits on one line for the regexes.
    let isStopChar (c: char) = c = '\r' || c = '\n' || c = '\t'
    let flattened =
        bibtex
        |> Seq.filter (isStopChar >> not)
        |> Seq.toArray
        |> String.fromCharArray

    let entry = Regex.Match(flattened, entryPattern)
    let pubType = entry.Groups.["type"].Value.Trim()

    let attributes =
        Regex.Matches(entry.Groups.["body"].Value, attributePattern)
        |> Seq.cast<Match>
        |> Seq.map (fun m ->
            let name = m.Groups.["attribute"].Value.Trim()
            let value = m.Groups.["value"].Value.Trim()
            name, value)

    // "type" is listed first, so a body attribute that is itself named "type"
    // replaces it when the map is built.
    seq {
        yield "type", pubType
        yield! attributes
    }
    |> Map.ofSeq
    
// Manual check: parse one of the generated files and show the resulting map.
// Alternative input:
//@"C:\Users\schne\source\repos\nfdi4plants\nfdi4plants.github.io\src\content\publications\featured\datahub.bib"
parseBibtexStringDirty (
    File.ReadAllText @"C:\Users\schne\source\repos\nfdi4plants\nfdi4plants.github.io\src\content\publications\5.bib"
)


In [None]:
open FSharpAux

/// Renders a publication as a single BibTeX entry (variant without an index argument).
///
/// Spaces in `pub.PubType` are replaced with underscores to form the entry type.
/// When `pub.OpenAccess` is true, a `note = {open access}` field is appended; the
/// comma separating it from the preceding `year` field is emitted together with the
/// note, so the field list is well-formed in both cases.
///
/// NOTE(review): the entry is written without a citation key ("@type{" is followed
/// directly by the first field). The variant taking an index emits one; confirm
/// whether this version is still meant to be used.
let formatBibTex (pub: Publication) = 
    let newline = System.Environment.NewLine
    // The comma belongs to the note so that `year` remains the last field otherwise.
    let note =
        if pub.OpenAccess then "," + newline + "note = {open access}" else ""
    let body = $$"""@{{pub.PubType.Replace(" ", "_")}}{
author = {{{pub.Authors}}},
title = {{{pub.Title}}},
journal = {{{pub.Publisher}}},
doi = {{{pub.DOI}}},
year = {{{pub.Year}}}{{note}}"""
    body + newline + "}"

// Write one BibTeX file per publication. Files are named "<index>.bib";
// a title-derived name would not be URL-safe.
pubs
|> Array.iteri (fun i p ->
    File.WriteAllText(
        $"""C:\Users\schne\source\repos\nfdi4plants\nfdi4plants.github.io\src\content\publications\{i}.bib""", 
        formatBibTex p
    )
)