In [1]:
#r "nuget:DwC-A_dotnet, 0.7.0"
#r "nuget:DwC-A_dotnet.Interactive, 0.1.10-Pre"

Loading extensions from `DwC-A_dotnet.Interactive.dll`

In [2]:
#!terms

Name,Term,Description
acceptedNameUsage,http://rs.tdwg.org/dwc/terms/acceptedNameUsage,"The full name, with authorship and date information if known, of the currently valid (zoological) or accepted (botanical) taxon."
acceptedNameUsageID,http://rs.tdwg.org/dwc/terms/acceptedNameUsageID,An identifier for the name usage (documented meaning of the name according to a source) of the currently valid (zoological) or accepted (botanical) taxon.
accessRights,http://purl.org/dc/terms/accessRights,Information about who can access the resource or an indication of its security status.
associatedMedia,http://rs.tdwg.org/dwc/terms/associatedMedia,"A list (concatenated and separated) of identifiers (publication, global unique identifier, URI) of media associated with the Occurrence."
associatedOccurrences,http://rs.tdwg.org/dwc/terms/associatedOccurrences,A list (concatenated and separated) of identifiers of other Occurrence records and their associations to this Occurrence.
associatedOrganisms,http://rs.tdwg.org/dwc/terms/associatedOrganisms,A list (concatenated and separated) of identifiers of other Organisms and the associations of this Organism to each of them.
associatedReferences,http://rs.tdwg.org/dwc/terms/associatedReferences,"A list (concatenated and separated) of identifiers (publication, bibliographic reference, global unique identifier, URI) of literature associated with the Occurrence."
associatedSequences,http://rs.tdwg.org/dwc/terms/associatedSequences,"A list (concatenated and separated) of identifiers (publication, global unique identifier, URI) of genetic sequence information associated with the Occurrence."
associatedTaxa,http://rs.tdwg.org/dwc/terms/associatedTaxa,A list (concatenated and separated) of identifiers or names of taxa and the associations of this Occurrence to each of them.
basisOfRecord,http://rs.tdwg.org/dwc/terms/basisOfRecord,The specific nature of the data record.


In [3]:
using DwC_A;
using DwC_A.Builders;
using DwC_A.Meta;
using DwC_A.Terms;

var fieldMetaDataBuilder = FieldsMetaDataBuilder.Fields()
    .AddField(f => f.Term(Terms.geodeticDatum).Default("WGS84"))
    .AutomaticallyIndex()
    .AddField(f => f.Term("id"))
    .AddField(f => f.Term(Terms.dateIdentified))
    .AddField(f => f.Term(Terms.recordedBy))
    .AddField(f => f.Term(Terms.decimalLatitude))
    .AddField(f => f.Term(Terms.decimalLongitude))
    .AddField(f => f.Term(Terms.license))
    .AddField(f => f.Term(Terms.kingdom))
    .AddField(f => f.Term(Terms.phylum))
    .AddField(f => f.Term(Terms.@class))
    .AddField(f => f.Term(Terms.order))
    .AddField(f => f.Term(Terms.genus))
    .AddField(f => f.Term(Terms.specificEpithet))
    .AddField(f => f.Term(Terms.scientificName))
    .AddField(f => f.Term(Terms.otherCatalogNumbers).Delimiter("|"));

In [4]:
var identifier = "http://purl.org/dc/terms/identifier";


var multiMediaMetaDataBuilder = FieldsMetaDataBuilder.Fields()
    .AutomaticallyIndex()
    .AddField(f => f.Term("id"))
    .AddField(f => f.Term(Terms.type))
    .AddField(f => f.Term(identifier))
    .AddField(f => f.Term(Terms.references));

In [5]:
var fileMetaData = CoreFileMetaDataBuilder.File("occurrence.txt")
    .FieldsEnclosedBy("\"")
    .FieldsTerminatedBy("\\t")
    .LinesTerminatedBy("\\n")
    .IgnoreHeaderLines(1)
    .Encoding(Encoding.UTF8)
    .Index(0)
    .RowType(RowTypes.Occurrence)
    .AddFields(fieldMetaDataBuilder);

In [6]:
var multimediaFileMetaData = ExtensionFileMetaDataBuilder.File("multimedia.txt")
    .CoreIndex(0)
    .RowType(RowTypes.Identification)
    .AddFields(multiMediaMetaDataBuilder);

In [7]:
using System.Text.Json;
using System.IO;

//Add a builder context so files are written under a subdirectory here
var context = new BuilderContext("./MyObservations", false);


var json = "./Data/observations.json";

var doc = JsonDocument.Parse(File.ReadAllText(json));

var fileBuilder = FileBuilder.MetaData(fileMetaData)
    .Context(context)
    .BuildRows(rowBuilder => BuildCoreRows(rowBuilder));

IEnumerable<string> BuildCoreRows(RowBuilder rowBuilder)
{
    foreach(var node in doc.RootElement.EnumerateArray())
    {
        yield return rowBuilder.AddField(node.GetProperty("id"))
            .AddField(node.GetProperty("time_observed_at"))
            .AddField(node.GetProperty("user_name"))
            .AddField(node.GetProperty("latitude"))
            .AddField(node.GetProperty("longitude"))
            .AddField(node.GetProperty("license"))
            .AddField(node.GetProperty("taxon_kingdom_name"))
            .AddField(node.GetProperty("taxon_phylum_name"))
            .AddField(node.GetProperty("taxon_class_name"))
            .AddField(node.GetProperty("taxon_order_name"))
            .AddField(node.GetProperty("taxon_genus_name"))
            .AddField(node.GetProperty("taxon_species_name"))
            .AddField(node.GetProperty("scientific_name"))
            .AddField(node.GetProperty("catalogNumber"))
            .Build();
    }
}

In [8]:
var multimediaFileBuilder = FileBuilder.MetaData(multimediaFileMetaData)
    .Context(context)
    .BuildRows(rowBuilder => BuildMultiMediaRows(rowBuilder));

public static string NullIfEmpty(this string s)
{
    return string.IsNullOrEmpty(s) ? null : s;
}

IEnumerable<string> BuildMultiMediaRows(RowBuilder rowBuilder)
{
    var mediaRows = doc.RootElement
        .EnumerateArray()
        .Select(n => new {
            id = n.GetProperty("id").ToString(),
            image = n.GetProperty("image_url").ToString(),
            sound = n.GetProperty("sound_url").ToString(),
            url = n.GetProperty("url").ToString()
        });

    foreach(var row in mediaRows)
    {
        var mediaUrl = row.image.NullIfEmpty() ?? row.sound;
        var type = row.image == "" ? "SoundRecording" : "StillImage";
        yield return rowBuilder.AddField(row.id)
            .AddField(type)
            .AddField(mediaUrl)
            .AddField(row.url)
            .Build();
    }
}

In [9]:
using DwC_A.Writers;

ArchiveWriter.CoreFile(fileBuilder, fileMetaData)
    .AddExtensionFile(multimediaFileBuilder, multimediaFileMetaData)
    .Context(context)
    .Build("MyObservations.zip");

In [10]:
var archive = new ArchiveReader("./MyObservations.zip");
archive.Display();
archive.CoreFile.Display();
archive.CoreFile.DataRows.Display();
archive.Extensions.GetFileReaderByFileName("multimedia.txt").Display();

var multimedia = archive.Extensions.GetFileReaderByFileName("multimedia.txt");
multimedia.DataRows.Display();


File Type,File Name,Row Type
CoreFile,occurrence.txt,http://rs.tdwg.org/dwc/terms/Occurrence
Extension:,multimedia.txt,http://rs.tdwg.org/dwc/terms/Identification
Metadata:,<null>,


Index,Name,Term,Vocabulary,Default,Delimiter
0,id*,id,<null>,<null>,<null>
1,dateIdentified,http://rs.tdwg.org/dwc/terms/dateIdentified,<null>,<null>,<null>
2,recordedBy,http://rs.tdwg.org/dwc/terms/recordedBy,<null>,<null>,<null>
3,decimalLatitude,http://rs.tdwg.org/dwc/terms/decimalLatitude,<null>,<null>,<null>
4,decimalLongitude,http://rs.tdwg.org/dwc/terms/decimalLongitude,<null>,<null>,<null>
5,license,http://purl.org/dc/terms/license,<null>,<null>,<null>
6,kingdom,http://rs.tdwg.org/dwc/terms/kingdom,<null>,<null>,<null>
7,phylum,http://rs.tdwg.org/dwc/terms/phylum,<null>,<null>,<null>
8,class,http://rs.tdwg.org/dwc/terms/class,<null>,<null>,<null>
9,order,http://rs.tdwg.org/dwc/terms/order,<null>,<null>,<null>


id,dateIdentified,recordedBy,decimalLatitude,decimalLongitude,license,kingdom,phylum,class,order,genus,specificEpithet,scientificName,otherCatalogNumbers,geodeticDatum
15977375,2018-08-28 16:57:34 UTC,pjoiner,32.9834594727,-97.1735916138,CC-BY-NC,Animalia,Arthropoda,Insecta,Lepidoptera,Amorpha,juglandis,Amorpha juglandis,S-1|1,WGS84
16232977,2018-09-05 15:29:36 UTC,pjoiner,32.9843521118,-97.1738128662,CC-BY-NC,Animalia,Chordata,Aves,Pelecaniformes,Ardea,herodias,Ardea herodias,S-2|2,WGS84
22118411,2019-04-07 00:13:03 UTC,pjoiner,32.81603405,-96.73988205,CC-BY-NC,Animalia,Arthropoda,Insecta,Hemiptera,Acanthocephala,declivis,Acanthocephala declivis,L-1|3,WGS84
22722299,2019-04-19 20:07:24 UTC,pjoiner,32.98429919,-97.17300588,CC-BY-NC,Animalia,Arthropoda,Insecta,Lepidoptera,Acronicta,insularis,Acronicta insularis,S-3|4,WGS84
24238146,2019-05-01 00:32:37 UTC,pjoiner,<null>,<null>,CC-BY-NC,Animalia,Arthropoda,Chilopoda,Scutigeromorpha,Scutigera,<null>,Scutigera,G-1|5,WGS84
24702094,2019-05-04 17:04:19 UTC,pjoiner,32.8657143,-96.9821132,CC-BY-NC,Animalia,Chordata,Reptilia,Squamata,Sceloporus,olivaceus,Sceloporus olivaceus,I-1|6,WGS84
25216585,2019-05-16 15:46:52 UTC,pjoiner,32.98498773,-97.17462759,CC-BY-NC,Animalia,Arthropoda,Insecta,Odonata,Anax,junius,Anax junius,S-5|7,WGS84
26076819,<null>,pjoiner,32.8179358521,-96.7379645258,CC-BY-NC,Animalia,Arthropoda,Insecta,Lepidoptera,Automeris,io,Automeris io,L-2|8,WGS84
27089654,2019-06-16 15:00:26 UTC,pjoiner,32.9674835205,-97.0188446045,CC-BY-NC,Plantae,Tracheophyta,Magnoliopsida,Sapindales,Zanthoxylum,clava-herculis,Zanthoxylum clava-herculis,C-1|9,WGS84
27089903,2019-06-16 15:05:09 UTC,pjoiner,32.9675598145,-97.017250061,CC-BY-NC,Animalia,Arthropoda,Insecta,Hymenoptera,Pogonomyrmex,barbatus,Pogonomyrmex barbatus,C-2|10,WGS84


Index,Name,Term,Vocabulary,Default,Delimiter
0,id*,id,<null>,<null>,<null>
1,type,http://purl.org/dc/elements/1.1/type,<null>,<null>,<null>
2,identifier,http://purl.org/dc/terms/identifier,<null>,<null>,<null>
3,references,http://purl.org/dc/terms/references,<null>,<null>,<null>
*Indicates index column,,,,,


id,type,identifier,references
15977375,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/23918380/medium.jpeg,https://www.inaturalist.org/observations/15977375
16232977,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/24364254/medium.jpeg,https://www.inaturalist.org/observations/16232977
22118411,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/34240040/medium.jpeg,https://www.inaturalist.org/observations/22118411
22722299,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/35210678/medium.jpeg,https://www.inaturalist.org/observations/22722299
24238146,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/37482263/medium.jpeg,https://www.inaturalist.org/observations/24238146
24702094,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/38199510/medium.jpeg,https://www.inaturalist.org/observations/24702094
25216585,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/39035359/medium.jpeg,https://www.inaturalist.org/observations/25216585
26076819,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/40457902/medium.jpg,https://www.inaturalist.org/observations/26076819
27089654,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/42112397/medium.jpeg,https://www.inaturalist.org/observations/27089654
27089903,StillImage,https://inaturalist-open-data.s3.amazonaws.com/photos/42112751/medium.jpeg,https://www.inaturalist.org/observations/27089903
