### Using DataFrames in C#

* [Source article: "Using DataFrames in C#" (swharden.com)](https://swharden.com/blog/2022-05-01-dotnet-dataframe/)
* [Microsoft Learn: `DataFrame` API reference](https://learn.microsoft.com/en-us/dotnet/api/microsoft.data.analysis.dataframe?view=ml-dotnet-preview)
* [Deedle: an alternative data frame library (C# introduction)](http://bluemountaincapital.github.io/Deedle/csharpintro.html)


In [None]:
#r "nuget:Microsoft.Data.Analysis"

using Microsoft.Data.Analysis;

In [None]:
// Sample data: one array per column, aligned by index (element i of each array is one person).
string[] names = new[] { "Oliver", "Charlotte", "Henry", "Amelia", "Owen" };
int[] ages = new[] { 23, 19, 42, 64, 35 };
double[] heights = new[] { 1.91, 1.62, 1.72, 1.57, 1.85 };

// Wrap each array in a named, typed column: strings get the dedicated string column
// type, numeric data goes into PrimitiveDataFrameColumn<T>.
DataFrameColumn[] columns = new DataFrameColumn[]
{
    new StringDataFrameColumn("Name", names),
    new PrimitiveDataFrameColumn<int>("Age", ages),
    new PrimitiveDataFrameColumn<double>("Height", heights),
};

DataFrame df = new DataFrame(columns);

// Outside a notebook, Console.Write(df) can be used instead of the trailing expression.
// A bare trailing expression makes the notebook display the value as the cell output.
df


### Append a Row

In [None]:
// The new row is described as (column name, value) pairs, one per column of df.
var newRowData = new List<KeyValuePair<string, object>>
{
    new("Name", "Scott"),
    new("Age", 36),
    new("Height", 1.65),
};

// inPlace: true appends to df itself instead of returning a new DataFrame.
df.Append(newRowData, inPlace: true);

// Show the frame with the appended row.
df

### Append a Column

In [None]:
// Build a "Weight" column holding one random integer per existing row.
Random randNum = new();
int min = 120;
int max = 140;
int count = (int)df.Rows.Count;

// Random.Next(min, max) treats max as an exclusive bound, so values fall in [120, 139].
int[] weights = new int[count];
for (int i = 0; i < count; i++)
{
    weights[i] = randNum.Next(min, max);
}

// The new column has exactly df.Rows.Count entries, matching the frame's row count.
PrimitiveDataFrameColumn<int> weightCol = new("Weight", weights);
df.Columns.Add(weightCol);

df


### Sort and Filter

In [None]:
// Keep only the rows whose Age is greater than 30, then sort the result by Name.
//
// Order matters: ElementwiseGreaterThan returns a positional boolean mask built from
// df's current row order. The mask must therefore be applied to df itself. Sorting
// first and filtering afterwards would pair the mask with reordered rows and keep
// the wrong people (e.g. a 23-year-old kept while a 64-year-old is dropped).
DataFrame df2 = df
    .Filter(df["Age"].ElementwiseGreaterThan(30))
    .OrderBy("Name");

df2.Display();

### Mathematical Operations

In [None]:
// Column arithmetic is applied element by element and produces a new column.
DataFrameColumn iqCol = df["Age"] * df["Height"] * 1.5;

// Copy the computed values into a plain double[].
// The indexer returns object, so each element is unboxed; a null entry would throw here.
double[] iqs = new double[(int)iqCol.Length];
for (int row = 0; row < iqs.Length; row++)
{
    iqs[row] = (double)iqCol[row];
}

// Attach the values to df as a new "IQ" column and show the result.
df.Columns.Add(new PrimitiveDataFrameColumn<double>("IQ", iqs));

df.Display();

### Statistical Operations
* [LinqStatistics](https://github.com/dkackman/LinqStatistics)

In [None]:
#r "nuget:LinqStatistics"

using LinqStatistics;

In [None]:
// Print mean +/- standard deviation for every column except the first one
// (Skip(1) leaves out the leading column, which holds the names in this notebook).
foreach (DataFrameColumn column in df.Columns.Skip(1))
{
    int rowCount = (int)column.Length;
    double[] samples = new double[rowCount];
    for (int row = 0; row < rowCount; row++)
    {
        // Warning: Convert.ToDouble turns a null cell into 0, which would skew the
        // statistics. Datasets containing nulls need additional handling.
        samples[row] = Convert.ToDouble(column[row]);
    }

    double average = samples.Average();
    double deviation = samples.StandardDeviation(); // LinqStatistics extension method
    Console.WriteLine($"{column.Name} = {average} +/- {deviation:N3} (n={samples.Length})");
}