### Using DataFrames in C#

* [Original article (swharden.com)](https://swharden.com/blog/2022-05-01-dotnet-dataframe/)
* [Microsoft Learn: `DataFrame` API reference](https://learn.microsoft.com/en-us/dotnet/api/microsoft.data.analysis.dataframe?view=ml-dotnet-preview)
* [Deedle: an alternative data frame library (C# introduction)](http://bluemountaincapital.github.io/Deedle/csharpintro.html)


In [1]:
#r "nuget:Microsoft.Data.Analysis"

using Microsoft.Data.Analysis;

// Sample data: three parallel arrays holding one entry per person.
string[] names = new[] { "Oliver", "Charlotte", "Henry", "Amelia", "Owen" };
int[] ages = new[] { 23, 19, 42, 64, 35 };
double[] heights = new[] { 1.91, 1.62, 1.72, 1.57, 1.85 };


Loading extensions from `C:\Users\micha\.nuget\packages\microsoft.data.analysis\0.20.0\interactive-extensions\dotnet\Microsoft.Data.Analysis.Interactive.dll`

In [2]:
// Wrap each source array in a typed, named column.
var nameColumn = new StringDataFrameColumn("Name", names);
var ageColumn = new PrimitiveDataFrameColumn<int>("Age", ages);
var heightColumn = new PrimitiveDataFrameColumn<double>("Height", heights);
DataFrameColumn[] columns = { nameColumn, ageColumn, heightColumn };

// Assemble the columns into a single data frame.
var df = new DataFrame(columns);

// A bare trailing expression makes the notebook render the frame.
df


index,Name,Age,Height
0,Oliver,23,1.91
1,Charlotte,19,1.62
2,Henry,42,1.72
3,Amelia,64,1.57
4,Owen,35,1.85


### Append a Row

In [3]:
// Describe the new row as one (column name, value) pair per column.
var newRowData = new List<KeyValuePair<string, object>>
{
    new("Name", "Scott"),
    new("Age", 36),
    new("Height", 1.65),
};

// inPlace: true asks Append to add the row to df itself.
df.Append(newRowData, inPlace: true);

df

index,Name,Age,Height
0,Oliver,23,1.91
1,Charlotte,19,1.62
2,Henry,42,1.72
3,Amelia,64,1.57
4,Owen,35,1.85
5,Scott,36,1.65


### Append a Column

In [4]:
// Generate one pseudo-random weight per existing row and attach it as a new column.
var randNum = new Random();
int min = 120;
int max = 140;
int count = (int)df.Rows.Count;

// Random.Next(min, max) treats the upper bound as exclusive.
int[] weights = new int[count];
for (int row = 0; row < count; row++)
{
    weights[row] = randNum.Next(min, max);
}

var weightCol = new PrimitiveDataFrameColumn<int>("Weight", weights);
df.Columns.Add(weightCol);

df


index,Name,Age,Height,Weight
0,Oliver,23,1.91,131
1,Charlotte,19,1.62,135
2,Henry,42,1.72,139
3,Amelia,64,1.57,134
4,Owen,35,1.85,138
5,Scott,36,1.65,128


### Sort and Filter

In [5]:
// Filter first, then sort. Filter() applies the boolean mask by row position,
// and the mask below is computed from df in its current (unsorted) order.
// Applying it to the re-ordered result of OrderBy() would keep the wrong rows
// (for example a 23-year-old passing an "Age > 30" filter).
DataFrame df2 = df
    .Filter(df["Age"].ElementwiseGreaterThan(30))
    .OrderBy("Name");

df2.Display();

index,Name,Age,Height,Weight
0,Amelia,64,1.57,134
1,Henry,42,1.72,139
2,Owen,35,1.85,138
3,Scott,36,1.65,128


### Mathematical Operations

In [6]:
// Column arithmetic is element-wise: every row gets Age * Height * 1.5.
DataFrameColumn iqCol = df["Age"] * df["Height"] * 1.5;

// Copy the computed values into a plain double array, one element per row.
double[] iqs = new double[(int)iqCol.Length];
for (int row = 0; row < iqs.Length; row++)
{
    iqs[row] = (double)iqCol[row];
}

// Attach the values to the frame as a new column named "IQ".
df.Columns.Add(new PrimitiveDataFrameColumn<double>("IQ", iqs));

df.Display();

index,Name,Age,Height,Weight,IQ
0,Oliver,23,1.91,131,65.895
1,Charlotte,19,1.62,135,46.17
2,Henry,42,1.72,139,108.36
3,Amelia,64,1.57,134,150.72
4,Owen,35,1.85,138,97.125
5,Scott,36,1.65,128,89.1


### Statistical Operations
* [LinqStatistics](https://github.com/dkackman/LinqStatistics)

In [8]:
#r "nuget:LinqStatistics"

using LinqStatistics;

In [9]:
// Print the mean and standard deviation of every column except the first one
// (the first column, "Name", holds text and is skipped).
foreach (DataFrameColumn column in df.Columns.Skip(1))
{
    // warning: additional care must be taken for datasets which contain null
    int length = (int)column.Length;
    double[] samples = new double[length];
    for (int row = 0; row < length; row++)
    {
        samples[row] = Convert.ToDouble(column[row]);
    }

    double average = samples.Average();
    double deviation = samples.StandardDeviation();
    Console.WriteLine($"{column.Name} = {average} +/- {deviation:N3} (n={samples.Length})");
}

Age = 36,5 +/- 15,984 (n=6)
Height = 1,72 +/- 0,134 (n=6)
Weight = 134,16666666666666 +/- 4,167 (n=6)
IQ = 92,895 +/- 36,131 (n=6)
