In [46]:
// ML.NET Nuget packages installation
#r "nuget:Microsoft.ML,1.3.1"
//Install XPlot package
#r "nuget:XPlot.Plotly,2.0.0"

using System.IO;
using Microsoft.ML;
using Microsoft.ML.Data;
using XPlot.Plotly;

In [47]:
 public class PaxTrip
    { //"PassengerId","Survived","Pclass","Name","Sex","Age","SibSp","Parch","Ticket","Fare","Cabin","Embarked"
        //[LoadColumn(0)] public float PassengerId { get; set; }
        public bool Label { get; set; }
        public float Pclass { get; set; }
        public string Name { get; set; }
        public string Sex { get; set; }
        public string RawAge { get; set; }
        public float SibSp { get; set; }
        public float Parch { get; set; }
        public string Ticket { get; set; }
        public float Fare { get; set; }
        public string Cabin { get; set; }
        public string Embarked { get; set; }

    }
    public class PaxTripFarePrediction
    {
        [ColumnName("PredictedLabel")] public bool Prediction;
        public float Probability;
        public float Score;
    }
    /// <summary>
    /// The RawAge class is a helper class for a column transformation.
    /// </summary>
    public class FromAge
    {
        public string RawAge;
    }

    /// <summary>
    /// The ProcessedAge class is a helper class for a column transformation.
    /// </summary>
    public class ToAge
    {
        public string Age;
    }

In [49]:
display(h1("Loading data...."));
 
MLContext mlContext = new MLContext(seed: 0);

// set up a text loader
            var textLoader = mlContext.Data.CreateTextLoader(
                new TextLoader.Options()
                {
                    Separators = new[] { ',' },
                    HasHeader = true,
                    AllowQuoting = true,
                    Columns = new[]
                    {
                    new TextLoader.Column("Label", DataKind.Boolean, 1),
                    new TextLoader.Column("Pclass", DataKind.Single, 2),
                    new TextLoader.Column("Name", DataKind.String, 3),
                    new TextLoader.Column("Sex", DataKind.String, 4),
                    new TextLoader.Column("RawAge", DataKind.String, 5),  // <-- not a float!
                    new TextLoader.Column("SibSp", DataKind.Single, 6),
                    new TextLoader.Column("Parch", DataKind.Single, 7),
                    new TextLoader.Column("Ticket", DataKind.String, 8),
                    new TextLoader.Column("Fare", DataKind.Single, 9),
                    new TextLoader.Column("Cabin", DataKind.String, 10),
                    new TextLoader.Column("Embarked", DataKind.String, 11)
                    }
                }
            );
 
string trainingDataPath = Path.Combine(Environment.CurrentDirectory, "train_data.csv");
string testDataPath = Path.Combine(Environment.CurrentDirectory, "test_data.csv");
 
IDataView trainDataView = textLoader.Load(trainingDataPath);
IDataView testDataView = textLoader.Load(testDataPath);
 
display(h4("Schema of training DataView:"));
display(trainDataView.Schema);

index,Name,Index,IsHidden,Type,Annotations
0,Label,0,False,{ Microsoft.ML.Data.BooleanDataViewType: RawType: System.Boolean },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
1,Pclass,1,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
2,Name,2,False,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
3,Sex,3,False,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
4,RawAge,4,False,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
5,SibSp,5,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
6,Parch,6,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
7,Ticket,7,False,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
8,Fare,8,False,{ Microsoft.ML.Data.NumberDataViewType: RawType: System.Single },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }
9,Cabin,9,False,{ Microsoft.ML.Data.TextDataViewType: RawType: System.ReadOnlyMemory<System.Char> },{ Microsoft.ML.DataViewSchema+Annotations: Schema: [ ] }


In [50]:
//Util class to preview loaded data in IDataView

public static List<PaxTrip> Head(MLContext mlContext, IDataView dataView, int numberOfRows = 4)
{
    string msg = string.Format("DataView: Showing {0} rows with the columns", numberOfRows.ToString());
    display(msg);
          
    var rows = mlContext.Data.CreateEnumerable<PaxTrip>(dataView, reuseRowObject: false)
                    .Take(numberOfRows)
                    .ToList();
    
    return rows;
}


display("Showing 10 rows");
var tenRows = Head(mlContext,trainDataView, 4);
display(tenRows);

Showing 10 rows

DataView: Showing 4 rows with the columns

index,Label,Pclass,Name,Sex,RawAge,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,True,1,"Thorne, Mrs. Gertrude Maybelle",female,,0,0,PC 17585,79.2,,C
1,True,1,"Silverthorne, Mr. Spencer Victor",male,35.0,0,0,PC 17475,26.2875,E24,S
2,False,3,"Asim, Mr. Adola",male,35.0,0,0,SOTON/O.Q. 3101310,7.05,,S
3,False,3,"Ali, Mr. William",male,25.0,0,0,SOTON/O.Q. 3101312,7.05,,S


In [54]:
var pax = mlContext.Data.CreateEnumerable<PaxTrip>(trainDataView, reuseRowObject: false).ToArray();

// plot count of pax by sex
var chart = Chart.Plot(new Graph.Histogram()
    {
        x = pax.Select(v => v.Sex), 
        autobinx = false
    }
);
var layout = new Layout.Layout(){title="Passengers by Sex"};
chart.WithLayout(layout);
chart.WithXTitle("Sex");
chart.WithYTitle("Count");;
display(chart);

In [56]:
// plot count of pax by city embarked
var chart = Chart.Plot(new Graph.Histogram()
    {
        x = pax.Select(v => v.Embarked), 
        autobinx = false
    }
);
var layout = new Layout.Layout(){title="Passengers by Port Embarked"};
chart.WithLayout(layout);
chart.WithXTitle("Port");
chart.WithYTitle("Count");;
display(chart);

In [57]:
// plot count of pax by city embarked
var chart = Chart.Plot(new Graph.Histogram()
    {
        x = pax.Select(v => v.Label), 
        autobinx = false
    }
);
var layout = new Layout.Layout(){title="Count by Survival"};
chart.WithLayout(layout);
chart.WithXTitle("Survived");
chart.WithYTitle("Count");;
display(chart);

In [None]:
// Define group for Actual values 
var GenderBarGraph = new Graph.Bar()
{
    x = pax,
    y = pax.Select(v => v.Sex),
    name = "Sex"
};

// Define group for Prediction values 
var SurvivalBarGraph = new Graph.Bar()
{
    x = pax,
    y = pax.Select(v => v.Label),
    name = "Survived"
};

var chart = Chart.Plot(new[] {GenderBarGraph, SurvivalBarGraph});

var layout = new Layout.Layout(){barmode = "group", title="Survival by Gender"};
chart.WithLayout(layout);
chart.WithXTitle("Sex");
chart.WithYTitle("Count");
chart.WithLegend(true);
chart.Width = 700;
chart.Height = 400;

display(chart);