In [102]:
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.Data.Analysis"

In [103]:
using Microsoft.ML;
using Microsoft.Data.Analysis;
using Microsoft.ML.Data;
using XPlot.Plotly;
using System.Linq;

In [104]:
using Microsoft.AspNetCore.Html;
Formatter<DataFrame>.Register((df, writer) =>
{
    var headers = new List<IHtmlContent>();
    headers.Add(th(i("index")));
    headers.AddRange(df.Columns.Select(c => (IHtmlContent) th(c.Name)));
    var rows = new List<List<IHtmlContent>>();
    var take = 10;
    for (var i = 0; i < Math.Min(take, df.Rows.Count); i++)
    {
        var cells = new List<IHtmlContent>();
        cells.Add(td(i));
        foreach (var obj in df.Rows[i])
        {
            cells.Add(td(obj));
        }
        rows.Add(cells);
    }

    var t = table(
        thead(
            headers),
        tbody(
            rows.Select(
                r => tr(r))));

    writer.Write(t);
}, "text/html");

In [105]:
var data = DataFrame.LoadCsv("./bank.csv", separator: ';');

data

index,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
5,35,management,single,tertiary,no,747,no,no,cellular,23,feb,141,2,176,3,failure,no
6,36,self-employed,married,tertiary,no,307,yes,no,cellular,14,may,341,1,330,2,other,no
7,39,technician,married,secondary,no,147,yes,no,cellular,6,may,151,2,-1,0,unknown,no
8,41,entrepreneur,married,tertiary,no,221,yes,no,unknown,14,may,57,2,-1,0,unknown,no
9,43,services,married,primary,no,-88,yes,yes,cellular,17,apr,313,1,147,2,failure,no


In [106]:
data.Info()

index,Info,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,DataType,System.Single,System.String,System.String,System.String,System.String,System.Single,System.String,System.String,System.String,System.Single,System.String,System.Single,System.Single,System.Single,System.Single,System.String,System.String
1,Length (excluding null values),4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521,4521


In [107]:
data.Description()

index,Description,age,balance,day,duration,campaign,pdays,previous
0,Length (excluding null values),4521.0,4521.0,4521.0,4521.0,4521.0,4521.0,4521.0
1,Max,87.0,71188.0,31.0,3025.0,50.0,871.0,25.0
2,Min,19.0,-3313.0,1.0,4.0,1.0,-1.0,0.0
3,Mean,41.170094,1422.6578,15.915284,263.9613,2.7936296,39.766644,0.54257905


In [108]:
var jobs = data.GroupBy("job").Count();

jobs

index,job,age,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,unemployed,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
1,services,417,417,417,417,417,417,417,417,417,417,417,417,417,417,417,417
2,management,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969,969
3,blue-collar,946,946,946,946,946,946,946,946,946,946,946,946,946,946,946,946
4,self-employed,183,183,183,183,183,183,183,183,183,183,183,183,183,183,183,183
5,technician,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768,768
6,entrepreneur,168,168,168,168,168,168,168,168,168,168,168,168,168,168,168,168
7,admin.,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478,478
8,student,84,84,84,84,84,84,84,84,84,84,84,84,84,84,84,84
9,housemaid,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112,112


In [109]:
Chart.Plot(
    new Graph.Bar()
    {
        x = jobs.Columns["job"],
        y = jobs.Columns["age"]
    }
)

In [110]:
var marital = data.GroupBy("marital").Count();

marital

index,marital,age,job,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,married,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797,2797
1,single,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196
2,divorced,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528,528


In [111]:
Chart.Plot(
    new Graph.Bar()
    {
        x = marital.Columns["marital"],
        y = marital.Columns["age"]
    }
)

In [112]:
data.Columns["default"].ValueCounts()

index,Values,Counts
0,no,4445
1,yes,76


In [113]:
Chart.Plot(
    new Graph.Histogram()
    {
        x = data.Columns["default"]
    }
)

In [114]:
data

index,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no
5,35,management,single,tertiary,no,747,no,no,cellular,23,feb,141,2,176,3,failure,no
6,36,self-employed,married,tertiary,no,307,yes,no,cellular,14,may,341,1,330,2,other,no
7,39,technician,married,secondary,no,147,yes,no,cellular,6,may,151,2,-1,0,unknown,no
8,41,entrepreneur,married,tertiary,no,221,yes,no,unknown,14,may,57,2,-1,0,unknown,no
9,43,services,married,primary,no,-88,yes,yes,cellular,17,apr,313,1,147,2,failure,no


In [115]:
var context = new MLContext();

In [116]:
var trainTestSplit = context.Data.TrainTestSplit(data, testFraction: 0.2);

In [117]:
var dataView = (IDataView)data;

In [118]:
var stringColumns = dataView.Schema
    .Select(col => col.Name)
    .Where(c => !new[] { "age", "balance", "day", "duration", "campaign", "pdays", "previous" }.Contains(c))
    .ToArray();

In [119]:
var textOptions = new Microsoft.ML.Transforms.Text.TextFeaturizingEstimator.Options();

var pipeline = context.Transforms.Conversion.ConvertType("Label", "default", DataKind.Boolean)
    .Append(context.Transforms.Text.FeaturizeText("Text", textOptions, stringColumns))
    .Append(context.Transforms.Concatenate("Features", "age", "balance"))
    .Append(context.Transforms.Concatenate("Features", "Features", "Text"))
    .Append(context.BinaryClassification.Trainers.LbfgsLogisticRegression());

In [120]:
var crossValidation = context.BinaryClassification.CrossValidate(trainTestSplit.TestSet, pipeline);

var averageAuc = crossValidation.Average(cv => cv.Metrics.AreaUnderRocCurve);

Console.WriteLine($"Average AUC - {averageAuc}");

Average AUC - 0.9020709435802997


In [121]:
var model = pipeline.Fit(trainTestSplit.TrainSet);

In [122]:
public class BankData
{
    public float age { get; set; }
    public string job { get; set; }
    public string marital { get; set; }
    public string education { get; set; }
    public string @default { get; set; }
    public float balance { get; set; }
    public string housing  { get; set; }
    public string loan { get; set; }
    public string contact { get; set; }
    public float day { get; set; }
    public string month { get; set; }
    public float duration { get; set; }
    public float campaign { get; set; }
    public float pdays { get; set; }
    public float previous { get; set; }
    public string poutcome { get; set; }
    public string y { get; set; }
}

In [123]:
public class BankPrediction 
{
    public bool PredictedLabel;
    public float Score; 
}

In [124]:
var predictionFunc = context.Model.CreatePredictionEngine<BankData, BankPrediction>(model);

In [125]:
var newItem = new BankData
{
    age = 22,
    job = "entrepreneur",
    marital = "single",
    education = "primary",
    balance = -900,
    housing = "yes",
    loan = "yes",
    contact = "cellular"
};

In [126]:
var prediction = predictionFunc.Predict(newItem);

prediction.PredictedLabel

In [127]:
var anotherItem = new BankData
{
    age = 44,
    job = "management",
    marital = "married",
    education = "secondary",
    balance = 4200,
    housing = "yes",
    loan = "no",
    contact = "cellular"
};

In [128]:
var newPrediction = predictionFunc.Predict(anotherItem);

newPrediction.PredictedLabel