# .NET機械学習チュートリアル - 感情分析

[MSDNのチュートリアル](https://docs.microsoft.com/ja-jp/dotnet/machine-learning/tutorials/sentiment-analysis)に従って進める。まずはMicrosoft.MLをNuGetで取得する。

In [3]:
#r "nuget:Microsoft.ML"

In [4]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;

datapathは`static readonly`で用意しておく

In [5]:
// Location of the labelled data file: <current working directory>/Data/yelp_labelled.txt.
static readonly string _dataPath =
    Path.Combine(Directory.GetCurrentDirectory(), "Data", "yelp_labelled.txt");

## クラスの準備

In [51]:
/// <summary>
/// One input row: a piece of text together with its sentiment flag.
/// </summary>
public class SentimentData
{
    /// <summary>The text to analyse; bound to column 0 of the loaded file.</summary>
    [LoadColumn(0)]
    public string SentimentText;

    /// <summary>
    /// The sentiment flag; bound to column 1 of the loaded file and exposed to the
    /// pipeline under the column name "Label".
    /// </summary>
    [LoadColumn(1)]
    [ColumnName("Label")]
    public bool Sentiment;
}

/// <summary>
/// One prediction row: the input members inherited from <see cref="SentimentData"/>
/// plus the values read back from the model output.
/// </summary>
public class SentimentPrediction : SentimentData
{
    /// <summary>The predicted flag, read from the "PredictedLabel" column.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>
    /// NOTE(review): presumably the model's probability for the positive class,
    /// matched by member name - confirm against the ML.NET documentation.
    /// </summary>
    public float Probability { get; set; }

    /// <summary>
    /// NOTE(review): presumably the raw (uncalibrated) model score, matched by
    /// member name - confirm against the ML.NET documentation.
    /// </summary>
    public float Score { get; set; }
}

// Render a default-constructed instance of each class so the notebook shows
// which members each one exposes. Each instance is preceded by a header line
// naming its class.
Console.WriteLine("[SentimentData]");
display(new SentimentData());
Console.WriteLine("[SentimentPrediction]");
display(new SentimentPrediction());

[SentimentData]


SentimentText,Sentiment
<null>,False


[SentimentPrediction]


Prediction,Probability,Score,SentimentText,Sentiment
False,0,0,<null>,False


## データを読み込む

In [62]:
// ML.NET context through which the data, transform and trainer catalogs are reached.
var mlContext = new MLContext();

// Load the text file at _dataPath as SentimentData rows; the file has no header row.
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);
// testFraction: 0.2 sends about 20% of the rows to the test set and keeps the
// rest for training, i.e. Train:Test = 0.8:0.2.
var splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

## モデルの構築とトレーニング

In [65]:
%%time

// Rows set aside for training by the TrainTestSplit call.
var splitTrainSet = splitDataView.TrainSet;

// Pipeline: featurize SentimentText into a "Features" column, then append an
// SDCA logistic-regression binary classifier that reads "Label" and "Features".
var estimator = mlContext.Transforms.Text
    .FeaturizeText(
        outputColumnName: "Features",
        inputColumnName: nameof(SentimentData.SentimentText))
    .Append(
        mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
            labelColumnName: "Label",
            featureColumnName: "Features"));

Console.WriteLine("=============== Create and Train the Model ===============");
var model = estimator.Fit(splitTrainSet);
Console.WriteLine("=============== End of training ===============");



Wall time: 287.0753ms

In [86]:
// Run the trained model over the held-out rows and compute binary-classification
// metrics against the "Label" column.
IDataView splitTestSet = splitDataView.TestSet;
var predictions = model.Transform(splitTestSet);
var metrics = mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "Label");

// Report accuracy, AUC and F1 as percentages with two decimals.
Console.WriteLine("=============== Evaluating Model accuracy with Test data===============");
Console.WriteLine("Model quality metrics evaluation");
Console.WriteLine("--------------------------------");
Console.WriteLine($"Accuracy: {metrics.Accuracy:P2}");
Console.WriteLine($"Auc: {metrics.AreaUnderRocCurve:P2}");
Console.WriteLine($"F1Score: {metrics.F1Score:P2}");
Console.WriteLine("=============== End of model evaluation ===============");

Model quality metrics evaluation
--------------------------------
Accuracy: 83.42%
Auc: 90.48%
F1Score: 83.60%


## テストデータを予測する

In [80]:
// Build a prediction engine from the trained model for one-row-at-a-time scoring.
var predictionFunction = mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);

// Only the text is supplied; the Sentiment member keeps its default value.
var sampleStatement = new SentimentData { SentimentText = "This was a very bad steak" };
SentimentPrediction resultPrediction = predictionFunction.Predict(sampleStatement);

display(resultPrediction);

Prediction,Probability,Score,SentimentText,Sentiment
False,0.10682983,-2.12354,This was a very bad steak,False


## バッチ予測

In [79]:
// Two hand-written comments to run through the trained model. Sentiment is not
// set on them, so the Sentiment value shown in the results appears to be this
// unset input (false); the model's answer is in Prediction.
IEnumerable<SentimentData> sentiments = new SentimentData[]
{
    new SentimentData { SentimentText = "This was a horrible meal" },
    new SentimentData { SentimentText = "I love this spaghetti." },
};

// Wrap the in-memory comments in an IDataView and score them with the model.
var batchComments = mlContext.Data.LoadFromEnumerable(sentiments);
IDataView predictions = model.Transform(batchComments);

// Read the scored rows back as SentimentPrediction objects, one new object per
// row (reuseRowObject: false).
var predictedResults = mlContext.Data.CreateEnumerable<SentimentPrediction>(predictions, reuseRowObject: false);

display(predictedResults);

index,Prediction,Probability,Score,SentimentText,Sentiment
0,False,0.14584976,-1.76753,This was a horrible meal,False
1,True,0.99650645,5.6533427,I love this spaghetti.,False
