# .NET 機械学習チュートリアル - 感情分析

[Microsoft Docs のチュートリアル](https://docs.microsoft.com/ja-jp/dotnet/machine-learning/tutorials/sentiment-analysis)に従って進める。まずは Microsoft.ML を NuGet で取得する。

In [1]:
// Both packages are pinned to the same release: an unpinned reference may
// resolve to a newer Microsoft.ML.FastTree than the pinned Microsoft.ML core
// package, which risks a version conflict at restore or load time.
#r "nuget:Microsoft.ML,1.4.0"
#r "nuget:Microsoft.ML.FastTree,1.4.0"

In [2]:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;
using Microsoft.ML.Trainers;
using Microsoft.ML.Transforms.Text;

データファイルのパス（`_dataPath`）は `static readonly` で用意しておく

In [3]:
// Location of the labelled data file, built relative to the current working
// directory: <cwd>/data/yelp_labelled.txt.
static readonly string _dataPath = Path.Combine(
    Path.Combine(Environment.CurrentDirectory, "data"),
    "yelp_labelled.txt");

// Show the resolved path so the expected file location can be checked.
display(_dataPath);

/home/jovyan/notebooks/notes_dotnet/MLNet/data/yelp_labelled.txt

## クラスの準備

In [4]:
/// <summary>
/// One input row: a piece of text together with its sentiment label.
/// </summary>
public class SentimentData
{
    /// <summary>Text to classify; loaded from column 0 of the input file.</summary>
    [LoadColumn(0)]
    public string SentimentText;

    /// <summary>
    /// Sentiment label; loaded from column 1 of the input file and exposed
    /// under the column name "Label".
    /// </summary>
    [LoadColumn(1)]
    [ColumnName("Label")]
    public bool Sentiment;
}

/// <summary>
/// Model output for one row. Derives from <see cref="SentimentData"/>, so the
/// input text and label are carried along with the prediction.
/// </summary>
public class SentimentPrediction : SentimentData
{
    /// <summary>Predicted label; bound to the "PredictedLabel" column.</summary>
    [ColumnName("PredictedLabel")]
    public bool Prediction { get; set; }

    /// <summary>
    /// Value of the "Probability" output column.
    /// NOTE(review): presumably the calibrated probability of the positive
    /// class - confirm against the trainer documentation.
    /// </summary>
    public float Probability { get; set; }

    /// <summary>
    /// Value of the "Score" output column.
    /// NOTE(review): presumably the raw, uncalibrated model score - confirm.
    /// </summary>
    public float Score { get; set; }
}

// Render a default-constructed instance of each class to show its members.
Console.Out.WriteLine("[SentimentData]");
display(new SentimentData());

Console.Out.WriteLine("[SentimentPreciction]");
display(new SentimentPrediction());

[SentimentData]


SentimentText,Sentiment
<null>,False


[SentimentPreciction]


Prediction,Probability,Score,SentimentText,Sentiment
False,0,0,<null>,False


## データを読み込む

In [5]:
// Entry point for all ML.NET operations used in the following cells.
var mlContext = new MLContext();

// Load the data file into an IDataView using the SentimentData column
// mapping; the file has no header row.
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentData>(_dataPath, hasHeader: false);

// Split the data: testFraction is the share held out for testing, so 20% of
// the rows go to the test set and the remaining 80% to the training set
// (Train:Test = 0.8:0.2).
TrainTestData splitDataView = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);

## モデルの構築とトレーニング

In [6]:
%%time

// Training portion of the split created earlier.
var splitTrainSet = splitDataView.TrainSet;

// Pipeline:
//   1. FeaturizeText turns the SentimentText column into a numeric
//      "Features" column.
//   2. SdcaLogisticRegression performs binary classification, using "Label"
//      as the label column and "Features" as the feature column.
var estimator = mlContext.Transforms.Text
    .FeaturizeText(
        outputColumnName: "Features",
        inputColumnName: nameof(SentimentData.SentimentText))
    .Append(
        mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
            labelColumnName: "Label",
            featureColumnName: "Features"));

// Run training: fit the whole pipeline on the training set.
var model = estimator.Fit(splitTrainSet);

Wall time: 1838.9042000000002ms

In [7]:
// Apply the trained model to the held-out test portion of the split.
var splitTestSet = splitDataView.TestSet;
var predictions = model.Transform(splitTestSet);

// Compute binary-classification metrics, comparing the predictions against
// the "Label" column.
CalibratedBinaryClassificationMetrics metrics =
    mlContext.BinaryClassification.Evaluate(predictions, labelColumnName: "Label");

// Print the report one line at a time; metric values are formatted as
// percentages with two decimals (P2).
foreach (var reportLine in new[]
{
    "=============== Evaluating Model accuracy with Test data===============",
    "Model quality metrics evaluation",
    "--------------------------------",
    $"Accuracy: {metrics.Accuracy:P2}",
    $"Auc: {metrics.AreaUnderRocCurve:P2}",
    $"F1Score: {metrics.F1Score:P2}",
    "=============== End of model evaluation ===============",
})
{
    Console.WriteLine(reportLine);
}

Model quality metrics evaluation
--------------------------------
Accuracy: 84.49%
Auc: 90.51%
F1Score: 84.49%


## テストデータを予測する

In [8]:
// Build a prediction engine that maps one SentimentData to one
// SentimentPrediction using the trained model.
var predictionFunction =
    mlContext.Model.CreatePredictionEngine<SentimentData, SentimentPrediction>(model);

// A single hand-written statement to classify.
var sampleStatement = new SentimentData
{
    SentimentText = "This was a very bad steak",
};

SentimentPrediction resultPrediction = predictionFunction.Predict(sampleStatement);

// Show the prediction together with the input it was made from.
display(resultPrediction);

Prediction,Probability,Score,SentimentText,Sentiment
False,0.09706937,-2.2302198,This was a very bad steak,False


## バッチ予測

In [9]:
// A small batch of hand-written statements to run through the trained model.
IEnumerable<SentimentData> sentiments = new SentimentData[]
{
    new SentimentData { SentimentText = "This was a horrible meal" },
    new SentimentData { SentimentText = "I love this spaghetti." },
};

// Wrap the in-memory batch in an IDataView and apply the model to it.
var batchComments = mlContext.Data.LoadFromEnumerable(sentiments);
IDataView predictions = model.Transform(batchComments);

// Read the transformed rows back as SentimentPrediction objects, allocating
// a separate object per row (reuseRowObject: false).
var predictedResults = mlContext.Data.CreateEnumerable<SentimentPrediction>(
    predictions,
    reuseRowObject: false);

display(predictedResults);

index,Prediction,Probability,Score,SentimentText,Sentiment
0,False,0.12665626,-1.9308524,This was a horrible meal,False
1,True,0.99589515,5.4914775,I love this spaghetti.,False
