In [None]:
#r "nuget:Microsoft.ML"

In [None]:
using System;
using System.IO;
using Microsoft.ML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;

In [None]:
/// <summary>
/// One input row of the dataset, mapped from the text file by column index.
/// </summary>
public class SentimentIssue
{
    /// <summary>
    /// Boolean class label, read from file column 0.
    /// NOTE(review): the previewed rows suggest <c>true</c> marks a toxic comment - confirm against the dataset description.
    /// </summary>
    [LoadColumn(0)] public bool Label { get; set; }

    /// <summary>Comment text to classify, read from file column 2.</summary>
    [LoadColumn(2)] public string Text { get; set; }
}

In [None]:
/// <summary>
/// Output row of the model: the input columns inherited from
/// <see cref="SentimentIssue"/> plus the values written by the classifier.
/// </summary>
public class SentimentPrediction : SentimentIssue
{
    /// <summary>
    /// Bound to the "PredictedLabel" column. The attribute is needed because the
    /// property name differs from the column name it should be filled from.
    /// </summary>
    [ColumnName("PredictedLabel")] public bool Prediction { get; set; }

    /// <summary>
    /// Bound by name to the "Probability" column; the property name already
    /// matches, so no <c>ColumnName</c> attribute is required.
    /// </summary>
    public float Probability { get; set; }

    /// <summary>Bound by name to the "Score" column.</summary>
    public float Score { get; set; }
}

In [None]:
// Shared ML.NET context for every later cell; created with a fixed seed (1)
// so that operations drawing randomness from the context are repeatable.
MLContext mlContext = new MLContext(seed: 1);

In [None]:
// Load the tab-separated dataset into an IDataView whose columns follow the
// LoadColumn mappings declared on SentimentIssue.
// The relative path is assembled with Path.Combine so the directory separator
// matches the host OS; a hard-coded "..\Data\..." only resolves on Windows.
// hasHeader: true tells the loader that the first line holds column names, not data.
IDataView dataView = mlContext.Data.LoadFromTextFile<SentimentIssue>(
    Path.Combine("..", "Data", "wikiDetoxAnnotated40kRows.tsv"),
    hasHeader: true);

In [None]:
// Peek at the first 10 loaded rows to sanity-check the column mapping.
display(dataView.Preview(maxRows: 10).RowView);

index,Values
0,"[ Label: False, Text: "" ==He is a Rapist!!!!!== Please edit the article to include this important fact. Thank You. — Preceding unsigned comment added by • "" ]"
1,"[ Label: False, Text: The other two films Hitch and Magnolia are also directly related to the community in question, and may be of interest to those who see those films. So why not link to them? ]"
2,"[ Label: False, Text: == blocking people == how can i block lambs12 from my page and editing my stuff she's saying bad things she's my sister ]"
3,"[ Label: True, Text: == Are you a female????? == mootmootmootmootmootmootmootmootmootmoot ]"
4,"[ Label: False, Text: :::::Good enough for me; thanks for the info. I'll check the Sheffield references later. - ]"
5,"[ Label: False, Text: == Motherjane concert location == Hi, Motherjane, being a popular band, has headlined a lot of college rock festivals in India. But quoting the name of every college that they have performed is not quite right. So please refrain from adding the names of any more colleges/institutes. Thanks! — ]"
6,"[ Label: False, Text: == warning == don't vandalize ]"
7,"[ Label: False, Text: "" :There were reviews from critics in her song? "" ]"
8,"[ Label: False, Text: "" Jack, is it a misrepresentation for you to write more than 500,000 nucleotide base pairs, or should you write 582 base pair. Lets start watching """"RNA world"""" page too. "" ]"
9,"[ Label: False, Text: "":Thank you, I will use these tool responsibly. (Editor Review) "" ]"


In [None]:
// Hold out 20% of the rows for evaluation; the remainder is used for training.
var trainTestSplit = mlContext.Data.TrainTestSplit(dataView, testFraction: 0.2);
var trainingData = trainTestSplit.TrainSet;
var testData = trainTestSplit.TestSet;

In [None]:
// Show the first 10 rows that landed in the training partition.
display(trainingData.Preview(maxRows: 10).RowView);

index,Values
0,"[ Label: False, Text: "" ==He is a Rapist!!!!!== Please edit the article to include this important fact. Thank You. — Preceding unsigned comment added by • "" ]"
1,"[ Label: False, Text: The other two films Hitch and Magnolia are also directly related to the community in question, and may be of interest to those who see those films. So why not link to them? ]"
2,"[ Label: True, Text: == Are you a female????? == mootmootmootmootmootmootmootmootmootmoot ]"
3,"[ Label: False, Text: :::::Good enough for me; thanks for the info. I'll check the Sheffield references later. - ]"
4,"[ Label: False, Text: == Motherjane concert location == Hi, Motherjane, being a popular band, has headlined a lot of college rock festivals in India. But quoting the name of every college that they have performed is not quite right. So please refrain from adding the names of any more colleges/institutes. Thanks! — ]"
5,"[ Label: False, Text: == warning == don't vandalize ]"
6,"[ Label: False, Text: "" :There were reviews from critics in her song? "" ]"
7,"[ Label: False, Text: "" Jack, is it a misrepresentation for you to write more than 500,000 nucleotide base pairs, or should you write 582 base pair. Lets start watching """"RNA world"""" page too. "" ]"
8,"[ Label: False, Text: "":Thank you, I will use these tool responsibly. (Editor Review) "" ]"
9,"[ Label: False, Text: HELLO SITUSH ARE YOU THERE?? PEOPLE HAVE SOME QUESTIONS ]"


In [None]:
// Show the first 10 rows that landed in the held-out test partition.
display(testData.Preview(maxRows: 10).RowView);

index,Values
0,"[ Label: False, Text: == blocking people == how can i block lambs12 from my page and editing my stuff she's saying bad things she's my sister ]"
1,"[ Label: False, Text: "" :Michael, with all due respect, it's over. It may be that you can do math better than me, but it's not the math that's the problem, it's statements like """"I don't know a whole lot about this case, but I tend to think that race may well be relevant here"""" and """"My first instinct is it likely should be included as the news accounts likely included the information. But did they or was this simply not an issue?"""". I assume you are """"counting"""" those as """"in favour of inclusion""""really, one can't. They are not unequivocal inclusion votes and raise more questions than they answer. Moreover, I assume you're including comments like """"I agree that race should be included as long as it does not violate WP:UNDUE."""" That's finebut this participant did not address how the inclusion does not violate UNDUE, and since this (as John points out) is not a vote, such a statement can't have the full weight as is carried by the more complete answers of John and Beyond My Ken. "" ]"
2,"[ Label: False, Text: == Ecotheology is a acceptet term == I strongly suggest that the headline is keept. Just check out the web, ecotheology is a well known and accepted term. Religion and economy can be a link or further reading. ]"
3,"[ Label: False, Text: Welcome Hello and welcome to Wikipedia. We appreciate encyclopedic contributions, but some of your recent contributions seem to be advertising or for promotional purposes. Wikipedia does not allow advertising in articles. For more information on this, see *Policy on neutral point of view *Guideline on spam *Guideline on external links *Guideline on conflict of interest If you still have questions, there is a new contributor's help page, or you can write {{helpme}} below this message along with a question and someone will be along to answer it shortly. You may also find the following pages useful for a general introduction to Wikipedia. *The five pillars of Wikipedia *How to edit a page *Help pages *Tutorial *How to write a great article *Manual of Style I hope you enjoy editing Wikipedia! Please sign your name on talk pages using four tildes (~~~~); this will automatically produce your name and the date. Feel free to write a note on the bottom of my talk page if you want to get in touch with me. Again, welcome! ]"
4,"[ Label: False, Text: Does anyone know where the 6 for €8 fosters is gone? havnt seen that feckin pack lately ]"
5,"[ Label: False, Text: :Are you saying Quinn is fringe or Quinn's argument on gays is fringe? I'm not sure Quinn himself is fringe. Ed Decker on the other hand... I'd say if consensus around the definition of a fringe critic could be built and fringe critics could be identified, a better place for their criticisms would be the Anti-Mormonism article. That would be consistent with WP:FRINGE because it would place the criticisms in context. That being said, I do agree with your identification of Quinn as an openly gay critic, given the nature of his criticism. ]"
6,"[ Label: False, Text: == Uma Thurman == Hello, Noreplyhaha! If you'd like to make a case for undoing my edits to the Uma Thurman entry, I'd love to know what your reasons are. I think my edits are all justified for the reasons I stated in the comments, and I will be re-entering them unless there is an important reason not to. ]"
7,"[ Label: False, Text: racisim. Obviously it has failed.]] ]"
8,"[ Label: True, Text: so many ppl tlk on dis stoopid site i swear they must be retarded ]"
9,"[ Label: False, Text: "" == GA Review II == I'm sorry to say this, but I have to fail this article's GAN. The several major problems that brought up in this article's previous GA assessment have not been issued. As s/he has said before me, """"he article's WP:LEAD should be longer. The History, Media attention, Academics, Student Government, Demographics, Trivia, and Additional Information section are very deficient in citiations. Also, Triva and Additional information should be integrated into the rest of the article."""" Again, please look over WP:WIAGA and fix these problem, then renominate it for GA-class. However, I have re-assessed this article as B-class, as it does meet B-class criteria. My regards, "" ]"


In [None]:
// STEP 2: Data preparation - featurize the raw comment text.
// Reads the "Text" column and writes the result to a new "Features" column.
var dataProcessPipeline = mlContext.Transforms.Text
    .FeaturizeText(outputColumnName: "Features", inputColumnName: nameof(SentimentIssue.Text));

In [None]:
// STEP 3: Pick the learner and chain it after the data-preparation step.
// The trainer consumes the "Label" column as the target and "Features" as its input.
var trainer = mlContext.BinaryClassification.Trainers.SdcaLogisticRegression(
    labelColumnName: "Label",
    featureColumnName: "Features");
var trainingPipeline = dataProcessPipeline.Append(trainer);

In [None]:
// STEP 4: Fit the whole pipeline (featurization + trainer) on the training partition.
var trainedModel = trainingPipeline.Fit(input: trainingData);

In [None]:
// Render the fitted model object as rich notebook output for inspection.
display(trainedModel);

index,type,FeatureColumnName,FeatureColumnType,Model
Dimensions,IsKnownSize,ItemType,Size,RawType
SubModel,Calibrator,SubModel,Calibrator,Unnamed: 4_level_2
0,Microsoft.ML.Transforms.Text.TextFeaturizingEstimator+Transformer,,,
1,"Microsoft.ML.Data.BinaryPredictionTransformer<Microsoft.ML.Calibrators.CalibratedModelParametersBase<Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator>>",Features,DimensionsIsKnownSizeItemTypeSizeRawType[ 1017430 ]True{ Single: RawType: System.Single }1017430Microsoft.ML.Data.VBuffer<System.Single>,"SubModelCalibratorSubModelCalibratorMicrosoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]"
Dimensions,IsKnownSize,ItemType,Size,RawType
[ 1017430 ],True,{ Single: RawType: System.Single },1017430,Microsoft.ML.Data.VBuffer<System.Single>
SubModel,Calibrator,SubModel,Calibrator,
"Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]",

Dimensions,IsKnownSize,ItemType,Size,RawType
[ 1017430 ],True,{ Single: RawType: System.Single },1017430,Microsoft.ML.Data.VBuffer<System.Single>

SubModel,Calibrator,SubModel.1,Calibrator.1
"Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]","Microsoft.ML.Calibrators.ParameterMixingCalibratedModelParameters`2[Microsoft.ML.Trainers.LinearBinaryModelParameters,Microsoft.ML.Calibrators.PlattCalibrator]"


In [None]:
// STEP 5: Run the fitted model over the held-out rows, then compute
// binary-classification metrics from the "Label" and "Score" columns.
var predictions = trainedModel.Transform(testData);
var metrics = mlContext.BinaryClassification.Evaluate(
    data: predictions,
    labelColumnName: "Label",
    scoreColumnName: "Score");

In [None]:
// Show the trainer's string form (its type name, per the recorded output).
// Terminated with ';' like every other display cell, so the call is a plain
// statement and the value returned by display(...) is not taken as the cell result.
display(trainer.ToString());

Microsoft.ML.Trainers.SdcaLogisticRegressionBinaryTrainer

In [None]:
// Render the evaluation metrics computed on the test partition.
display(metrics);

LogLoss,LogLossReduction,Entropy,AreaUnderRocCurve,Accuracy,PositivePrecision,PositiveRecall,NegativePrecision,NegativeRecall,F1Score,AreaUnderPrecisionRecallCurve,ConfusionMatrix
0.2123293520279152,0.5282117726643863,0.4500522474395522,0.94170723287864,0.9480064991876016,0.9040767386091128,0.50066401062417,0.95042194092827,0.9944812362030904,0.6444444444444444,0.7757276738856298,"{ Microsoft.ML.Data.ConfusionMatrix: PerClassPrecision: [ 0.9040767386091128, 0.95042194092827 ], PerClassRecall: [ 0.50066401062417, 0.9944812362030905 ], Counts: [ [ 377, 376 ], [ 40, 7208 ] ], NumberOfClasses: 2 }"


In [None]:
// A single hand-written example to score; only Text is set, so Label keeps its default (false).
var sampleStatement = new SentimentIssue
{
    Text = "Your service is crappy *0(*!!!!!!s****."
};

In [None]:
// Wrap the fitted model in a prediction engine that maps one SentimentIssue
// to one SentimentPrediction at a time.
var predictionEngine = mlContext.Model
    .CreatePredictionEngine<SentimentIssue, SentimentPrediction>(trainedModel);

In [None]:
// Score the sample statement with the fitted model.
SentimentPrediction prediction = predictionEngine.Predict(sampleStatement);

In [None]:
// Render the prediction: predicted label, probability and score, plus the inherited input columns.
display(prediction);

Prediction,Probability,Score,Label,Text
True,0.78806555,1.3133042,False,Your service is crappy *0(*!!!!!!s****.
