In [20]:
// Dependencies

#r "nuget: Plotly.NET, 6.0.0-preview.1"
#r "nuget: Plotly.NET.Interactive, 6.0.0-preview.1"
#r "nuget: FSharp.Stats.Interactive, 0.6.0"
#r "nuget: BioFSharp, 1.2.0"
#r "nuget: BioFSharp.IO, 2.0.0-preview.3"
#r "nuget: Deedle.Interactive, 3.0.0"

In [21]:
open Plotly.NET 
open Plotly.NET.Interactive
open Plotly.NET.StyleParam
open FSharp.Stats.Interactive
open BioFSharp
open BioFSharp.IO
open Deedle
open FSharp.Stats.SeqExtension

In [90]:


let getBasicBP (data : list<string * string * list<float>>) (subplotId:int) =
    data
    |> List.map(fun (label, color, values) ->
        Chart.BoxPlot(
            Y = values, 
            BoxPoints = BoxPoints.All, 
            Jitter = 0.8, 
            Notched = true ,
            Marker = TraceObjects.Marker.init(Color=Color.fromString color, Size = 10),
            BoxMean = BoxMean.SD,
            Name = label)
        |> GenericChart.mapTrace(fun t -> 
            t?BoxPoints <- "all"
            t?fillcolor <- "rgba(0,0,0,0)"
            t?line <- {| color = "rgba(0,0,0,0)"|}
            t
        )   
    )




In [91]:

let getBasicPC  (data : list<string * string * list<float>>) (subplotId:int) = 
    data 
    |> List.map(fun (label, color, values) ->
        let avg = List.average values 
        let stdDev = Seq.stDev values
        Chart.Point(x = [label], y = [avg], ShowLegend = false, Marker = TraceObjects.Marker.init(Color = Color.fromString color, Symbol = MarkerSymbol.Diamond, Size = 10 ))
        |> Chart.withAnnotation(LayoutObjects.Annotation.init(Text = $"{avg:F4}", X = label, Y = avg + 0.04, XShift = 7, ShowArrow = false) )
    )


In [None]:
let relevanceBP = [0.7; 0.3; 1; 0.7; 1; 0.7; 1; 0.7]
let relevanceEP = [0.7; 0.3; 1; 1; 1; 0.7; 1; 0.7]
let relevanceHP = [0.3; 0.7; 1; 0.3; 0.7; 0.7; 0.7; 1]

let coherenceBP = [0.7;0.7;1;1;0.7;1;1;0.7]
let coherenceEP = [0.7;0.7;1;0.7;0.7;1;1;0.7]
let coherenceHP = [0.7;1;0.7;0.7;0.7;1;1;1]

let facConBP = [0.3;0.3;1;0.7;1;1;1;0.7]
let facConEP = [1.0;0.7;1;0.7;0.7;1;0.3;0.7]
let facConHP = [0.3;1;1;0.7;0.7;0.7;1;1]

let fluencyBP = [0.7;0.7;1;0.7;1;1;1;0.7]
let fluencyEP = [1.0;0.7;1;1;1;1;0.7;0.7]
let fluencyHP = [0.7;0.7;1;1;0.3;1;1;0.7]

let dataRelevance =
    [ "Basic Prompt", "#e41a1c", relevanceBP
      "Enhanced Prompt", "#4daf4a", relevanceEP
      "Hybrid Prompt", "#377eb8", relevanceHP ]

// let combBBP = Chart.combine([(getBasicBP relevanceBP "red" "Basic Prompt");(getBasicPC relevanceBP "red" "Basic Prompt")])
// let combEBP = Chart.combine([(getBasicBP relevanceEP "green" "Enhanced Prompt");(getBasicPC relevanceEP "green" "Enhanced Prompt")])
// let combHBP = Chart.combine([(getBasicBP relevanceHP "blue" "Hybrid Prompt");(getBasicPC relevanceHP "blue" "Hybrid Prompt")])



let yAxis =
    LayoutObjects.LinearAxis.init(
        Range = Range.MinMax(0, 1.2),
        Tick0 = 0.0,
        DTick = 0.15,
        ShowGrid = true,
        GridColor = Color.fromString "#e0e0e0",
        Title = Title.init(Text = "Relevance", Font = Font.init(Size = 14))
    )

let fin = 
    Chart.combine([combBBP;combEBP;combHBP])
    |> Chart.withYAxisStyle(MinMax = (0.2 , 1.2), ShowGrid = true, GridColor = Color.fromString("#e0e0e0"))
    |> Chart.withYAxis(yAxis)
    |> Chart.withTitle (Title.init(Text = "Relevance", Font = Font.init(Size = 30),X = 0.5, XAnchor = XAnchorPosition.Center))
    |> Chart.withMarginSize(Left=40, Right=40, Top=60, Bottom=40)
    |> Chart.withLayoutStyle (PlotBGColor = Color.fromString "#fafafa", PaperBGColor = Color.fromString "white")
    |> Chart.withYAxisStyle(TitleText = "Evaluation Score", TitleFont = Font.init(Size = 20), ShowGrid = true, GridColor = Color.fromString("LightGrey"))
    |> Chart.withLegend(LayoutObjects.Legend.init(X = 1.02, Y = 0.5, XAnchor = XAnchorPosition.Left, YAnchor = YAnchorPosition.Middle))
    |> Chart.withTemplate(ChartTemplates.lightMirrored)
    |> Chart.withSize(1000,500)

let relevanceChart = 
    let relPoints, relAnnots = List.unzip(getBasicBP dataRelevance 1)
    let relMean, relMAnnots = List.unzip(getBasicPC dataRelevance 1)
    Chart.combine(relPoints @ relMean)

Error: input.fsx (22,31)-(22,59) typecheck error This value is not a function and cannot be applied.
input.fsx (22,77)-(22,105) typecheck error This value is not a function and cannot be applied.
input.fsx (23,31)-(23,61) typecheck error This value is not a function and cannot be applied.
input.fsx (23,82)-(23,112) typecheck error This value is not a function and cannot be applied.
input.fsx (24,31)-(24,60) typecheck error This value is not a function and cannot be applied.
input.fsx (24,79)-(24,108) typecheck error This value is not a function and cannot be applied.

In [161]:
// Chart Maintainer
let relevanceBP = [0.7; 0.3; 1; 0.7; 1; 0.7; 1; 0.7]
let relevanceEP = [0.7; 0.3; 1; 1; 1; 0.7; 1; 0.7]
let relevanceHP = [0.3; 0.7; 1; 0.3; 0.7; 0.7; 0.7; 1]

let coherenceBP = [0.7;0.7;1;1;0.7;1;1;0.7]
let coherenceEP = [0.7;0.7;1;0.7;0.7;1;1;0.7]
let coherenceHP = [0.7;1;0.7;0.7;0.7;1;1;1]

let facConBP = [0.3;0.3;1;0.7;1;1;1;0.7]
let facConEP = [1.0;0.7;1;0.7;0.7;1;0.3;0.7]
let facConHP = [0.3;1;1;0.7;0.7;0.7;1;1]

let fluencyBP = [0.7;0.7;1;0.7;1;1;1;0.7]
let fluencyEP = [1.0;0.7;1;1;1;1;0.7;0.7]
let fluencyHP = [0.7;0.7;1;1;0.3;1;1;0.7]

let dataRelevance =
    [ "Basic Prompt", "#e41a1c", relevanceBP
      "Enhanced Prompt", "#4daf4a", relevanceEP
      "Hybrid Prompt", "#377eb8", relevanceHP ]

let dataCoherence =
    [ "Basic Prompt", "#e41a1c", coherenceBP
      "Enhanced Prompt", "#4daf4a", coherenceEP
      "Hybrid Prompt", "#377eb8", coherenceHP ]


let dataFactualConsistency =
    [ "Basic Prompt", "#e41a1c", facConBP
      "Enhanced Prompt", "#4daf4a", facConEP
      "Hybrid Prompt", "#377eb8", facConHP ]


let dataFluency =
    [ "Basic Prompt", "#e41a1c", fluencyBP
      "Enhanced Prompt", "#4daf4a", fluencyEP
      "Hybrid Prompt", "#377eb8", fluencyHP ]

let titles = 
    [ ("Relevance", 0.18, 1.05)
      ("Coherence", 0.82, 1.05)
      ("Factual Consistency", 0.18, 0.48)
      ("Fluency", 0.82, 0.48) ]

let titleAnnotations =
    titles
    |> List.map (fun (txt, x, y) ->
        LayoutObjects.Annotation.init(
            Text = txt,
            X = x,
            Y = y,
            ShowArrow = false,
            XRef = "paper",
            YRef = "paper",
            Font = Font.init(Size = 25, Color = Color.fromString "#000")
        ))      


let pointsWithErrorBarsAndScatter (data : list<string * string * list<float>>) subplotId showLegend =
    data
    |> List.map (fun (label, color, values) ->
        let mean = FSharp.Stats.Seq.mean values
        //let stddev = FSharp.Stats.Seq.stDev values

        // Mean + error bar
        let chartPoint =
            Chart.Point(
                xy = [(label, mean)],
                Name = if showLegend then label else null
                ,
                ShowLegend = showLegend,
                Marker = TraceObjects.Marker.init(Color = Color.fromString color,Symbol = MarkerSymbol.DiamondX, Size = 20)
            )
            //|> Chart.withYErrorStyle(Visible = true, Type = StyleParam.ErrorType.Data, Array = [stddev])
            |> Chart.withAxisAnchor(X = subplotId, Y = subplotId)
            |> Chart.withYAxisStyle(MinMax = (0.0, 1.2), Id = StyleParam.SubPlotId.YAxis subplotId)  
            |> Chart.withXAxisStyle(ShowGrid = false, Id = StyleParam.SubPlotId.XAxis subplotId)

        // Scatter as jittered boxplot (whiskers hidden)
        let chartScatter =
            Chart.BoxPlot(
                Y = values,
                X = List.replicate (List.length values) label,  // <-- force same x-category
                BoxPoints = BoxPoints.All,
                Jitter = 0.9,
                Notched = false,
                ShowLegend = false,
                Marker = TraceObjects.Marker.init(Color = Color.fromString color, Size = 10),
                Name = label
            )
            |> GenericChart.mapTrace(fun t -> 
                t?BoxPoints <- "all"
                t?fillcolor <- "rgba(0,0,0,0)"   // hide box fill
                t?line <- {| color = "rgba(0,0,0,0)"|} // hide whiskers
                t
            )
            |> Chart.withAxisAnchor(X = subplotId, Y = subplotId)   // <-- bind to correct subplot

        let combChart = Chart.combine([chartScatter; chartPoint])

        let annotation =
            LayoutObjects.Annotation.init(
                Text = $"{mean:F4}",
                X = label,
                Y = mean + 0.05,
                ShowArrow = false,
                XRef = $"x{subplotId}",
                YRef = $"y{subplotId}",
                Font = Font.init(Size = 15),
                XShift = 35
            )

        combChart, annotation
    )





let relPoints, relAnnots = List.unzip (pointsWithErrorBarsAndScatter dataRelevance 1 true)
let cohPoints, cohAnnots = List.unzip (pointsWithErrorBarsAndScatter dataCoherence 2 false)
let facPoints, facAnnots = List.unzip (pointsWithErrorBarsAndScatter dataFactualConsistency 3 false)
let fluePoints, flueAnnots = List.unzip (pointsWithErrorBarsAndScatter dataFluency 4 false)


let yAxisGlobalTitle =
    LayoutObjects.Annotation.init(
        Text = "Evaluation Score",
        X = -0.025, 
        Y = 0.5,
        ShowArrow = false,
        TextAngle = -90,
        XRef = "paper",
        YRef = "paper",
        Font = Font.init(Size = 30),
        XAnchor = StyleParam.XAnchorPosition.Right,
        YAnchor = StyleParam.YAnchorPosition.Middle
    )


let finalChart2 =
    Chart.combine(relPoints @ cohPoints @ facPoints @ fluePoints)
    |> Chart.withAnnotations(titleAnnotations @ relAnnots @ cohAnnots @ facAnnots @ flueAnnots @ [yAxisGlobalTitle])
    |> Chart.withLegend(LayoutObjects.Legend.init(Orientation = Orientation.Horizontal, Font = Font.init(Size = 20), X = 0.5, XAnchor = XAnchorPosition.Center, Y = -0.08,YAnchor = YAnchorPosition.Bottom, TraceOrder = TraceOrder.Normal))//|> Chart.withLegend(LayoutObjects.Legend.init(X = 1.02, Y = 0.5, XAnchor = XAnchorPosition.Left, YAnchor = YAnchorPosition.Middle)) //Title = Title.init("Prompt Type") , 
    |> Chart.withLayoutGrid(LayoutObjects.LayoutGrid.init(
        Rows= 2, 
        Columns = 2, 
        SubPlots = 
                [| 
                    [| (StyleParam.LinearAxisId.X 1, StyleParam.LinearAxisId.Y 1); 
                       (StyleParam.LinearAxisId.X 2, StyleParam.LinearAxisId.Y 2) |]
                    [| (StyleParam.LinearAxisId.X 3, StyleParam.LinearAxisId.Y 3); 
                       (StyleParam.LinearAxisId.X 4, StyleParam.LinearAxisId.Y 4) |]
                |],
        RowOrder = LayoutGridRowOrder.TopToBottom,
        XGap = 0.15,
        YGap = 0.15
    ))
    |> Chart.withSize(1600, 1200)
    |> Chart.withTemplate(ChartTemplates.lightMirrored)


finalChart2


In [13]:
// allRelBP
// |> Chart.saveHtml "relevance-boxplot.html"

In [162]:
// Chart CSB 


let relevanceBP2 = [0.7;0.7;0.7;1;0.3;0.7;0.7;1]
let relevanceEP2 = [0.7;0.7;0.7;1;0.7;1;0.7;1]
let relevanceHP2 = [0.7;1;1;1;0.7;0.7;1;1]

let coherenceBP2 = [1.0;0.7;1;0.7;0.3;0.3;0.3;1]
let coherenceEP2 = [0.7;0.3;1;0.7;0.7;0.7;0.7;0.7]
let coherenceHP2 = [0.7;0.7;1;0.7;0.7;0.7;1;0.7]

let facConBP2 = [0.7;1;1;0.7;0.3;0.3;0.3;1]
let facConEP2 = [1.0;0.7;1;0.7;0.3;1;0.7;1]
let facConHP2 = [1.0;1;1;1;0.7;0.3;0.7;1]

let fluencyBP2 = [0.7;1;1;0.7;0.3;1;0.7;1]
let fluencyEP2 = [1.0;1;0.7;0.7;0.7;0.7;0.7;0.7]
let fluencyHP2 = [1.0;1;1;0.7;0.3;1;0.7;0.7]

let dataRelevance2 =
    [ "Basic Prompt", "#e41a1c", relevanceBP2
      "Enhanced Prompt", "#4daf4a", relevanceEP2
      "Hybrid Prompt", "#377eb8", relevanceHP2 ]

let dataCoherence2 =
    [ "Basic Prompt", "#e41a1c", coherenceBP2
      "Enhanced Prompt", "#4daf4a", coherenceEP2
      "Hybrid Prompt", "#377eb8", coherenceHP2 ]

let dataFactualConsistency2 =
    [ "Basic Prompt", "#e41a1c", facConBP2
      "Enhanced Prompt", "#4daf4a", facConEP2
      "Hybrid Prompt", "#377eb8", facConHP2 ]

let dataFluency2 =
    [ "Basic Prompt", "#e41a1c", fluencyBP2
      "Enhanced Prompt", "#4daf4a", fluencyEP2
      "Hybrid Prompt", "#377eb8", fluencyHP2 ]

let relPoints2, relAnnots2 = List.unzip (pointsWithErrorBarsAndScatter dataRelevance2 1 true)
let cohPoints2, cohAnnots2 = List.unzip (pointsWithErrorBarsAndScatter dataCoherence2 2 false)
let facPoints2, facAnnots2 = List.unzip (pointsWithErrorBarsAndScatter dataFactualConsistency2 3 false)
let fluePoints2, flueAnnots2 = List.unzip (pointsWithErrorBarsAndScatter dataFluency2 4 false)


let finalChart3 =
    Chart.combine(relPoints2 @ cohPoints2 @ facPoints2 @ fluePoints2)
    |> Chart.withAnnotations(titleAnnotations @ relAnnots2 @ cohAnnots2 @ facAnnots2 @ flueAnnots2 @ [yAxisGlobalTitle])
    |> Chart.withLegend(LayoutObjects.Legend.init(Orientation = Orientation.Horizontal, Font = Font.init(Size = 20), X = 0.5, XAnchor = XAnchorPosition.Center, Y = -0.08,YAnchor = YAnchorPosition.Bottom, TraceOrder = TraceOrder.Normal))//|> Chart.withLegend(LayoutObjects.Legend.init(X = 1.02, Y = 0.5, XAnchor = XAnchorPosition.Left, YAnchor = YAnchorPosition.Middle)) //Title = Title.init("Prompt Type") , 
    |> Chart.withLayoutGrid(LayoutObjects.LayoutGrid.init(
        Rows= 2, 
        Columns = 2, 
        SubPlots = 
                [| 
                    [| (StyleParam.LinearAxisId.X 1, StyleParam.LinearAxisId.Y 1); 
                       (StyleParam.LinearAxisId.X 2, StyleParam.LinearAxisId.Y 2) |]
                    [| (StyleParam.LinearAxisId.X 3, StyleParam.LinearAxisId.Y 3); 
                       (StyleParam.LinearAxisId.X 4, StyleParam.LinearAxisId.Y 4) |]
                |],
        RowOrder = LayoutGridRowOrder.TopToBottom,
        XGap = 0.15,
        YGap = 0.15
    ))
    |> Chart.withSize(1600, 1200)
    |> Chart.withTemplate(ChartTemplates.lightMirrored)


finalChart3


In [9]:
// Overall usefulness bar chart 


let prompts = ["Basic Prompt"; "Enhanced Prompt"; "Hybrid Prompt"]

let resBP = [0; 1; 3; 4]
let resEP = [0; 1; 5; 2]
let resHP = [0; 1; 6; 1]

let metrics = ["Strongly Disagree"; "Disagree"; "Agree"; "Strongly Agree"]
let colors = ["#d73027"; "#fc8d59"; "#91bfdb"; "#4575b4"] 

let usefulChart =
    [0 .. 3]
    |> List.map (fun i ->
        let values = [resBP.[i]; resEP.[i]; resHP.[i]]
        let data = List.zip (List.rev prompts) (List.rev values)
        Chart.Bar(
            keysValues = data,
            Name = metrics.[i],
            MarkerColor = Color.fromString colors.[i]
        )
        
    )
    |> Chart.combine //color "#e0e0e0"
    |> Chart.withLayoutStyle(BarMode = BarMode.Stack, PaperBGColor = Color.fromString("White"), PlotBGColor = Color.fromString("White"))
    |> Chart.withXAxisStyle(MinMax = (0 , 8), ShowGrid = false, GridColor = Color.fromString("White"))
    |> Chart.withYAxisStyle(ShowGrid = false, GridColor = Color.fromString("White"))
    |> Chart.withTitle(Title.init(Text = "Overall Usefulness per Prompt", Font = Font.init(Size = 30),X = 0.4, XAnchor = XAnchorPosition.Right))
    |> Chart.withLegend(LayoutObjects.Legend.init(Orientation = Orientation.Horizontal, X = 0.5, XAnchor = XAnchorPosition.Center, Y = 1,YAnchor = YAnchorPosition.Bottom, TraceOrder = TraceOrder.Normal))
    |> Chart.withSize(1200,500)

usefulChart 

In [10]:
// 2nd eval usefulness 3 agrees LPA2

let resBP2 = [0;2;5;1]
let resEP2 = [0;0;4;4]
let resHP2 = [0;1;3;4]

let usefulChart2 =
    [0 .. 3]
    |> List.map (fun i ->
        let values = [resBP2.[i]; resEP2.[i]; resHP2.[i]]
        let data = List.zip (List.rev prompts) (List.rev values)
        Chart.Bar(
            keysValues = data,
            Name = metrics.[i],
            MarkerColor = Color.fromString colors.[i]
        )
        
    )
    |> Chart.combine //color "#e0e0e0"
    |> Chart.withLayoutStyle(BarMode = BarMode.Stack, PaperBGColor = Color.fromString("White"), PlotBGColor = Color.fromString("White"))
    |> Chart.withXAxisStyle(MinMax = (0 , 8), ShowGrid = false, GridColor = Color.fromString("White"))
    |> Chart.withYAxisStyle(ShowGrid = false, GridColor = Color.fromString("White"))
    |> Chart.withTitle(Title.init(Text = "Overall Usefulness per Prompt", Font = Font.init(Size = 30),X = 0.4, XAnchor = XAnchorPosition.Right))
    |> Chart.withLegend(LayoutObjects.Legend.init(Orientation = Orientation.Horizontal, X = 0.5, XAnchor = XAnchorPosition.Center, Y = 1,YAnchor = YAnchorPosition.Bottom, TraceOrder = TraceOrder.Normal))
    |> Chart.withSize(1200,500)

usefulChart2

In [154]:
//t-test
open FSharp.Stats
open FSharp.Stats.Testing
let sample1 = relevanceEP |> vector
let sample2 = relevanceHP |> vector

// let twoway = TTest.twoSample true sample1 sample2 

let performTTest x y =
    try
        let res = 
            Testing.TTest.twoSample true x y
        res.PValue
    with 
    | _ -> 0.0 
performTTest sample1 sample2

let perfromTTestperMetric (valBP:list<float>) (valEP:list<float>) (valHP:list<float>) =
    [
        performTTest (valBP |> vector) (valEP |> vector)
        performTTest (valBP |> vector) (valHP |> vector)
        performTTest (valEP |> vector) (valHP |> vector)
    ]

let resMaintainer =
    $"
    Relevance pValues: {perfromTTestperMetric relevanceBP relevanceEP relevanceHP}
    Coherence pValues: {perfromTTestperMetric coherenceBP coherenceEP coherenceHP}
    Factual Consistency pValues: {perfromTTestperMetric facConBP facConEP facConHP}
    Fluency pValues: {perfromTTestperMetric fluencyBP fluencyEP fluencyHP}
    "

let resSecondEval =
    $"
    Relevance pValues: {perfromTTestperMetric relevanceBP2 relevanceEP2 relevanceHP2}
    Coherence pValues: {perfromTTestperMetric coherenceBP2 coherenceEP2 coherenceHP2}
    Factual Consistency pValues: {perfromTTestperMetric facConBP2 facConEP2 facConHP2}
    Fluency pValues: {perfromTTestperMetric fluencyBP2 fluencyEP2 fluencyHP2}
    "

resMaintainer, resSecondEval

Unnamed: 0,Unnamed: 1
Item1,Relevance pValues: [0.7637988635063372; 0.4998774141083544; 0.3497684180646332]  Coherence pValues: [0.6419792418874009; 1; 0.6419792418874009]  Factual Consistency pValues: [0.928852731105513; 0.7265964443552055; 0.7637988635063372]  Fluency pValues: [0.6419792418873995; 0.6419792418874009; 0.41543954004821515]
Item2,Relevance pValues: [0.3718632163598864; 0.10869886668073529; 0.3503914791841837]  Coherence pValues: [0.8533719196311086; 0.3830096470939707; 0.30840722079260585]  Factual Consistency pValues: [0.35933853344999356; 0.2512868877414969; 0.7715680770174497]  Fluency pValues: [0.8087077322432732; 1; 0.8087077322432732]
