/
common.go
157 lines (131 loc) · 4.84 KB
/
common.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*
Copyright 2024 KubeAGI.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package v1alpha1
import (
"reflect"
batchv1 "k8s.io/api/batch/v1"
corev1 "k8s.io/api/core/v1"
)
// MetricsKind is the string identifier of an evaluation metric.
// The supported values are the MetricsKind constants declared in this file.
type MetricsKind string
// Metric kinds, each named after the corresponding ragas metric.
const (
// AnswerRelevancy scores the relevancy of the answer according to the given question.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/answer_relevance.html
AnswerRelevancy MetricsKind = "answer_relevancy"
// AnswerSimilarity scores the semantic similarity of the ground truth with the generated answer.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/semantic_similarity.html
AnswerSimilarity MetricsKind = "answer_similarity"
// AnswerCorrectness measures answer correctness compared to the ground truth
// as a weighted combination of:
// - factuality
// - semantic similarity
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/answer_correctness.html
AnswerCorrectness MetricsKind = "answer_correctness"
// Faithfulness scores the factual consistency of the generated answer against the given context.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/faithfulness.html
Faithfulness MetricsKind = "faithfulness"
// ContextPrecision is an average-precision metric that evaluates whether all of
// the relevant items selected by the model are ranked higher or not.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_precision.html
ContextPrecision MetricsKind = "context_precision"
// ContextRelevancy gauges the relevancy of the retrieved context.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_relevancy.html
ContextRelevancy MetricsKind = "context_relevancy"
// ContextRecall estimates context recall by estimating TP and FN using the
// annotated answer and the retrieved context.
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/context_recall.html
ContextRecall MetricsKind = "context_recall"
// AspectCritique is designed to assess submissions based on predefined aspects
// such as harmlessness and correctness.
// SUPPORTED_ASPECTS = [ harmfulness, maliciousness, coherence, correctness, conciseness, ]
// See ragas: https://docs.ragas.io/en/stable/concepts/metrics/critique.html
AspectCritique MetricsKind = "aspect_critique"
)
// Metric selects one evaluation metric by kind, together with its parameters
// and the tolerance threshold applied to its score.
type Metric struct {
// Kind of this Metric
Kind MetricsKind `json:"kind,omitempty"`
// Parameters passed to this Metric
Parameters []Parameter `json:"parameters,omitempty"`
// ToleranceThreshbold on this Metric (serialized as `tolerance_threshold`).
// If the evaluation score is smaller than this tolerance threshold, we treat this RAG solution as `Bad`.
ToleranceThreshbold int `json:"tolerance_threshold,omitempty"`
}
// Parameter is a key-value pair supplied to a metric.
type Parameter struct {
// Key is the name of the parameter.
Key string `json:"key,omitempty"`
// Value is the value of the parameter, carried as a string.
Value string `json:"value,omitempty"`
}
// Report is the summarization of evaluation.
// It is a placeholder for now and declares no fields.
type Report struct {
// TODO
}
// Define RAG common structure and variables.
// Both constants are built by prefixing a fixed suffix with Group.
const (
// EvaluationJobLabels is Group + "/rag" (presumably the label key that marks
// jobs belonging to a RAG evaluation; confirm against callers).
EvaluationJobLabels = Group + "/rag"
// EvaluationApplicationLabel is Group + "/application" (presumably the label
// key that carries the evaluated application; confirm against callers).
EvaluationApplicationLabel = Group + "/application"
)
// RagStatusChanged reports whether status b differs from status a.
//
// Two statuses are considered different when their completion times differ,
// their phases differ, they hold a different number of conditions, or the
// Type, Status, Reason or Message of their first condition differ.
// Conditions after the first one are compared by count only.
func RagStatusChanged(a, b RAGStatus) bool {
	if !a.CompletionTime.Equal(b.CompletionTime) || a.Phase != b.Phase {
		return true
	}

	if len(a.Conditions) != len(b.Conditions) {
		return true
	}
	// Equal lengths from here on, so checking one side for emptiness is enough.
	if len(a.Conditions) == 0 {
		return false
	}

	// Only the leading condition is inspected.
	before, after := a.Conditions[0], b.Conditions[0]
	switch {
	case before.Type != after.Type,
		before.Status != after.Status,
		before.Reason != after.Reason,
		before.Message != after.Message:
		return true
	}
	return false
}
// Status strings returned as the first result of RagStatus.
const (
// ING is the default status: evaluating.
ING = "ing"
// COMPLETE is reported when the first condition is a true JobComplete and the phase is CompletePhase.
COMPLETE = "complete"
// FAILED is reported when the first condition is a true JobFailed.
FAILED = "failed"
// SUSPEND is reported whenever the RAG spec is suspended.
SUSPEND = "suspend"
)
// RagStatus summarizes the state of rag as three values: a status string
// (ING, COMPLETE, FAILED or SUSPEND), the current phase, and the message of
// the first condition (empty when there are no conditions).
//
// The status starts as ING. It becomes COMPLETE when the first condition is a
// true JobComplete and the phase is CompletePhase, or FAILED when the first
// condition is a true JobFailed. A suspended spec overrides all of these and
// yields SUSPEND.
func RagStatus(rag *RAG) (string, RAGPhase, string) {
	var (
		phase   = rag.Status.Phase
		status  = ING
		message string
	)

	if conds := rag.Status.Conditions; len(conds) > 0 {
		first := conds[0]
		message = first.Message

		// Only a condition whose status is true can move us away from ING.
		if first.Status == corev1.ConditionTrue {
			switch first.Type {
			case batchv1.JobComplete:
				if phase == CompletePhase {
					status = COMPLETE
				}
			case batchv1.JobFailed:
				status = FAILED
			}
		}
	}

	// Suspension takes precedence over whatever the conditions say.
	if rag.Spec.Suspend {
		status = SUSPEND
	}

	return status, phase, message
}
// RAGSpecChanged reports whether spec b differs from spec a in any of the
// fields Application, Datasets, JudgeLLM, Storage or ServiceAccountName.
// Only these five fields are inspected; other RAGSpec fields do not affect
// the result.
//
// Application and Storage are pointer fields and may be nil. Two nil pointers
// compare as unchanged, and a nil pointer against a non-nil one compares as
// changed.
func RAGSpecChanged(a, b RAGSpec) bool {
	// reflect.DeepEqual follows pointers on its own and treats nil pointers
	// safely, so the fields are passed as-is. Dereferencing them here would
	// panic whenever either side leaves Application or Storage unset.
	return !reflect.DeepEqual(a.Application, b.Application) ||
		!reflect.DeepEqual(a.Datasets, b.Datasets) ||
		!reflect.DeepEqual(a.JudgeLLM, b.JudgeLLM) ||
		!reflect.DeepEqual(a.Storage, b.Storage) ||
		a.ServiceAccountName != b.ServiceAccountName
}