This repository has been archived by the owner on Oct 5, 2024. It is now read-only.
forked from Garvit244/Leetcode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rag_metrics.py
94 lines (69 loc) · 2.85 KB
/
rag_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# --- Environment & third-party setup --------------------------------------
# stdlib
import glob
import os
from typing import Any, List, Tuple

# third-party
from deepeval.benchmarks.human_eval.human_eval import HumanEval
from deepeval.benchmarks.tasks import HumanEvalTask
from deepeval.models import DeepEvalBaseLLM
from dotenv import load_dotenv
from langchain_core.messages import AIMessage, HumanMessage
from langchain_openai import ChatOpenAI

# Load OPENAI_API (and anything else in .env) before any model is constructed.
load_dotenv()
class GPT4Model(DeepEvalBaseLLM):
    """DeepEval LLM wrapper around a LangChain chat model.

    Holds a fixed ``context`` string that is prepended to every prompt in
    :meth:`generate_samples`, so benchmark generations are retrieval-augmented.
    """

    def __init__(self, model, context, *args, **kwargs):
        # Set attributes BEFORE super().__init__(), which may call load_model().
        self.model = model      # LangChain chat model (e.g. ChatOpenAI)
        self.context = context  # RAG context prepended in generate_samples
        super().__init__(*args, **kwargs)

    def load_model(self):
        """Return the wrapped chat model."""
        return self.model

    def generate(self, prompt: str) -> str:
        """Synchronously generate one completion for ``prompt``."""
        chat_model = self.load_model()
        return chat_model.invoke(prompt).content

    def get_model_name(self):
        return "OpenAI Model"

    async def a_generate(self, prompt: str) -> str:
        """Asynchronously generate one completion for ``prompt``."""
        chat_model = self.load_model()
        res = await chat_model.ainvoke(prompt)
        return res.content

    def generate_samples(
        self, prompt: str, n: int, temperature: float
    ) -> list[str]:
        """Generate ``n`` samples at ``temperature``, prefixed with the context.

        Temporarily overrides the model's ``n``/``temperature`` and restores
        them afterwards (the originals were saved but never restored before —
        fixed here so repeated calls don't leak settings).
        """
        chat_model = self.load_model()
        og_n, og_temperature = chat_model.n, chat_model.temperature
        chat_model.n = n
        chat_model.temperature = temperature
        try:
            full_prompt = str(self.context) + prompt
            # NOTE(review): _generate is a private LangChain API; it is used
            # here because invoke() does not expose multiple generations.
            generations = chat_model._generate([HumanMessage(full_prompt)]).generations
            return [g.text for g in generations]
        finally:
            # Restore the caller-visible model state.
            chat_model.n = og_n
            chat_model.temperature = og_temperature
# --- Benchmark driver ------------------------------------------------------
# For each HumanEval task, load the matching (1-indexed) RAG context file and
# run the DeepEval HumanEval benchmark with that context baked into the model.
for i, task in enumerate(HumanEvalTask):
    print(f"Iteration {i}: {task}")
    # Context files are 1-indexed: request_1.txt pairs with the first task.
    # (Do not reassign the loop variable `i` itself — the original did,
    # which is fragile and confusing.)
    files = glob.glob(f'continue_requests/request_{i + 1}.txt')
    # glob() order is unspecified; sort by the numeric suffix for determinism.
    files.sort(key=lambda p: int(p.split('_')[-1].split('.')[0]))
    print("Files: ", files)
    for file_path in files:
        # Read and close the file before the long-running benchmark call.
        with open(file_path, 'r') as file:
            content = file.read()
        print(f"Processing file: {file_path}")
        print(content)
        # NOTE(review): env var is "OPENAI_API" (not the conventional
        # OPENAI_API_KEY) — confirm this matches the .env file.
        gpt_4 = GPT4Model(
            model=ChatOpenAI(
                model_name="gpt-4-1106-preview",
                api_key=os.getenv("OPENAI_API"),
            ),
            context=content,
        )
        # Benchmark a single task with one code generation (pass@1).
        benchmark = HumanEval(
            tasks=[task],
            n=1,
        )
        benchmark.evaluate(model=gpt_4, k=1)
        print(benchmark.overall_score)