OPENAI API #1446
Replies: 8 comments 5 replies
-
Could you elaborate, please |
Beta Was this translation helpful? Give feedback.
-
This seems to be the goal of the Basaran project. It seems like they're still missing some features but it looks promising. |
Beta Was this translation helpful? Give feedback.
-
Yes but I like to have it here in webui because less headache and installs and I like the project and model support. |
Beta Was this translation helpful? Give feedback.
-
A basic fake OpenAI API with tokens, connecting to the webui API:

import random
import requests
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from flask import Flask, request, jsonify
app = Flask(__name__)
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
@app.route('/generate_text', methods=['POST'])
def generate_text():
    """Fake OpenAI-style completion endpoint backed by the webui API.

    Expects a JSON body with ``model``, ``prompt``, and optional ``length``,
    ``temperature``, ``top_p``. Returns an OpenAI-like ``choices`` payload.
    """
    payload = request.json or {}
    model_name = payload.get('model')
    prompt = payload.get('prompt')
    length = payload.get('length', 50)
    temperature = payload.get('temperature', 1.0)
    # NOTE(review): top_p is accepted for API compatibility but is not
    # forwarded to the backend call below — confirm whether it should be.
    top_p = payload.get('top_p', 1.0)

    # Replace this with your model's actual name.
    # Bug fix: errors previously came back with HTTP 200, so clients that
    # check the status code could not detect them; return 400 instead.
    if model_name != 'text-davinci-002':
        return jsonify({'error': 'Invalid model name'}), 400
    if prompt is None:
        return jsonify({'error': 'Missing prompt'}), 400

    generated_text = generate_text_via_webui_api(prompt, length, temperature)
    response = {
        'choices': [{
            'text': generated_text['text'],
            'index': 0,
            'logprobs': None,
            'finish_reason': 'length',
            'tokens': generated_text['tokens'],
        }],
        'model': model_name,
        'prompt': prompt,
    }
    return jsonify(response)
def fake_generate_text(prompt, length):
    """Generate placeholder text locally with GPT-2 (testing fallback).

    Pads the prompt with random printable characters, then greedily decodes
    with the module-level GPT-2 model. ``length`` is interpreted as a token
    budget for the generated continuation.

    Returns a dict with the decoded ``text``, its ``tokens`` (ids), and the
    ``prompt`` token ids.
    """
    padded = prompt + ' '
    for _ in range(length):
        padded += chr(random.randint(32, 126))
    input_ids = tokenizer.encode(padded, return_tensors='pt')

    # Bug fix: the original used len(prompt) — a CHARACTER count — as part of
    # a max_length measured in TOKENS. Use the prompt's token count instead.
    prompt_tokens = tokenizer.encode(prompt, return_tensors='pt').tolist()[0]
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=length + len(prompt_tokens),
        do_sample=False,
    )
    tokens = output_ids.tolist()[0]
    return {
        'text': tokenizer.decode(tokens, skip_special_tokens=True),
        'tokens': tokens,
        'prompt': prompt_tokens,
    }
def generate_text_via_webui_api(prompt, length, temperature):
    """Forward a completion request to a locally running text-generation-webui.

    Returns a dict with the generated ``text``, its GPT-2 token ids, and the
    token ids of the original prompt.

    NOTE(review): ``length`` is currently not forwarded to the backend; the
    webui server's own max_new_tokens default applies — confirm intended.
    """
    server = "127.0.0.1"
    port = 5000

    # Generation parameters.
    # Reference: https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig
    params = {
        'do_sample': True,
        'temperature': temperature,
        'top_p': 1,
        'typical_p': 1,
        'repetition_penalty': 1,
        'encoder_repetition_penalty': 1,
        'top_k': 50,
        'num_beams': 1,
        'penalty_alpha': 0,
        'min_length': 0,
        'length_penalty': 1,
        'no_repeat_ngram_size': 0,
        'early_stopping': False,
        'prompt': prompt + ' ',
    }

    url = f"http://{server}:{port}/api/v1/generate/chat/completions"
    # Timeout so a hung webui server cannot block this endpoint forever.
    response = requests.post(url, json=params, timeout=60).json()

    generated = response['results'][0]['text']
    # Bug fix: the original reported token ids from an unrelated local GPT-2
    # greedy generation, so 'tokens' did not match 'text'. Tokenize the text
    # that is actually returned instead.
    return {
        'text': generated,
        'tokens': tokenizer.encode(generated),
        'prompt': tokenizer.encode(prompt),
    }
@app.route('/generate_tokens', methods=['POST'])
def generate_tokens():
    """Greedy GPT-2 completion endpoint returning token ids alongside text."""
    body = request.json
    prompt = body['prompt']
    max_tokens = body.get('max_tokens', 20)
    # Sampling knobs are accepted for API compatibility, but generation below
    # is greedy (do_sample=False), so they currently have no effect.
    temperature = body.get('temperature', 1.0)
    top_p = body.get('top_p', 1.0)

    prompt_ids = tokenizer.encode(prompt, return_tensors='pt')
    generated = model.generate(
        input_ids=prompt_ids,
        max_length=max_tokens + len(prompt_ids[0]),
        do_sample=False,
    )
    token_ids = generated.tolist()[0]

    return jsonify({
        'choices': [{
            'text': tokenizer.decode(token_ids, skip_special_tokens=True),
            'index': 0,
            'logprobs': None,
            'finish_reason': 'length',
            'tokens': token_ids,
        }],
        'model': 'gpt2',
        'prompt': prompt_ids.tolist()[0],
    })
if __name__ == '__main__':
app.run(host='127.0.0.2', port=5000) and this is an openai api test connecting to the fake openai api getting answer from webui api: import requests
# Update the URL to point to your Flask server endpoint.
url = 'http://127.0.0.2:5000/generate_text'
data = {
    'model': 'text-davinci-002',
    'prompt': 'Hello, how are you?',
    'length': 50,
    'temperature': 0.6,
    'top_p': 1.0,
}
# Bug fix: requests.post has no default timeout, so a down or hung server
# would block this client forever; cap the wait instead.
response = requests.post(url, json=data, timeout=60)
if response.status_code == 200:
    print(response.json())
    print(response.json()['choices'][0]['text'])
else:
    print("Error:", response.text)
Beta Was this translation helpful? Give feedback.
-
I'm just noticing this now, but I just submitted a pull request for a start of this here: #1475 |
Beta Was this translation helpful? Give feedback.
-
I think this might be exactly what you're looking for: https://github.com/lhenault/simpleAI |
Beta Was this translation helpful? Give feedback.
-
I gotta second this; text-generation-webui does a ton of wonderful things and works with so many models and configurations, but it's still a bit hard to integrate it with other services. Since everyone is targeting openai's API, that seems to be the best choice for an integration api. (This is assuming it supports streaming and non-streaming, which I think it does.) |
Beta Was this translation helpful? Give feedback.
-
FYI, the code was merged. 🎉 |
Beta Was this translation helpful? Give feedback.
-
I think this project needs an OpenAI API imitation to be compatible with Auto-GPT and other upcoming projects.
Beta Was this translation helpful? Give feedback.
All reactions