-
Notifications
You must be signed in to change notification settings - Fork 0
/
lcdemo.py
113 lines (96 loc) · 3.73 KB
/
lcdemo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
## This is a demo for a bot that uses TTS, STT, and LLM models to have a real-time conversation with a user
import time
import logging
import replicate
import sounddevice as sd
from scipy.io.wavfile import write as scwrite
import vlc
# Configure root logging: timestamped, level-tagged lines at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)
if __name__ == "__main__":
    # Interactive voice loop: record the user, transcribe the recording with
    # a Whisper model, generate a reply with Llama 2, synthesise the reply
    # with Neon TTS and play it back through VLC. The loop has no exit
    # condition; it runs until the process is interrupted.

    sample_rate = 44100  # Sample rate in Hz
    record_seconds = 7  # Duration of each recording
    recording_path = "recording.wav"

    # The libVLC instance does not depend on the turn, so create it once
    # instead of once per loop iteration.
    vlc_instance = vlc.Instance()

    while True:
        ## record the audio
        logging.info(
            "starting program \n--------------------------------------------------"
        )
        # The announced duration is derived from record_seconds so the prompt
        # cannot drift away from what is actually recorded.
        print(
            "Record a message to start the conversation. When you press enter, "
            f"you'll have {record_seconds} seconds to record your message."
        )
        # Developer shortcut: entering "0" skips the recording step, so the
        # recording.wav already on disk is sent to the models instead.
        dev_skip = input("Press enter to begin recording")
        if dev_skip != "0":
            logging.info("now recording")
            myrecording = sd.rec(
                int(record_seconds * sample_rate),
                samplerate=sample_rate,
                channels=2,
            )
            sd.wait()  # Wait until recording is finished
            logging.info("finished recording. Saving as recording.wav")
            scwrite(recording_path, sample_rate, myrecording)  # Save as WAV file

        ## put it through Whisper
        logging.info(
            "\n----------------interpreting recording with whispr------------\n"
        )
        # Keep the file open only for the duration of the call so the handle
        # is closed every turn instead of leaking.
        with open(recording_path, "rb") as audio_file:
            sttout = replicate.run(
                "vaibhavs10/incredibly-fast-whisper:3ab86df6c8f54c11309d4d1f930ac292bad43ace52d10c80d87eb258b3c9f79c",
                input={
                    "task": "transcribe",
                    "audio": audio_file,
                    "language": "None",
                    "timestamp": "chunk",
                    "batch_size": 64,
                    "diarise_audio": False,
                },
            )
        logging.info("whisper output: %s", sttout)

        ## put it through llama
        transcript = sttout["text"]
        system_prompt = (
            "You are an AI chatbot called faebot. \n"
            "You are having a conversation with your engineer, transfaeries. Fae is currently live on stream and is livestreaming development work on you! \n"
            "You are the real star of the show."
        )
        prompt = f"User: {transcript}"
        llmout = replicate.run(
            "meta/llama-2-13b-chat",
            input={
                "debug": False,
                "top_k": 50,
                "top_p": 1,
                "prompt": prompt,
                "temperature": 0.5,
                "system_prompt": system_prompt,
                "max_new_tokens": 200,
                "min_new_tokens": -1,
            },
        )
        # The model output is consumed as an iterable of text pieces.
        response = "".join(llmout)
        logging.info("llama output: %s", response)

        # put it through neontts
        ttsout = replicate.run(
            "awerks/neon-tts:139606fe1536f85a9f07d87982400b8140c9a9673733d47913af96738894128f",
            input={"text": response, "language": "en"},
        )
        # Alternative TTS backend, kept for reference:
        # ttsout = replicate.run(
        #     "adirik/styletts2:989cb5ea6d2401314eb30685740cb9f6fd1c9001b8940659b406f952837ab5ac",
        #     input={
        #         "beta": 0.7,
        #         "seed": 0,
        #         "text": response,
        #         "alpha": 0.3,
        #         "diffusion_steps": 10,
        #         "embedding_scale": 1.5,
        #     },
        # )
        logging.info("tts output url: %s", ttsout)

        ## Play back the audio
        player = vlc_instance.media_player_new()
        # str() is a no-op for a plain URL string.
        # NOTE(review): newer replicate clients appear to return a file-like
        # output object whose str() is its URL - confirm against the
        # installed client version.
        media = vlc_instance.media_new(str(ttsout))
        player.set_media(media)
        player.play()
        # Pause before querying the length; presumably this gives VLC time to
        # open the stream so a length is available.
        time.sleep(1.5)
        # get_length() is in milliseconds and is negative when the length is
        # unknown; clamp so the sleep below never sees a negative value.
        duration = max(player.get_length(), 0) / 1000
        ## let user start recording again before the AI is finished
        time.sleep(int(duration / 2))