### Step 1 环境安装

1.1 创建一个虚拟环境，最好放到外面的terminal下执行

In [None]:
# %conda create --name hwfinal python=3.11.8
# %conda activate hwfinal

1.2 VScode右上角的当前kernel，选择前面新建的kernel hwfinal，并安装以下必要的包，运行前会提示安装ipjupyter，选择是

In [16]:

# ! pip install langchain-nvidia-ai-endpoints
# ! pip install jupyterlab
# ! pip install langchain_core
# ! pip install langchain
# ! pip install langchain-community
# ! pip install matplotlib
# ! pip install numpy
# ! pip install openai
# ! pip install gradio
# ! pip install faiss-cpu

# ! pip install openai-whisper==20231117 
# ! pip install ffmpeg==1.4
# ! conda install ffmpeg -y
# ! pip install edge-tts
# ! pip install transformers

Collecting openai-whisper==20231117
  Using cached openai_whisper-20231117-py3-none-any.whl
Collecting triton<3,>=2.0.0 (from openai-whisper==20231117)
  Using cached triton-2.3.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting numba (from openai-whisper==20231117)
  Using cached numba-0.60.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting torch (from openai-whisper==20231117)
  Using cached torch-2.4.0-cp311-cp311-manylinux1_x86_64.whl.metadata (26 kB)
Collecting more-itertools (from openai-whisper==20231117)
  Using cached more_itertools-10.4.0-py3-none-any.whl.metadata (36 kB)
Collecting tiktoken (from openai-whisper==20231117)
  Using cached tiktoken-0.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting llvmlite<0.44,>=0.43.0dev0 (from numba->openai-whisper==20231117)
  Using cached llvmlite-0.43.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadat

### Step 2 设置 NVIDIA APIKEY

In [1]:
import getpass
import os

if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"):
    nvapi_key = os.environ["NVIDIA_API_KEY"]
else:
    nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ")
    assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key"
    os.environ["NVIDIA_API_KEY"] = nvapi_key

### Step 3 建立知识库

3.1 选择推理模型及对应的嵌入模型

In [2]:
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

llm = ChatNVIDIA(model="ai-phi-3-small-128k-instruct", nvidia_api_key=nvapi_key, max_tokens=512)

embedder = NVIDIAEmbeddings(model="NV-Embed-QA")



3.2 构建文档数据

In [3]:
import os
from tqdm import tqdm
from pathlib import Path

# Here we read in the text data and prepare them into vectorstore
ps = os.listdir("./zh_data/")
data = []
sources = []
for p in ps:
    if p.endswith('.txt'):
        path2file="./zh_data/"+p
        with open(path2file,encoding="utf-8") as f:
            lines=f.readlines()
            for line in lines:
                if len(line)>=1:
                    data.append(line)
                    sources.append(path2file)
                    
documents=[d for d in data if d != '\n']

3.3.1 保存到向量库(流程一)

In [4]:
from operator import itemgetter
from langchain_community.vectorstores.faiss import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain.text_splitter import CharacterTextSplitter
from langchain_nvidia_ai_endpoints import ChatNVIDIA


In [5]:
text_splitter = CharacterTextSplitter(chunk_size=400, separator=" ")
docs = []
metadatas = []

for i, d in enumerate(documents):
    splits = text_splitter.split_text(d)
    #print(len(splits))
    docs.extend(splits)
    metadatas.extend([{"source": sources[i]}] * len(splits))

store = FAISS.from_texts(docs, embedder , metadatas=metadatas)
store.save_local('./zh_data/nv_embedding')

3.3.2 从向量库中取出内容(流程二)

In [5]:
store = FAISS.load_local("./zh_data/nv_embedding", embedder,allow_dangerous_deserialization=True)

### Step 4 主流程

4.1 phi模型根据提示进行回答

In [6]:
def phi_demo(mesage):
    retriever = store.as_retriever()
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Answer solely based on the following context:\n<Documents>\n{context}\n</Documents>",
            ),
            ("user", "{question}"),
        ]
    )

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )
    result = chain.invoke(mesage)
    return result

4.2 定义辅助函数1

In [7]:
#@title Edge TTS
def calculate_rate_string(input_value):
    rate = (input_value - 1) * 100
    sign = '+' if input_value >= 1 else '-'
    return f"{sign}{abs(int(rate))}"


def make_chunks(input_text, language):
    language="English"
    if language == "English":
      temp_list = input_text.strip().split(".")
      filtered_list = [element.strip() + '.' for element in temp_list[:-1] if element.strip() and element.strip() != "'" and element.strip() != '"']
      if temp_list[-1].strip():
          filtered_list.append(temp_list[-1].strip())
      return filtered_list


import uuid
def tts_file_name(text):
    if text.endswith("."):
        text = text[:-1]
    text = text.lower()
    text = text.strip()
    text = text.replace(" ","_")
    truncated_text = text[:25] if len(text) > 25 else text if len(text) > 0 else "empty"
    random_string = uuid.uuid4().hex[:8].upper()
    file_name = f"./content/edge_tts_voice/{truncated_text}_{random_string}.mp3"
    return file_name


from pydub import AudioSegment
import shutil
import os
def merge_audio_files(audio_paths, output_path):
    # Initialize an empty AudioSegment
    merged_audio = AudioSegment.silent(duration=0)

    # Iterate through each audio file path
    for audio_path in audio_paths:
        # Load the audio file using Pydub
        audio = AudioSegment.from_file(audio_path)

        # Append the current audio file to the merged_audio
        merged_audio += audio

    # Export the merged audio to the specified output path
    merged_audio.export(output_path, format="mp3")

4.3 定义辅助函数2

In [8]:

def edge_free_tts(chunks_list,speed,voice_name,save_path):
  # print(chunks_list)
  if len(chunks_list)>1:
    chunk_audio_list=[]
    if os.path.exists("./content/edge_tts_voice"):
      shutil.rmtree("./content/edge_tts_voice")
    os.mkdir("./content/edge_tts_voice")
    k=1
    for i in chunks_list:
      print(i)
      edge_command=f'edge-tts  --rate={calculate_rate_string(speed)}% --voice {voice_name} --text "{i}" --write-media ./content/edge_tts_voice/{k}.mp3'
      print(edge_command)
      var1=os.system(edge_command)
      if var1==0:
        pass
      else:
        print(f"Failed: {i}")
      chunk_audio_list.append(f"./content/edge_tts_voice/{k}.mp3")
      k+=1
    # print(chunk_audio_list)
    merge_audio_files(chunk_audio_list, save_path)
  else:
    edge_command=f'edge-tts  --rate={calculate_rate_string(speed)}% --voice {voice_name} --text "{chunks_list[0]}" --write-media {save_path}'
    print(edge_command)
    var2=os.system(edge_command)
    if var2==0:
      pass
    else:
      print(f"Failed: {chunks_list[0]}")
  return save_path

4.4 定义辅助函数3

In [10]:

if not os.path.exists("./content/audio"):
    os.mkdir("./content")
    os.mkdir("./content/audio")
import uuid
def random_audio_name_generate():
  random_uuid = uuid.uuid4()
  audio_extension = ".mp3"
  random_audio_name = str(random_uuid)[:8] + audio_extension
  return random_audio_name

text = 'This is Microsoft Phi 3 mini 4k instruct Demo'  # @param {type: "string"}
Language = "English" # @param ['English']
# Gender of voice simply change from male to female and choose the voice you want to use
Gender = "Female"# @param ['Male', 'Female']
female_voice="en-US-AriaNeural"# @param["en-US-AriaNeural",'zh-CN-XiaoxiaoNeural','zh-CN-XiaoyiNeural']
speed = 1  # @param {type: "number"}
translate_text_flag  = False
if len(text)>=600:
  long_sentence = True
else:
  long_sentence = False

# long_sentence = False # @param {type:"boolean"}
save_path = ''  # @param {type: "string"}
if len(save_path)==0:
  save_path=tts_file_name(text)
if Language == "English" :
  if Gender=="Male":
    voice_name="en-US-ChristopherNeural"
  if Gender=="Female":
    voice_name=female_voice

def talk(input_text):
  global translate_text_flag,Language,speed,voice_name
  if len(input_text)>=600:
    long_sentence = True
  else:
    long_sentence = False

  if long_sentence==True and translate_text_flag==True:
    chunks_list=make_chunks(input_text,Language)
  elif long_sentence==True and translate_text_flag==False:
    chunks_list=make_chunks(input_text,"English")
  else:
    chunks_list=[input_text]
  save_path="./content/audio/"+random_audio_name_generate()
  edge_save_path=edge_free_tts(chunks_list,speed,voice_name,save_path)
  return edge_save_path

4.5 定义辅助函数4

In [11]:
import whisper
select_model ="tiny" # ['tiny', 'base']
whisper_model = whisper.load_model(select_model)

def convert_to_text(audio_path):
  result = whisper_model.transcribe(audio_path,word_timestamps=True,fp16=False,language='English',task='translate')
  with open('scan.txt', 'w') as file:
    file.write(str(result))
  return result["text"]


4.6 定义主流程

In [12]:
#@title Run gradio app
import gradio as gr
from IPython.display import Audio, display
def run_text_prompt(message, chat_history):
    bot_message = phi_demo(message)
    edge_save_path=talk(bot_message)
    # print(edge_save_path)
    display(Audio(edge_save_path, autoplay=True))

    chat_history.append((message, bot_message))
    return "", chat_history


def run_audio_prompt(audio, chat_history):
    if audio is None:
        return None, chat_history
    print(audio)
    message_transcription = convert_to_text(audio)
    _, chat_history = run_text_prompt(message_transcription, chat_history)
    return None, chat_history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(label="Chat with Phi 3 small 128k instruct")

    msg = gr.Textbox(label="Ask anything")
    msg.submit(run_text_prompt, [msg, chatbot], [msg, chatbot])

    with gr.Row():
        audio = gr.Audio(sources="microphone", type="filepath")

        send_audio_button = gr.Button("Send Audio", interactive=True)
        send_audio_button.click(run_audio_prompt, [audio, chatbot], [audio, chatbot])

demo.launch(share=True,debug=True)

  from .autonotebook import tqdm as notebook_tqdm


Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://155e763c16ddcd7d9f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


edge-tts  --rate=+0% --voice en-US-AriaNeural --text " According to the provided context, the Chinese team won 40 gold medals in the 2024 Summer Olympic Games." --write-media ./content/audio/e1921db3.mp3


WEBVTT

00:00:00.100 --> 00:00:03.812
According to the provided context the Chinese team won 40

00:00:03.825 --> 00:00:07.025
gold medals in the 2024 Summer Olympic Games



/tmp/gradio/8ec7350daafe9c2d39db6f189a8c669c6c78af807fd7e9e1738c6639054a7281/audio.wav




edge-tts  --rate=+0% --voice en-US-AriaNeural --text " The provided context discusses the viral image of surfer Gabriel Medina at the Olympics, which was shared by Olympics.com on their official social media platforms. The image, captured by photographer Jamie Burke, quickly gained popularity on the internet. The document also mentions that the Louvre Museum, home to the famous Mona Lisa painting, has joined the 2024 Paris Olympics celebrations. Additionally, the Chinese Olympic team achieved outstanding results at the Paris Olympics, with 40 gold, 27 silver, and 24 bronze medals." --write-media ./content/audio/48a5bfb7.mp3


WEBVTT

00:00:00.100 --> 00:00:03.875
The provided context discusses the viral image of surfer Gabriel

00:00:03.888 --> 00:00:07.725
Medina at the Olympics which was shared by Olympics.com on

00:00:07.737 --> 00:00:12.900
their official social media platforms The image captured by photographer

00:00:12.912 --> 00:00:17.663
Jamie Burke quickly gained popularity on the internet The document

00:00:17.725 --> 00:00:20.913
also mentions that the Louvre Museum home to the famous

00:00:20.925 --> 00:00:25.775
Mona Lisa painting has joined the 2024 Paris Olympics celebrations

00:00:26.637 --> 00:00:30.762
Additionally the Chinese Olympic team achieved outstanding results at the

00:00:30.775 --> 00:00:35.550
Paris Olympics with 40 gold 27 silver and 24 bronze

00:00:35.562 --> 00:00:36.112
medals



/tmp/gradio/319fd6a14369c2fab08a444c9ac9921285bb4f324d7148da81f46f901fc73ac7/audio.wav




edge-tts  --rate=+0% --voice en-US-AriaNeural --text " - The 2024 Paris Olympics saw the Chinese delegation achieve a remarkable performance, securing 40 gold, 27 silver, and 24 bronze medals, marking their best overseas results in terms of gold medal count.

- The Louvre Museum, home to the world-renowned Mona Lisa, is set to be part of the festivities for the 2024 Paris Olympics, showcasing the event's inclusive and celebratory spirit.

- A striking photograph of surfer Gabriel Medina by photographer Jerome Bruyer, which went viral on social media, was humorously suggested by the official Olympics.com to be worthy of hanging in the Louvre." --write-media ./content/audio/380a28c3.mp3


WEBVTT

00:00:00.100 --> 00:00:04.025
The 2024 Paris Olympics saw the Chinese delegation achieve a

00:00:04.037 --> 00:00:09.637
remarkable performance securing 40 gold 27 silver and 24 bronze

00:00:09.650 --> 00:00:13.338
medals marking their best overseas results in terms of gold

00:00:13.350 --> 00:00:17.675
medal count The Louvre Museum home to the world-renowned Mona

00:00:17.688 --> 00:00:20.363
Lisa is set to be part of the festivities for

00:00:20.375 --> 00:00:25.288
the 2024 Paris Olympics showcasing the event's inclusive and celebratory

00:00:25.300 --> 00:00:30.288
spirit A striking photograph of surfer Gabriel Medina by photographer

00:00:30.300 --> 00:00:34.275
Jerome Bruyer which went viral on social media was humorously

00:00:34.288 --> 00:00:37.775
suggested by the official Olympics.com to be worthy of hanging

00:00:37.788 --> 00:00:38.475
in the Louvre



/tmp/gradio/0ffe072b5569a63d7a6dac9e0ed71f114e7aef30df455040f16420c12f8bd79c/audio.wav




edge-tts  --rate=+0% --voice en-US-AriaNeural --text " 杰罗姆·布鲁耶捕捉的一张冲浪名将加布里埃尔·梅迪纳的照片在网络上引起了极大的关注。

这张照片被晒出在Olympics.com官方社交媒体上，迅速走红网络。" --write-media ./content/audio/00eb67c0.mp3


WEBVTT

00:00:00.100 --> 00:00:01.450
这张照片被晒出在Olympics . com官方社交媒体上，迅速走红网络



/tmp/gradio/cd353e3a35ae3bc928fea4f2772d5c1938a5965df18bb35a450cedb36a1a2ece/audio.wav




edge-tts  --rate=+0% --voice en-US-AriaNeural --text " 杰罗姆·布鲁耶

The document mentions that the photographer who captured the shocking moment of surfer Gabriel Medina is Jerome Buraya." --write-media ./content/audio/b5e1a098.mp3


WEBVTT

00:00:00.100 --> 00:00:03.487
The document mentions that the photographer who captured the shocking

00:00:03.500 --> 00:00:06.562
moment of surfer Gabriel Medina is Jerome Buraya



edge-tts  --rate=+0% --voice en-US-AriaNeural --text " 李发彬和欧紫霞将中国代表团旗帜带入场。" --write-media ./content/audio/4e821f8c.mp3
Failed:  李发彬和欧紫霞将中国代表团旗帜带入场。


Traceback (most recent call last):
  File "/home/liu/miniconda3/envs/hwfinal/bin/edge-tts", line 8, in <module>
    sys.exit(main())
             ^^^^^^
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/site-packages/edge_tts/util.py", line 136, in main
    asyncio.run(amain())
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/asyncio/runners.py", line 190, in run
    return runner.run(main)
           ^^^^^^^^^^^^^^^^
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/asyncio/runners.py", line 118, in run
    return self._loop.run_until_complete(task)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/asyncio/base_events.py", line 654, in run_until_complete
    return future.result()
           ^^^^^^^^^^^^^^^
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/site-packages/edge_tts/util.py", line 131, in amain
    await _run_tts(args)
  File "/home/liu/miniconda3/envs/hwfinal/lib/python3.11/site-packages/e

/tmp/gradio/eea3aaafb307d1ab3b764211ecb44d399ab8e99e3ce0236d63d397f3fa1dbec8/audio.wav




edge-tts  --rate=+0% --voice en-US-AriaNeural --text " 李发彬和欧紫霞担任中国体育代表团旗帜的携带者，将它们带入了2024年巴黎奥运会。" --write-media ./content/audio/bceb937f.mp3


WEBVTT

00:00:00.100 --> 00:00:01.337
李发彬和欧紫霞担任中国体育代表团旗帜的携带者，将它们带入了2024年巴黎奥运会

