In [None]:
# !pip install transformers accelerate torch fastapi uvicorn


In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "deepseek-ai/DeepSeek-R1-0528-Qwen3-8B"

# Fetch the tokenizer and the weights identified by `model_name`.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="cuda",
)

# Write both artifacts to ./DeepSeek-R1 so they can be loaded from disk later.
tokenizer.save_pretrained("./DeepSeek-R1")
model.save_pretrained("./DeepSeek-R1")


In [None]:

# import torch

# # Load from local directory
# MODEL_PATH = "./DeepSeek-R1"

# tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
# model = AutoModelForCausalLM.from_pretrained(
#     MODEL_PATH,
#     device_map="auto",
#     torch_dtype=torch.float16,
#     trust_remote_code=True
# )
# model.eval()

# def generate_response(prompt: str, max_tokens: int = 512) -> str:
#     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
#     with torch.no_grad():
#         outputs = model.generate(**inputs, max_new_tokens=max_tokens, do_sample=True, top_p=0.95, temperature=0.7)
#     return tokenizer.decode(outputs[0], skip_special_tokens=True)


In [None]:


# This cell references `torch.float16`, so it imports torch itself; the only
# other `import torch` in the notebook is commented out, which made this cell
# fail with NameError on a fresh kernel. The transformers names are re-imported
# too so the cell can be run without first executing the download cell.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "./DeepSeek-R1"  # Local directory

# Load the tokenizer and the weights that were saved under `model_path`.
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
# Switch the module to evaluation mode before it is used for generation.
model.eval()

# 🔁 Inference function
def chat(prompt: str, max_tokens: int = 512) -> str:
    """Sample a completion for ``prompt`` from the module-level ``model``.

    Args:
        prompt: Text that is tokenized and handed to ``model.generate``.
        max_tokens: Forwarded to ``generate`` as ``max_new_tokens``.

    Returns:
        The first sequence returned by ``generate``, decoded with special
        tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    sampling_kwargs = dict(
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
    )
    # Generation runs with gradient tracking disabled.
    with torch.no_grad():
        sequences = model.generate(**encoded, **sampling_kwargs)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


In [None]:
# Smoke test: send one prompt through chat() and show the decoded text.
response = chat(prompt="Explain the concept of black holes in 10 words.")
print(response)


In [2]:
# Import necessary libraries
# import sounddevice as sd
import scipy.io.wavfile
import os
from faster_whisper import WhisperModel
from transformers import AutoTokenizer, AutoModelForCausalLM
from TTS.api import TTS

# 🔹 Initialize ASR model (Whisper)
# Loads the "tiny.en" checkpoint through faster-whisper's WhisperModel.
asr_model = WhisperModel("tiny.en", device="cuda")  # Already set to "cuda"; NOTE(review): presumably "cpu" is the fallback without a GPU — confirm
print("ASR Model Loaded")

# 🔹 Initialize LLM model (Phi-2)
# NOTE(review): this rebinds the module-level name `tokenizer`, which chat()
# in the earlier DeepSeek cell also reads; after this cell runs, chat() would
# tokenize with the Phi-2 tokenizer — confirm the two pipelines are never
# used in the same kernel session.
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")  # Loaded by the id "microsoft/phi-2", not from a local directory
llm_model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", device_map="cuda")
print("LLM Model Loaded")

# 🔹 Initialize TTS model (Coqui TTS)
# Uses the "tts_models/en/ljspeech/tacotron2-DDC" model with the progress bar disabled.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
print("TTS Model Loaded")

import pyaudio
import wave

def record_audio(filename="input.wav", duration=5, samplerate=16000):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        filename: Path of the WAV file to write.
        duration: Length of the recording in seconds.
        samplerate: Sampling rate in Hz, used for capture and for the WAV header.

    Raises:
        OSError: Propagated from PyAudio when the input stream cannot be
            opened or read (for example when no input device exists). The
            stream and the PyAudio instance are released before the error
            reaches the caller.
    """
    chunk_size = 1024
    # Read exactly samplerate * duration frames. Counting whole 1024-frame
    # chunks only would silently drop the final partial chunk.
    total_frames = int(samplerate * duration)

    print("🎙️ Recording...")
    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(pyaudio.paInt16)

        stream = p.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=samplerate,
                        input=True,
                        frames_per_buffer=chunk_size)
        try:
            frames = []
            remaining = total_frames
            while remaining > 0:
                n_frames = min(chunk_size, remaining)
                frames.append(stream.read(n_frames))
                remaining -= n_frames
        finally:
            # Always release the stream, even when a read fails midway.
            stream.stop_stream()
            stream.close()
    finally:
        # Always release PortAudio, even when the stream could not be opened.
        p.terminate()

    print(f"✅ Recording complete. Saving to {filename}")

    # Save the file
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(samplerate)
        wf.writeframes(b''.join(frames))

# Call the function to record
# NOTE(review): main() below calls record_audio() again before transcribing,
# and both calls write the default "input.wav", so the file produced here is
# overwritten without being read — confirm this stand-alone recording is
# still wanted.
record_audio(duration=5)


# 🔹 Function to transcribe audio to text using the ASR model
def transcribe_audio(audio_path="input.wav"):
    """Transcribe an audio file with the module-level ``asr_model``.

    Args:
        audio_path: Path of the audio file passed to ``asr_model.transcribe``.

    Returns:
        The segment texts joined by single spaces. Each segment is stripped
        and empty segments are dropped, so the result has no leading,
        trailing or doubled spaces and is ``""`` when nothing was recognised.
    """
    segments, _ = asr_model.transcribe(audio_path)
    # Segment texts may carry their own surrounding whitespace; joining them
    # raw with " " would produce leading or doubled spaces.
    pieces = (seg.text.strip() for seg in segments)
    transcript = " ".join(piece for piece in pieces if piece)
    print(f"👂 Transcribed: {transcript}")
    return transcript

# 🔹 Function to generate a response using the LLM model
def generate_response(prompt):
    """Generate text for ``prompt`` with the module-level ``llm_model``.

    Args:
        prompt: Text that is tokenized and passed to ``llm_model.generate``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
        NOTE(review): the whole sequence returned by ``generate`` is decoded,
        which for a causal LM presumably starts with the prompt itself —
        confirm that echoing the prompt is intended.
    """
    # Move the encoding to wherever the model lives instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(llm_model.device)
    # Forward the complete encoding so every field the tokenizer produced
    # (e.g. the attention mask) reaches generate(), not just input_ids.
    outputs = llm_model.generate(**inputs, max_new_tokens=100)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(f"🤖 LLM Response: {response}")
    return response

# 🔹 Function to convert text to speech and save the output
def speak_text(text, filename="output.wav"):
    """Synthesize ``text`` to a WAV file and play it with ``aplay``.

    Args:
        text: Text handed to ``tts.tts_to_file``.
        filename: Path of the WAV file that is written and then played.

    Playback is best-effort: a failing or missing ``aplay`` does not raise.
    """
    import subprocess  # local import so the function does not rely on another cell

    tts.tts_to_file(text=text, file_path=filename)
    try:
        # Pass the path as a separate argument without a shell, so file names
        # containing spaces or shell metacharacters are played verbatim
        # instead of being interpreted as part of a command line.
        subprocess.run(["aplay", filename], check=False)
    except FileNotFoundError:
        # No `aplay` executable on this machine: keep going without playback.
        print("⚠️ 'aplay' not found; skipping playback.")
    print(f"🔊 Speaking: {text}")

# 🔁 Main loop to keep the system running
def main():
    """Run the record → transcribe → respond → speak loop until interrupted.

    Each iteration records 5 seconds of audio, transcribes it, generates a
    reply and speaks it. Iterations whose transcript is blank are skipped,
    and a KeyboardInterrupt (for example a kernel interrupt) ends the loop
    without a traceback.
    """
    try:
        while True:
            # 1. Record audio
            record_audio(duration=5)  # 5 seconds recording duration

            # 2. Transcribe audio
            transcript = transcribe_audio()
            if not transcript.strip():
                # Nothing was recognised (e.g. silence): do not send an empty
                # prompt to the language model, just listen again.
                continue

            # 3. Generate response
            response = generate_response(transcript)

            # 4. Speak the response
            speak_text(response)
    except KeyboardInterrupt:
        # The loop has no other exit condition; treat an interrupt as a stop request.
        print("👋 Stopped.")

# Start the assistant loop when this code runs as the main program.
# NOTE(review): presumably this condition also holds inside a notebook kernel,
# so executing the cell enters main()'s endless loop immediately — confirm.
if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


ASR Model Loaded


Loading checkpoint shards: 100%|██████████████████████████████████████████████████████████| 2/2 [00:02<00:00,  1.33s/it]


LLM Model Loaded
 > tts_models/en/ljspeech/tacotron2-DDC is already downloaded.
 > Model's license - apache 2.0
 > Check https://choosealicense.com/licenses/apache-2.0/ for more info.
 > vocoder_models/en/ljspeech/hifigan_v2 is already downloaded.
 > Model's license - apache 2.0
 > Check https://choosealicense.com/licenses/apache-2.0/ for more info.
 > Using model: Tacotron2
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:Tr

  WeightNorm.apply(module, name, dim)


Removing weight norm...
TTS Model Loaded
🎙️ Recording...


ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_concat returned error: No such file or directory
ALSA lib confmisc.c:1334:(snd_func_refer) error evaluating name
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_refer returned error: No such file or directory
ALSA lib conf.c:5701:(snd_config_expand) Evaluate error: No such file or directory
ALSA lib pcm.c:2664:(snd_pcm_open_noupdate) Unknown PCM sysdefault
ALSA lib confmisc.c:855:(parse_card) cannot find card '0'
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_card_inum returned error: No such file or directory
ALSA lib confmisc.c:422:(snd_func_concat) error evaluating strings
ALSA lib conf.c:5178:(_snd_config_evaluate) function snd_func_concat returned error: No

OSError: [Errno -9996] Invalid input device (no default output device)

In [1]:
!pip install pyaudio


[0m

In [None]:
# !pip install Pillow==9.5.0
# !pip install TTS==0.15.1 
