<a href="https://colab.research.google.com/github/mancher07/samsung/blob/main/GradioLive.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# @title <b>Dependencies Local🟡🚀</b>   {display-mode: "form"}

%%capture
!pip install transformers==4.38.2 tiktoken torch numpy gradio

import argparse
import os
import gradio as gr
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread

# Cargar el modelo y el tokenizador
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-instruct-3b")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stable-code-instruct-3b", torch_dtype=torch.bfloat16)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [None]:
# @title <b>Bot Java Local🟡{display-mode: "form"}

# Global variables and model configuration.
# The trailing #@param annotations expose each value as a Colab form field.

# predict() keeps only the last MAX_LENGTH prompt tokens when the encoded prompt is longer.
MAX_LENGTH = 8000000 #@param {type:"slider", min:512, max:8000000, step:128}
# Initial value handed to the "Max Tokens" slider of the chat UI.
# NOTE(review): that slider is created with the range 100-2048, so this default lies outside it.
DEFAULT_MAX_NEW_TOKENS = 687300 #@param {type:"slider", min:100, max:800000, step:100}
# Initial value handed to the "Temperature" slider of the chat UI.
TEMPERATURE = 0.9 #@param {type:"slider", min:0, max:1, step:0.1}
# Initial text of the "System Prompt" box; predict() places it at the start of every prompt.
SYSTEM_PROMPT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions" #@param {type:"string"}

def predict(message, history, system_prompt, temperature, max_tokens):
    """Stream the model's reply to ``message`` for the Gradio chat UI.

    Relies on the module-level ``tokenizer``, ``model``, ``device`` and
    ``MAX_LENGTH`` defined in earlier cells.

    Args:
        message: The new user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs from earlier turns.
        system_prompt: Text placed at the very start of the prompt.
        temperature: Sampling temperature. A value of 0 or below switches to
            greedy decoding instead of sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Yields:
        str: The reply text accumulated so far, growing as new pieces arrive.
    """
    # Earlier turns use the same "\nuser\n...\n\nassistant\n" framing as the
    # final turn, so the system prompt is never glued onto the first "user" tag.
    instruction = system_prompt
    for human, assistant in history:
        instruction += f'\nuser\n{human}\n\nassistant\n{assistant}\n'
    instruction += f'\nuser\n{message}\n\nassistant\n'

    streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
    enc = tokenizer([instruction], return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids
    attention_mask = enc.attention_mask

    # Keep only the most recent MAX_LENGTH tokens. The mask is cropped together
    # with the ids; otherwise generate() receives tensors of different lengths.
    if input_ids.shape[1] > MAX_LENGTH:
        input_ids = input_ids[:, -MAX_LENGTH:]
        attention_mask = attention_mask[:, -MAX_LENGTH:]

    generate_kwargs = {
        "input_ids": input_ids.to(device),
        "attention_mask": attention_mask.to(device),
        "streamer": streamer,
        # UI sliders may deliver floats; generate() needs an integer budget.
        "max_new_tokens": int(max_tokens),
    }
    if temperature > 0:
        generate_kwargs.update(do_sample=True, top_p=0.95, temperature=float(temperature))
    else:
        # Sampling requires a strictly positive temperature; the UI slider can
        # reach 0, which is treated as a request for deterministic decoding.
        generate_kwargs["do_sample"] = False

    # generate() blocks until it finishes, so it runs in a worker thread while
    # this generator relays the streamed text to the UI.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
    yield "".join(outputs)

# Build the chat UI around predict(). The three additional inputs are passed to
# predict() as system_prompt, temperature and max_tokens, in that order.
interface = gr.ChatInterface(
    fn=predict,
    title="BotJava",
    description="Chat Model Stable Code 3B",
    theme="Glass",
    chatbot=gr.Chatbot(label="Chat History"),
    textbox=gr.Textbox(placeholder="input", container=False, scale=7),
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(SYSTEM_PROMPT, label="System Prompt"),
        gr.Slider(0, 1, TEMPERATURE, label="Temperature"),
        # The configured default can be far larger than this slider's range
        # (100-2048); clamp it so the initial value is one the slider can hold.
        gr.Slider(100, 2048, min(max(DEFAULT_MAX_NEW_TOKENS, 100), 2048), label="Max Tokens"),
    ],
    additional_inputs_accordion="Parameters",
)

# Start the web UI
interface.launch(share=None)

In [None]:
# @title <b>Java Bot MyDrive Mount♻️  {display-mode: "form"}
from google.colab import drive

# Google Drive has to be mounted before the saved checkpoint can be read
drive.mount('/content/drive')

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Use the GPU when torch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Folder on Google Drive that holds the saved tokenizer and model
model_path = "/content/drive/MyDrive/stable-code-instruct-3b"

# Load both from that folder and place the model on the chosen device
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
).to(device)

# Imports needed by the chat bot code that follows in this cell
import gradio as gr
from threading import Thread
from transformers import TextIteratorStreamer

# Global variables and model configuration

# predict() keeps only the last MAX_LENGTH prompt tokens when the encoded prompt is longer.
MAX_LENGTH = 6284800
# Initial value handed to the "Max Tokens" slider of the chat UI.
# NOTE(review): that slider is created with the range 100-2048, so this default lies outside it.
DEFAULT_MAX_NEW_TOKENS = 800000
# Initial value handed to the "Temperature" slider of the chat UI.
TEMPERATURE = 0.9
# Initial text of the "System Prompt" box; predict() places it at the start of every prompt.
SYSTEM_PROMPT = "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions"

def predict(message, history, system_prompt, temperature, max_tokens):
    """Stream the model's reply to ``message`` for the Gradio chat UI.

    Relies on the module-level ``tokenizer``, ``model``, ``device`` and
    ``MAX_LENGTH`` defined earlier in this cell.

    Args:
        message: The new user message.
        history: Iterable of ``(user_text, assistant_text)`` pairs from earlier turns.
        system_prompt: Text placed at the very start of the prompt.
        temperature: Sampling temperature. A value of 0 or below switches to
            greedy decoding instead of sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Yields:
        str: The reply text accumulated so far, growing as new pieces arrive.
    """
    # Earlier turns use the same "\nuser\n...\n\nassistant\n" framing as the
    # final turn, so the system prompt is never glued onto the first "user" tag.
    instruction = system_prompt
    for human, assistant in history:
        instruction += f'\nuser\n{human}\n\nassistant\n{assistant}\n'
    instruction += f'\nuser\n{message}\n\nassistant\n'

    streamer = TextIteratorStreamer(tokenizer, timeout=100.0, skip_prompt=True, skip_special_tokens=True)
    enc = tokenizer([instruction], return_tensors="pt", padding=True, truncation=True)
    input_ids = enc.input_ids
    attention_mask = enc.attention_mask

    # Keep only the most recent MAX_LENGTH tokens. The mask is cropped together
    # with the ids; otherwise generate() receives tensors of different lengths.
    if input_ids.shape[1] > MAX_LENGTH:
        input_ids = input_ids[:, -MAX_LENGTH:]
        attention_mask = attention_mask[:, -MAX_LENGTH:]

    generate_kwargs = {
        "input_ids": input_ids.to(device),
        "attention_mask": attention_mask.to(device),
        "streamer": streamer,
        # UI sliders may deliver floats; generate() needs an integer budget.
        "max_new_tokens": int(max_tokens),
    }
    if temperature > 0:
        generate_kwargs.update(do_sample=True, top_p=0.95, temperature=float(temperature))
    else:
        # Sampling requires a strictly positive temperature; the UI slider can
        # reach 0, which is treated as a request for deterministic decoding.
        generate_kwargs["do_sample"] = False

    # generate() blocks until it finishes, so it runs in a worker thread while
    # this generator relays the streamed text to the UI.
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()
    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)
    yield "".join(outputs)

# Build the chat UI around predict(). The three additional inputs are passed to
# predict() as system_prompt, temperature and max_tokens, in that order.
interface = gr.ChatInterface(
    fn=predict,
    title="BotJava",
    description="Chat Model Stable Code 3B",
    theme="Glass",
    chatbot=gr.Chatbot(label="Chat History"),
    textbox=gr.Textbox(placeholder="input", container=False, scale=7),
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(SYSTEM_PROMPT, label="System Prompt"),
        gr.Slider(0, 1, TEMPERATURE, label="Temperature"),
        # The configured default can be far larger than this slider's range
        # (100-2048); clamp it so the initial value is one the slider can hold.
        gr.Slider(100, 2048, min(max(DEFAULT_MAX_NEW_TOKENS, 100), 2048), label="Max Tokens"),
    ],
    additional_inputs_accordion="Parameters",
)

# Start the web UI
interface.launch(share=None)

In [None]:
# @title <b>MyDrive Dependencies/Model🚀 Mount♻️  {display-mode: "form"}
%%capture
# Montar Google Drive
from google.colab import drive
drive.mount('/content/drive')

!pip install gradio

# Activar el entorno virtual
!source /content/drive/MyDrive/colab_env/bin/activate

# Comprobar que las dependencias están instaladas
!pip list

# Tu código aquí
import argparse
import os
import gradio as gr
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread

# Definir la ruta del modelo en Google Drive
model_path = "/content/drive/MyDrive/stable-code-instruct-3b"

# Cargar el tokenizador
tokenizer = AutoTokenizer.from_pretrained(model_path)

# Cargar el modelo
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [None]:

# @title <b>¡Save Model/Tokenizer = MyDrive!💾 {display-mode: "form"}

%%capture
from google.colab import drive
drive.mount('/content/drive')

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Definir la ruta del modelo en Google Drive
model_path = "/content/drive/MyDrive/stable-code-instruct-3b"

# Cargar el tokenizador y el modelo
tokenizer = AutoTokenizer.from_pretrained("stabilityai/stable-code-instruct-3b")
model = AutoModelForCausalLM.from_pretrained("stabilityai/stable-code-instruct-3b", torch_dtype=torch.bfloat16)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Guardar el modelo y el tokenizador
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)

In [None]:

# @title <b>¡Save Dependencies/Import install MyDrive💾 {display-mode: "form"}
%%capture
from google.colab import drive
drive.mount('/content/drive')

# Crear un directorio para el entorno virtual en Google Drive
!mkdir -p /content/drive/MyDrive/colab_env

# Crear el entorno virtual en Google Drive
!python3 -m venv /content/drive/MyDrive/colab_env

# Activar el entorno virtual y luego instalar las dependencias
!source /content/drive/MyDrive/colab_env/bin/activate && pip install transformers==4.38.2 tiktoken torch numpy gradio

In [None]:
# @title <b>Add Huggin Files📂 {display-mode: "form"}

%%capture

# Paso 1: Montar Google Drive
from google.colab import drive

#@markdown **Montar Google Drive para almacenar archivos**
drive.mount('/content/drive')

# Paso 2: Instalar la biblioteca huggingface_hub
#@markdown **Instalar la biblioteca `huggingface_hub` para descargar modelos desde Hugging Face**
!pip install huggingface_hub

# Paso 3: Descargar el archivo .safetensors
from huggingface_hub import hf_hub_download

#@markdown **Especifica el repositorio y el archivo que deseas descargar:**
repo_id = 'stabilityai/stable-code-instruct-3b'  #@param {type:"string"}
filename = 'model-00001-of-00002.safetensors'  #@param {type:"string"}

# Descargar el archivo y guardarlo en Google Drive
file_path = hf_hub_download(repo_id=repo_id, filename=filename)
destination_dir = '/content/drive/MyDrive/stable-code-instruct-3b'  #@param {type:"string"}

# Crear la carpeta destino si no existe
import os
os.makedirs(destination_dir, exist_ok=True)

# Ruta del archivo descargado
source_path = '/content/drive/MyDrive/model-00001-of-00002.safetensors'

# Ruta de destino
destination_path = os.path.join(destination_dir, 'model-00001-of-00002.safetensors')

# Mover el archivo
os.rename(source_path, destination_path)

print(f"Archivo movido a {destination_path}")