In [5]:
# !pip install datasets

In [2]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [7]:
!pip install GPUtil

import torch
from GPUtil import showUtilization as gpu_usage
from numba import cuda

def free_gpu_cache():
    print("Initial GPU Usage")
    gpu_usage()                             

    torch.cuda.empty_cache()

    cuda.select_device(0)
    cuda.close()
    cuda.select_device(0)

    print("GPU Usage after emptying the cache")
    gpu_usage()

free_gpu_cache()                           


Collecting GPUtil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hBuilding wheels for collected packages: GPUtil
  Building wheel for GPUtil (setup.py) ... [?25ldone
[?25h  Created wheel for GPUtil: filename=GPUtil-1.4.0-py3-none-any.whl size=7394 sha256=d4c1ab47729f8f8414b9c4f9c2d96001cfc9eca364a4f6563dbb1c7bcba93271
  Stored in directory: /root/.cache/pip/wheels/a9/8a/bd/81082387151853ab8b6b3ef33426e98f5cbfebc3c397a9d4d0
Successfully built GPUtil
Installing collected packages: GPUtil
Successfully installed GPUtil-1.4.0
Initial GPU Usage
| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |
|  1 |  0% |  0% |
GPU Usage after emptying the cache
| ID | GPU | MEM |
------------------
|  0 |  7% |  1% |
|  1 |  0% |  0% |


In [3]:
from datasets import load_dataset, DatasetDict
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from sklearn.model_selection import KFold
from evaluate import load
import numpy as np
import torch

In [9]:

# Load your dataset
dataset = load_dataset('csv', data_files={'data': '/kaggle/input/filtered/filtered_dataset1.csv'})

# Split dataset into 80% train+validation and 20% test sets
train_test_split = dataset['data'].train_test_split(test_size=0.2)

# Create DatasetDict for train+validation and test splits
dataset_dict = DatasetDict({
    'train': train_test_split['train'],  # 80%
    'test': train_test_split['test']  # 20% reserved for final evaluation
})

# Load T5 tokenizer and model
model_name = "t5-small"  # or "t5-base"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Preprocessing function
def preprocess_function(examples):
    inputs = [ex for ex in examples['input']]  # unstructured text
    targets = [ex for ex in examples['output']]  # LaTeX output
    model_inputs = tokenizer(inputs, max_length=512, padding='max_length', truncation=True)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=512, padding='max_length', truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply tokenization to the entire train+validation set
train_dataset = dataset_dict['train'].map(preprocess_function, batched=True)

# BLEU score metric
bleu_metric = load("bleu")

def compute_bleu_metrics(eval_preds):
    predictions, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(predictions[0], skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    bleu_score = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {"bleu_score": bleu_score["bleu"]}

def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak. 
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels


from transformers import TrainingArguments, Trainer

# Training arguments with checkpoint saving every 5000 steps
training_args = TrainingArguments(
    output_dir="./results",                # Directory where model checkpoints will be saved
    evaluation_strategy="no",           # Evaluate every `eval_steps` steps
    save_steps=5000,                       # Save checkpoint every 5000 steps
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    weight_decay=0.000001,
    save_total_limit=2,                    # Limit number of saved checkpoints
    load_best_model_at_end=False           # Load the best model if using early stopping
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    compute_metrics=compute_bleu_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics
)
# Train the model on this fold
trainer.train()

# Finally, calculate accuracy on the last 20% (test set)
tokenized_test = dataset_dict['test'].map(preprocess_function, batched=True)
final_trainer = Trainer(
    model=model,
    args=training_args,
    eval_dataset=tokenized_test,
    compute_metrics=compute_bleu_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics
)

# Evaluate on the test set
print("Final evaluation on the test set:")
final_eval_results = final_trainer.evaluate()
print(final_eval_results)

# # Save the final fine-tuned model and tokenizer
# model.save_pretrained("./fine_tuned_model")
# tokenizer.save_pretrained("./fine_tuned_model")


Generating data split: 0 examples [00:00, ? examples/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Map:   0%|          | 0/19694 [00:00<?, ? examples/s]



Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112359377778425, max=1.0…

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Step,Training Loss
500,1.4659
1000,0.2291
1500,0.1709
2000,0.1408
2500,0.1226
3000,0.111
3500,0.1008
4000,0.0921
4500,0.0879
5000,0.0829


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):
  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Map:   0%|          | 0/4924 [00:00<?, ? examples/s]



Final evaluation on the test set:


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


{'eval_loss': 0.04181332141160965, 'eval_bleu_score': 0.6990608209587831, 'eval_runtime': 654.213, 'eval_samples_per_second': 7.527, 'eval_steps_per_second': 0.471}


In [26]:
# tokenizer = T5Tokenizer.from_pretrained("/kaggle/working/fine_tuned_model")
# model = T5ForConditionalGeneration.from_pretrained("/kaggle/working/fine_tuned_model")

def generate_latex(input_text, output_file_path):
    
    # Tokenize the input text
    inputs = tokenizer(
        input_text, 
        return_tensors="pt", 
        padding="max_length", 
        max_length=512, 
        truncation=True
    )
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    # Generate output with a fixed max length
    outputs = model.generate(**inputs, max_length=512, pad_token_id=tokenizer.pad_token_id)
    
    # Decode the generated output into readable LaTeX format
    latex_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    latex_output = latex_output.replace("SLASH","\\").replace("UNDERSCORE","_").replace("CAP","^").replace("LEFTB","{").replace("RIGHTB","}")
    
    # Write the output to a file
    with open(output_file_path, "w") as f:
        f.write(latex_output)
    
    # Return the LaTeX output in case you still want to print it
    return latex_output

# Example usage
input_text = ""
output_file_path = "generated_latex.txt"  # Specify the output file path
output_latex = generate_latex(input_text, output_file_path)

# Optionally print the generated LaTeX
print("Generated LaTeX:", output_latex)



Generated LaTeX: \int x ^ { 2 } \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \frac { 1 } ^ { 2 } \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \


In [1]:
from huggingface_hub import login

# Log in to Hugging Face using the generated token
login(token="token")


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [12]:
model.push_to_hub("vinalal/speech-latex1")
tokenizer.push_to_hub("vinalal/speech-latex1")

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/vinalal/speech-latex1/commit/a799e2867c963222677e5315a7316fb0c66ede89', commit_message='Upload tokenizer', commit_description='', oid='a799e2867c963222677e5315a7316fb0c66ede89', pr_url=None, repo_url=RepoUrl('https://huggingface.co/vinalal/speech-latex1', endpoint='https://huggingface.co', repo_type='model', repo_id='vinalal/speech-latex1'), pr_revision=None, pr_num=None)

In [4]:
model = T5ForConditionalGeneration.from_pretrained("vinalal/speech-latex1")
tokenizer = T5Tokenizer.from_pretrained("vinalal/speech-latex1")

config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.8k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.59k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

In [28]:
dataset = load_dataset('csv', data_files={'data': '/kaggle/input/tuning3/tuning.csv'})

# Split dataset into 80% train+validation and 20% test sets
train_data = dataset['data']

def preprocess_function(examples):
    inputs = [ex for ex in examples['input']]  # unstructured text
    targets = [ex for ex in examples['output']]  # LaTeX output
    model_inputs = tokenizer(inputs, max_length=512, padding='max_length', truncation=True)
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(targets, max_length=512, padding='max_length', truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply tokenization to the entire train+validation set
train_dataset = train_data.map(preprocess_function, batched=True)

# BLEU score metric
bleu_metric = load("bleu")

def compute_bleu_metrics(eval_preds):
    predictions, labels = eval_preds
    decoded_preds = tokenizer.batch_decode(predictions[0], skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    bleu_score = bleu_metric.compute(predictions=decoded_preds, references=decoded_labels)
    return {"bleu_score": bleu_score["bleu"]}

def preprocess_logits_for_metrics(logits, labels):
    """
    Original Trainer may have a memory leak. 
    This is a workaround to avoid storing too many tensors that are not needed.
    """
    pred_ids = torch.argmax(logits[0], dim=-1)
    return pred_ids, labels

from transformers import TrainingArguments, Trainer

# Training arguments with checkpoint saving every 5000 steps
training_args = TrainingArguments(
    output_dir="./results",                # Directory where model checkpoints will be saved
    evaluation_strategy="no",           # Evaluate every `eval_steps` steps
    learning_rate=5e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,
    num_train_epochs=8,
    weight_decay=0.000001,
    load_best_model_at_end=False           # Load the best model if using early stopping
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    compute_metrics=compute_bleu_metrics,
    preprocess_logits_for_metrics = preprocess_logits_for_metrics
)
# Train the model on this fold
trainer.train()

Generating data split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/503 [00:00<?, ? examples/s]

  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


Step,Training Loss
500,0.0087


  with torch.cuda.device(device), torch.cuda.stream(stream), autocast(enabled=autocast_enabled):


TrainOutput(global_step=504, training_loss=0.00864115608708253, metrics={'train_runtime': 225.7267, 'train_samples_per_second': 17.827, 'train_steps_per_second': 2.233, 'total_flos': 544615409123328.0, 'train_loss': 0.00864115608708253, 'epoch': 8.0})

In [6]:
# tokenizer = T5Tokenizer.from_pretrained("/kaggle/working/fine_tuned_model")
# model = T5ForConditionalGeneration.from_pretrained("/kaggle/working/fine_tuned_model")

def generate_latex(input_text, output_file_path):
    
    # Tokenize the input text
    inputs = tokenizer(
        input_text, 
        return_tensors="pt", 
        padding="max_length", 
        max_length=512, 
        truncation=True
    )
    device = torch.device("cpu")
    inputs = {key: value.to(device) for key, value in inputs.items()}
    # Generate output with a fixed max length
    outputs = model.generate(**inputs, max_length=512, pad_token_id=tokenizer.pad_token_id)
    
    # Decode the generated output into readable LaTeX format
    latex_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    latex_output = latex_output.replace("SLASH","\\").replace("UNDERSCORE","_").replace("CAP","^").replace("LEFTB","{").replace("RIGHTB","}")
    
    # Write the output to a file
    with open(output_file_path, "w") as f:
        f.write(latex_output)
    
    # Return the LaTeX output in case you still want to print it
    return latex_output

# Example usage
input_text = "e to the power of i times pi plus 1 equals 0."
output_file_path = "generated_latex.txt"  # Specify the output file path
output_latex = generate_latex(input_text, output_file_path)

# Optionally print the generated LaTeX
print("Generated LaTeX:", output_latex)


Generated LaTeX: e^{ ( i ) \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, \, 1 = \,


In [2]:
!pip install gradio
!pip install SpeechRecognition

Collecting gradio
  Downloading gradio-5.4.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.4-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.2 (from gradio)
  Downloading gradio_client-1.4.2-py3-none-any.whl.metadata (7.1 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<1.0,>=0.1.1 (from gradio)
  Downloading safehttpx-0.1.1-py3-none-any.whl.metadata (4.1 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting starlette<1.0,>=0.40.0 (from gradio)
  Downloading starlette-0.41.2-py3-none-any.whl.metadata (6.0

In [1]:
import gradio as gr
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
import matplotlib.pyplot as plt
from tempfile import NamedTemporaryFile
import speech_recognition as sr
from huggingface_hub import login

# Log in to Hugging Face using the token
login(token="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")

# Initialize model and tokenizer from Hugging Face
model = T5ForConditionalGeneration.from_pretrained("vinalal/speech-latex1")
tokenizer = T5Tokenizer.from_pretrained("vinalal/speech-latex1")
device = torch.device("cpu")
model.to(device)

# Function to convert speech to text
def recognize_speech(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source, duration=10)
        text = recognizer.recognize_google(audio_data)
    return text

# Convert recognized text to LaTeX
def generate_latex(input_text):
    inputs = tokenizer(input_text, return_tensors="pt", padding="max_length", max_length=512, truncation=True)
    inputs = {key: value.to(device) for key, value in inputs.items()}
    outputs = model.generate(**inputs, max_length=512, pad_token_id=tokenizer.pad_token_id)
    latex_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
    latex_output = latex_output.replace("SLASH", "\\").replace("UNDERSCORE", "_").replace("CAP", "^").replace("LEFTB", "{").replace("RIGHTB", "}")
    return latex_output

# Render LaTeX formula to an image
def render_latex_to_image(latex_code):
    fig, ax = plt.subplots()
    ax.text(0.5, 0.5, f"${latex_code}$", fontsize=20, ha='center', va='center')
    ax.axis("off")  # Remove axes for a clean formula display
    
    with NamedTemporaryFile(delete=False, suffix=".png") as tmp_file:
        image_path = tmp_file.name
        plt.savefig(image_path, format="png", bbox_inches="tight", pad_inches=0.2)
    plt.close(fig)
    return image_path

# Combined function for Gradio interface
def process_audio(audio):
    # Recognize speech
    recognized_text = recognize_speech(audio)
    
    # Generate LaTeX
    latex_code = generate_latex(recognized_text)
    
    # Render LaTeX to an image for preview
    preview_image_path = render_latex_to_image(latex_code)
    
    return latex_code, preview_image_path

# Set up Gradio interface
interface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # Audio input
    outputs=[
        gr.Textbox(label="Generated LaTeX Code"),
        gr.Image(label="Compiled LaTeX Preview")
    ],
    title="Speech-to-LaTeX Demo",
    description="Provide input speech to get LaTeX code and preview it."
)

# Launch in the notebook
interface.launch(debug=True)


ModuleNotFoundError: No module named 'gradio'