In [18]:
import tensorflow as tf
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
import speech_recognition as sr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
import torchvision.transforms as transforms
from PIL import Image
from transformers import AutoTokenizer, AutoModel
import logging
from transformers import logging as hf_logging
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

In [14]:
# Raise the log threshold to ERROR for both the standard `logging` root
# configuration and the Hugging Face `transformers` logger, so only errors
# are reported by those two logging systems.
logging.basicConfig(level=logging.ERROR)
hf_logging.set_verbosity_error()

In [2]:
def speech_to_text():
    """Record one utterance from the microphone and transcribe it.

    Audio is captured with ``sr.Microphone`` and sent to
    ``Recognizer.recognize_google`` for transcription.

    Returns:
        The recognized text, or ``None`` when the audio could not be
        understood or the recognition request failed (a message is printed
        in both failure cases).
    """
    engine = sr.Recognizer()

    with sr.Microphone() as mic:
        print("Listening...")
        captured = engine.listen(mic)

    # Single exit point: stays None unless recognition succeeds.
    transcript = None
    try:
        print("Recognizing...")
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        print("Could not understand audio")
    except sr.RequestError as err:
        print(f"Could not request results; {err}")

    return transcript

# Example usage: capture one utterance and echo the transcription.
# speech_to_text() returns None on failure, in which case this prints
# "You said: None".
transcribed_text = speech_to_text()
print("You said:", transcribed_text)

Listening...
Recognizing...
Could not understand audio
You said: None


In [9]:
def process_text(text, model_name="bert-base-uncased"):
    """Embed text as the mean of the encoder's last hidden states.

    Args:
        text: A string (or list of strings) to embed.
        model_name: Hugging Face checkpoint to load for both tokenizer and
            encoder. Defaults to ``"bert-base-uncased"``.

    Returns:
        A tensor with one row per input text, obtained by averaging
        ``last_hidden_state`` over the token dimension. Padding positions
        are excluded from the average. The tensor carries no autograd graph.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)

    # truncation keeps over-long inputs within the model's maximum length;
    # padding lets a list of texts with different lengths be batched.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # Inference only: skip building the autograd graph, as process_image does.
    with torch.no_grad():
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    # Weight each token by its attention mask so padded positions do not
    # dilute the mean. For a single unpadded text this equals a plain mean.
    mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
    token_counts = mask.sum(dim=1).clamp(min=1)
    text_embedding = (hidden * mask).sum(dim=1) / token_counts
    return text_embedding

In [10]:
def process_image(image_path):
    """Run an image through a pretrained ResNet-50 and return its output.

    The image is converted to RGB, resized to 256, center-cropped to 224,
    turned into a tensor and normalized with the mean/std constants below
    before being fed to the model as a batch of one.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        The tensor produced by the model for the single-image batch.
        NOTE(review): the hub ``resnet50`` is used unmodified, so this is
        presumably the classifier output rather than a pooled feature
        vector. Confirm that this is what callers want as an "embedding".
    """
    # Close the file handle deterministically, and force three channels:
    # grayscale or RGBA files would otherwise not match the 3-channel
    # Normalize constants below.
    with Image.open(image_path) as raw_image:
        image = raw_image.convert("RGB")

    transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Add the leading batch dimension the model expects.
    image_tensor = transform(image).unsqueeze(0)

    model = torch.hub.load("pytorch/vision", "resnet50", pretrained=True)
    model.eval()

    # Inference only: no gradients needed.
    with torch.no_grad():
        image_embedding = model(image_tensor)

    return image_embedding


## Sirve para todo lo que es buscar en Google

In [5]:
def search_articles(query, api_key, cx, timeout=10):
    """Query the Google Custom Search JSON API and return result links.

    Args:
        query: Search terms. Passed as a request parameter so spaces and
            characters such as ``&`` or ``#`` are URL-encoded correctly.
        api_key: API key sent as the ``key`` parameter.
        cx: Search engine ID sent as the ``cx`` parameter.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list with the ``link`` of each entry in the response's ``items``,
        or an empty list when the response has no ``items`` key.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.get(
        "https://www.googleapis.com/customsearch/v1",
        params={"key": api_key, "cx": cx, "q": query},
        timeout=timeout,
    )
    # Surface bad keys and quota errors as an HTTP error instead of a
    # KeyError on the missing "items" field further down.
    response.raise_for_status()
    data = response.json()
    return [item["link"] for item in data.get("items", [])]



In [6]:
def scrape_article(url, timeout=10):
    """Download a page and return the text of its ``<p>`` elements.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The text of every ``<p>`` element, joined with single spaces.
        Empty string when the page has no paragraphs.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            error pages are not scraped as if they were article content.
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    return " ".join(paragraph.text for paragraph in soup.find_all("p"))


In [7]:

# Lazily created summarization pipeline, shared by every summarize_text call.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared summarization pipeline, building it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization")
    return _SUMMARIZER


def summarize_text(text):
    """Summarize ``text`` with the default summarization pipeline.

    Args:
        text: The text to summarize.

    Returns:
        The ``summary_text`` of the first pipeline result, or an empty
        string when ``text`` is empty or only whitespace.
    """
    # Nothing to summarize (e.g. a scraped page without paragraphs).
    if not text or not text.strip():
        return ""

    # Reuse one pipeline instead of reloading the model on every call.
    summarizer = _get_summarizer()
    # truncation=True cuts inputs longer than the model accepts instead of
    # failing on long articles.
    summary = summarizer(
        text,
        max_length=150,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]

In [17]:
def process_input(text=None, image_path=None):
    """Embed the given text and/or image, print and return the results.

    Args:
        text: Text to embed with ``process_text``. Skipped when falsy.
        image_path: Image to embed with ``process_image``. Skipped when
            falsy (e.g. ``None`` or ``""``).

    Returns:
        A ``(text_embedding, image_embedding)`` tuple. Each element is
        ``None`` when the corresponding input was not provided.
    """
    text_embedding = None
    image_embedding = None

    if text:
        text_embedding = process_text(text)
        print("Text embedding:\n", text_embedding)

    if image_path:
        image_embedding = process_image(image_path)
        print("Image embedding:\n", image_embedding)

    # Hand the embeddings back so callers can use them. Each one is printed
    # exactly once above.
    return text_embedding, image_embedding

# Example usage: embed a text prompt only. The empty image_path is falsy, so
# process_input skips its image branch.
process_input(text="Can you translate to french this: 'Hello, my name is Vigo Walker.'", image_path="")


Text embedding:
 tensor([[ 3.0664e-02,  2.7714e-01, -9.1591e-03, -2.1780e-01,  5.5857e-01,
          6.0666e-02,  4.2229e-01,  7.2604e-01, -6.4565e-02, -2.5360e-01,
          7.7892e-02, -1.7580e-01, -8.7040e-02,  5.1066e-02, -3.1571e-01,
          3.3879e-01, -9.3154e-03,  8.3648e-02, -1.2632e-01,  3.3357e-01,
          1.8940e-01,  2.3967e-03, -8.4752e-02,  4.1016e-01,  4.3265e-01,
          2.3155e-01, -2.1470e-01,  1.6782e-01, -5.5896e-01, -4.3271e-01,
          2.2313e-01,  9.3111e-02, -2.5458e-01, -2.7067e-01, -7.7486e-03,
         -1.7288e-01, -7.1298e-02, -1.9842e-01, -5.6821e-01,  2.4015e-01,
         -5.3005e-01, -1.7691e-01,  3.3282e-01,  1.8040e-02, -1.6690e-01,
         -4.2645e-01,  1.9286e-01, -1.6905e-01, -1.4371e-01, -4.9484e-03,
         -3.6910e-01,  4.9370e-03, -1.9216e-01, -9.5659e-03,  1.4639e-01,
          7.0196e-01, -5.0443e-01, -5.7186e-01, -3.1220e-01, -1.4678e-01,
         -7.8183e-03, -2.2864e-01,  3.2701e-01, -4.3311e-01,  1.3237e-01,
          1.9121e-01,

In [None]:
def main():
    """Prompt for a query, then summarize up to three matching articles.

    Credentials are read from the ``GOOGLE_API_KEY`` and ``GOOGLE_CSE_ID``
    environment variables, falling back to the placeholder strings when a
    variable is unset. Articles that cannot be downloaded, or that contain
    no paragraph text, are reported and skipped.
    """
    import os

    query = input("Enter your query: ")
    # Keep real credentials out of the notebook: prefer the environment and
    # fall back to the placeholders.
    api_key = os.environ.get("GOOGLE_API_KEY", "YOUR_API_KEY")
    cx = os.environ.get("GOOGLE_CSE_ID", "YOUR_CX")

    article_urls = search_articles(query, api_key, cx)
    all_summaries = []

    for url in article_urls[:3]:  # Limit to the first 3 articles
        try:
            content = scrape_article(url)
        except requests.RequestException as error:
            # One unreachable site should not abort the whole run.
            print(f"Skipping {url}: {error}")
            continue

        if not content.strip():
            print(f"Skipping {url}: no paragraph text found")
            continue

        all_summaries.append(summarize_text(content))

    print("Summaries of top 3 articles:")
    for position, summary in enumerate(all_summaries, start=1):
        print(f"{position}. {summary}")

# Start the interactive search-and-summarize flow only when this code runs
# as the main module.
if __name__ == "__main__":
    main()


In [19]:
def translate_to_french(text, max_length=512):
    """Translate English text to French with ``Helsinki-NLP/opus-mt-en-fr``.

    Args:
        text: English text to translate.
        max_length: Upper bound on the generated sequence length, passed to
            ``model.generate``. The previous hard-coded value of 50 cut
            longer translations off mid-sentence.

    Returns:
        The decoded translation of the first generated sequence, with
        special tokens removed.
    """
    model_name = "Helsinki-NLP/opus-mt-en-fr"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    # Calling the tokenizer (rather than .encode) also yields the attention
    # mask. truncation keeps over-long inputs within the model's limit.
    inputs = tokenizer(text, return_tensors="pt", truncation=True)
    outputs = model.generate(**inputs, max_length=max_length)
    translated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return translated_text

# Example usage: translate a sample English sentence and print the result.
translated_text = translate_to_french("Hello, my name is Vigo Walker.")
print("Translated text:", translated_text)


Downloading (…)okenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

Downloading (…)olve/main/source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

Downloading (…)olve/main/target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]



Downloading pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

Translated text: Bonjour, je m'appelle Vigo Walker.


In [1]:
!nvidia-smi

# !pip install transformers
# !pip install torch

import json
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel, DataCollatorForLanguageModeling

class CodeDataset(Dataset):
    """Dataset that tokenizes one stored text per requested index.

    Args:
        data: Indexable collection of text strings.
        tokenizer: Callable tokenizer invoked with ``truncation``,
            ``padding``, ``max_length`` and ``return_tensors`` keywords.
        max_length: Length every example is truncated or padded to.
    """

    def __init__(self, data, tokenizer, max_length=512):
        self.data, self.tokenizer, self.max_length = data, tokenizer, max_length

    def __len__(self):
        """Number of stored texts."""
        return len(self.data)

    def __getitem__(self, idx):
        """Tokenize the text at ``idx`` and return its tensors.

        The tokenizer is asked for ``'pt'`` tensors, and dimension 0 of
        each returned tensor is squeezed away.
        """
        encoded = self.tokenizer(
            self.data[idx],
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt',
        )
        item = {}
        for name, tensor in encoded.items():
            item[name] = tensor.squeeze(0)
        return item

# Load preprocessed data from JSON file. The encoding is given explicitly so
# the result does not depend on the platform's default codec.
with open('code_examples.json', 'r', encoding='utf-8') as f:
    preprocessed_data = json.load(f)

# Convert lists of words to text strings
data = [' '.join(words) for words in preprocessed_data]

# Initialize the GPT-2 tokenizer and dataset. The EOS token is reused as the
# padding token (see the assignment below).
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token
dataset = CodeDataset(data, tokenizer)

# Create a DataLoader for batching. With mlm=False the collator builds the
# "labels" entry for causal language modeling.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, collate_fn=data_collator)

# Load the pre-trained GPT-2 model and move it to the target device BEFORE
# any DataParallel wrapping.
model = GPT2LMHeadModel.from_pretrained('gpt2')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Check if multiple GPUs are available and wrap the model with nn.DataParallel
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs")
    model = torch.nn.DataParallel(model)

model.train()

# Set up the optimizer and training loop
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)

epochs = 3
for epoch in range(epochs):
    epoch_loss_total = 0.0
    epoch_batches = 0

    for batch in dataloader:
        optimizer.zero_grad()

        input_ids = batch["input_ids"].to(device)
        # Every example is padded to max_length, so the mask is needed to
        # keep the model from attending to padding positions.
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # Under DataParallel the loss comes back as one value per GPU;
        # reduce it to a scalar so backward() works. No-op on a single device.
        loss = outputs.loss.mean()

        loss.backward()
        optimizer.step()

        epoch_loss_total += loss.item()
        epoch_batches += 1

    # Report the mean loss over the epoch. Guarded so an empty dataloader
    # does not raise.
    if epoch_batches:
        print(f"Epoch: {epoch + 1}, Loss: {epoch_loss_total / epoch_batches}")
    else:
        print(f"Epoch: {epoch + 1}, Loss: n/a (no batches)")

# Save the trained model. DataParallel does not expose save_pretrained, so
# unwrap to the underlying model first.
model_to_save = model.module if isinstance(model, torch.nn.DataParallel) else model
model_to_save.save_pretrained("trained_2_model")

# To load the saved model later:
# model = GPT2LMHeadModel.from_pretrained("trained_2_model")


Sat Apr 29 23:58:44 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 531.14                 Driver Version: 531.14       CUDA Version: 12.1     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                      TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf            Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX 1650 S...  WDDM | 00000000:01:00.0  On |                  N/A |
|  0%   46C    P0               23W / 100W|   1383MiB /  4096MiB |      9%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce GTX 1650 S...  WDDM | 00000000:05:00.0  On |  