# Libraries

In [None]:
import torch
import transformers
from transformers import BloomTokenizerFast, BloomForCausalLM, TrainingArguments
from datasets import load_dataset,list_datasets
from utils import ModifiedTrainer, tokenize_data, data_collator

In [None]:
# Open the 'train' split of the cleaned CodeParrot corpus in streaming mode.
# Alternatives that were tried earlier (kept for reference):
#   load_dataset('deepmind/code_contests', cache_dir='/cpfs01/user/Wuchen2023/datasets')
#   load_dataset('codeparrot/github-code-clean', streaming=True, cache_dir='/cpfs01/user/Wuchen2023/datasets')
dataset = load_dataset(
    'codeparrot/codeparrot-clean-train',
    split='train',
    streaming=True,
    cache_dir='/cpfs01/user/Wuchen2023/datasets',
)
dataset  # notebook display of the loaded dataset object

 # Main

In [None]:
# Directory name of the checkpoint under the local models folder.
model_name = "bloom-7b1"

# Fast BLOOM tokenizer read from that local checkpoint directory.
tokenizer = BloomTokenizerFast.from_pretrained(
    f"/cpfs01/user/Wuchen2023/models/{model_name}",
    add_prefix_space=True,
)

In [None]:
# Pull the first record from the dataset and encode its 'content' field to a
# fixed length of 1024 token ids (longer inputs truncated, shorter ones padded).
sample = next(iter(dataset))
tokenizer.encode(
    sample['content'],
    truncation=True,
    padding="max_length",
    max_length=1024,
)

In [None]:
# Same sample through the tokenizer's __call__ interface: truncated to 1024
# tokens, no padding requested.
tokenizer(
    sample['content'],
    max_length=1024,
    truncation=True,
)

In [None]:
# Report whether CUDA is usable, then pick the matching torch device
# (falls back to the CPU when no GPU is available).
print(torch.cuda.is_available())
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load the causal-LM weights from the local checkpoint directory named by
# `model_name`.
model = BloomForCausalLM.from_pretrained(
    f"/cpfs01/user/Wuchen2023/models/{model_name}"
)


In [None]:
# Replace `dataset` with the Alpaca dataset and display its train split.
dataset = load_dataset(
    'tatsu-lab/alpaca',
    cache_dir='/cpfs01/user/Wuchen2023/datasets',
)
dataset['train']

In [None]:
# Replace `dataset` with the Rotten Tomatoes dataset and display its train split.
dataset = load_dataset(
    'rotten_tomatoes',
    cache_dir='/cpfs01/user/Wuchen2023/datasets',
)
dataset['train']

In [None]:
def tokenize_dataset(dataset, column='code'):
    """Tokenize one column of the mapping passed in by ``dataset.map``.

    Args:
        dataset: Mapping from column name to values. Despite the name, when
            used with ``map(..., batched=True)`` this is presumably a batch
            of rows rather than the whole dataset -- confirm against the
            ``datasets`` documentation.
        column: Name of the column to tokenize. Defaults to ``'code'`` so
            existing callers behave exactly as before.

    Returns:
        Whatever the module-level ``tokenizer`` returns for ``dataset[column]``.

    Raises:
        KeyError: If ``column`` is not present in ``dataset``.
    """
    # NOTE(review): the datasets loaded elsewhere in this notebook
    # ('rotten_tomatoes', 'tatsu-lab/alpaca', 'codeparrot-clean-train') do not
    # obviously expose a 'code' column -- verify the column name, or pass
    # `column=` explicitly (e.g. via functools.partial) when mapping.
    return tokenizer(dataset[column])

# Run the tokenizer over the dataset in batches, then display the train split.
dataset = dataset.map(
    function=tokenize_dataset,
    batched=True,
)
dataset['train']

In [None]:
# Build the training inputs with the helper imported from `utils`.
# The helper is imported as `tokenize_data`; the previous spelling
# `tokenise_data` was never defined and raised NameError.
input_ids = tokenize_data(dataset, tokenizer)

In [None]:
# --- Model preparation ------------------------------------------------------
# Flag the model as parallelizable / model-parallel.
# NOTE(review): presumably this makes the trainer treat the model as already
# placed across devices -- confirm against ModifiedTrainer / Trainer.
model.is_parallelizable = True
model.model_parallel = True
# Gradient checkpointing is switched on before training starts.
model.gradient_checkpointing_enable()

# --- Training configuration -------------------------------------------------
# Results are written to the "output" directory; full precision (fp16 off),
# batch size 2 per device with no gradient accumulation, 2 epochs at lr 2e-5,
# logging every 10 steps.
training_args = TrainingArguments(
    output_dir="output",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    learning_rate=2e-5,
    fp16=False,
    logging_steps=10,
)

# --- Train --------------------------------------------------------------------
# ModifiedTrainer and data_collator both come from the local `utils` module.
trainer = ModifiedTrainer(
    model=model,
    args=training_args,
    train_dataset=input_ids,
    data_collator=data_collator,
)

trainer.train()

In [None]:
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# BLIP-2 (OPT-2.7b) visual question answering on a demo image.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
# device_map='auto' lets the library decide where the weights live; weights
# that do not fit are offloaded to the given folder.
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b",
    device_map='auto',
    offload_folder="/mnt/c/Users/wuc/CodeGuru/offload",
)

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
# The processor returns CPU tensors, while device_map='auto' may have placed
# the model on an accelerator. Move the inputs to the model's device so that
# generate() does not fail with a device mismatch (a no-op on CPU-only hosts).
inputs = processor(raw_image, question, return_tensors="pt").to(model.device)

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))

In [None]:
import requests
from PIL import Image
from transformers import Blip2Processor, Blip2ForConditionalGeneration

# BLIP-2 (OPT-2.7b) visual question answering on a demo image.
# The checkpoint is a BLIP-2 model, so it must be paired with Blip2Processor;
# the BLIP-1 BlipProcessor used here before does not match this checkpoint.
processor = Blip2Processor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained("Salesforce/blip2-opt-2.7b")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg'
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

question = "how many dogs are in the picture?"
inputs = processor(raw_image, question, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))


In [None]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg' 
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# conditional image captioning
text = "a photography of"
inputs = processor(raw_image, text, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))
# >>> a photography of a woman and her dog

# unconditional image captioning
inputs = processor(raw_image, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))


In [None]:
import requests
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration

processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")

img_url = 'https://storage.googleapis.com/sfr-vision-language-research/BLIP/demo.jpg' 
raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')

# conditional image captioning
text = "a photography of"
inputs = processor(raw_image, text, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))

# unconditional image captioning
inputs = processor(raw_image, return_tensors="pt")

out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))

In [None]:
# Caption a local screenshot with whichever `processor` / `model` pair is
# currently loaded in the notebook.
raw_image = Image.open(r"/mnt/c/Users/wuc/Desktop/PoC.png")
raw_image.show()  # open the image in the default viewer

# Conditional captioning: the caption continues the prompt in `text`.
text = "the image describes"
inputs = processor(images=raw_image, text=text, return_tensors="pt")
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))


In [None]:
# Unconditional captioning of the current `raw_image`: image only, no prompt.
inputs = processor(images=raw_image, return_tensors="pt")
out = model.generate(**inputs)
print(processor.decode(out[0], skip_special_tokens=True))

In [None]:
import requests

import os

# Hosted inference endpoint for the MiniGPT-4-LLaMA model.
API_URL = "https://api-inference.huggingface.co/models/wangrongsheng/MiniGPT-4-LLaMA"

# SECURITY: never hard-code access tokens in source. The token is read from
# the HF_TOKEN environment variable instead. The token that was previously
# committed here must be treated as leaked and revoked.
_hf_token = os.environ.get("HF_TOKEN")
# Without a token the Authorization header is omitted entirely rather than
# sending an empty "Bearer " value.
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

def query(payload, timeout=60):
    """POST *payload* as JSON to ``API_URL`` and return the decoded JSON reply.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            endpoint.

    Returns:
        The response body parsed from JSON. The HTTP status is not checked,
        so an error reply that carries a JSON body is returned as-is.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
	
# Send a single text prompt to the hosted model and keep the decoded reply.
output = query({"inputs": "Can you please let us know more details about your "})

In [None]:
# Notebook display of the reply returned by the query above.
output