In [1]:
#!/usr/bin/env python
# coding: utf-8

import os
import torch
import pickle
from torch.utils.data import DataLoader
from transformers import AutoTokenizer
from model_network import CLIPPhi2Model, train_model
from dataset import collate_fn, llavadataset

# Proxy setup, if necessary.
# The proxy address defaults to the value this notebook has always used, but it
# can be overridden -- or disabled with an empty string -- through the
# TRAIN_PROXY_URL environment variable, so the cell no longer has to be edited
# for machines that reach the network directly or through another proxy.
# NOTE(review): an unreachable proxy aborts training as soon as a DataLoader
# worker tries to download an image through it -- confirm the address is live.
proxy_url = os.environ.get('TRAIN_PROXY_URL', 'http://185.46.212.90:80')
if proxy_url:
    try:
        os.environ['HTTP_PROXY'] = proxy_url
        os.environ['HTTPS_PROXY'] = proxy_url
        print("Proxy exported")
    except Exception as e:
        print("Could not set proxy:", e)
else:
    # An explicitly empty TRAIN_PROXY_URL means "do not touch the proxy settings".
    print("Proxy disabled via TRAIN_PROXY_URL")

# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
cuda_ready = torch.cuda.is_available()
device = torch.device('cuda' if cuda_ready else 'cpu')
if not cuda_ready:
    print("CUDA is not available. Using CPU instead.")
else:
    print(f"Using CUDA: {torch.cuda.device_count()} GPUs available")

# Read the pickled dataset from the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while deserialising --
# only point this at a file you produced or otherwise trust.
dataset_pickle_path = "coco_dataset_pickle"
with open(dataset_pickle_path, "rb") as pickle_file:
    coco_unpickle = pickle.load(pickle_file)

# Tokenizer and model setup
# Batch sizes consumed by the two data loaders, followed by the pretrained
# model identifiers handed to the dataset and the tokenizer.
train_batch_size, val_batch_size = 4, 4
clip_model_name = "openai/clip-vit-base-patch32"
phi_model_name = "microsoft/phi-2"

# Build the tokenizer from the Phi-2 identifier, then write a copy to
# ./saved_tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    phi_model_name,
    trust_remote_code=True,
    use_cache=True,
)
tokenizer.save_pretrained("saved_tokenizer")

# Model initialization and DataParallel wrapping
# With CUDA the model is wrapped in DataParallel and moved to the selected
# device; without CUDA the freshly constructed model is used unchanged.
base_model = CLIPPhi2Model()
MModalGPT = (
    torch.nn.DataParallel(base_model).to(device)
    if torch.cuda.is_available()
    else base_model
)

# Data loaders setup
# Both splits share every DataLoader option except the batch size, so the
# common options live in one dict.
loader_options = dict(collate_fn=collate_fn, num_workers=20, shuffle=True, pin_memory=True)

train_dataset = llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'train', tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=train_batch_size, **loader_options)

val_dataset = llavadataset(coco_unpickle, phi_model_name, clip_model_name, 'val', tokenizer)
val_dataloader = DataLoader(val_dataset, batch_size=val_batch_size, **loader_options)

# Optimizer setup
# Adam is given only the parameters that still require gradients.
trainable_params = (param for param in MModalGPT.parameters() if param.requires_grad)
optimizer = torch.optim.Adam(trainable_params, lr=1e-6)

# Set float32_matmul_precision to 'medium'
torch.set_float32_matmul_precision('medium')

# Train the model
# Step counts and the caption-length filter are collected in one place and
# passed to train_model as keyword arguments.
training_schedule = dict(
    max_steps=100000,
    model_save_step=1000,
    model_val_step=1000,
    log_step=100,
    max_token_filter=35,
    tokenizer=tokenizer,
)
train_model(MModalGPT, train_dataloader, val_dataloader, optimizer, device, **training_schedule)


Using device: cuda
Proxy exported
Using CUDA: 4 GPUs available


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  return self.fget.__get__(instance, owner)()


Train size 532577 and validation size 59176
Train size 532577 and validation size 59176
Training started.




Step 0/100000: Avg Running Loss = 5.348811149597168
Step 100/100000: Avg Running Loss = 5.33362282037735
Step 200/100000: Avg Running Loss = 5.241844072341919
Step 300/100000: Avg Running Loss = 5.254102308750152
Step 400/100000: Avg Running Loss = 5.234310839176178
Step 500/100000: Avg Running Loss = 5.187703197002411
Batch skipped as captions too long.
Step 600/100000: Avg Running Loss = 5.267480807304382
Batch skipped as captions too long.
Step 700/100000: Avg Running Loss = 5.271900467872619
Step 800/100000: Avg Running Loss = 5.24034351348877
Step 900/100000: Avg Running Loss = 5.3043603372573855
Saving Checkpoint
0 - Target captions:
 A close shot of a glass container with a bundle of roses inside.  
0 - predicted_captions:
 A plate of food on a plate a a a a a. a.....<|endoftext|> 
1 - Target captions:
 Round toddler Bento lunch boxes with character utensils<|endoftext|><|endoftext|><|endoftext|>  
1 - predicted_captions:
 A bowl of fruit and a a of a a of a a of a a of a of<|en

ProxyError: Caught ProxyError in DataLoader worker process 15.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py", line 703, in urlopen
    httplib_response = self._make_request(
  File "/usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py", line 449, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "/usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py", line 444, in _make_request
    httplib_response = conn.getresponse()
  File "/usr/lib/python3.8/http/client.py", line 1348, in getresponse
    response.begin()
  File "/usr/lib/python3.8/http/client.py", line 316, in begin
    version, status, reason = self._read_status()
  File "/usr/lib/python3.8/http/client.py", line 277, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "/usr/lib/python3.8/socket.py", line 669, in readinto
    return self._sock.recv_into(b)
ConnectionResetError: [Errno 104] Connection reset by peer

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/requests/adapters.py", line 486, in send
    resp = conn.urlopen(
  File "/usr/local/lib/python3.8/dist-packages/urllib3/connectionpool.py", line 787, in urlopen
    retries = retries.increment(
  File "/usr/local/lib/python3.8/dist-packages/urllib3/util/retry.py", line 592, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPConnectionPool(host='185.46.212.90', port=80): Max retries exceeded with url: http://images.cocodataset.org/train2017/000000247487.jpg (Caused by ProxyError('Cannot connect to proxy.', ConnectionResetError(104, 'Connection reset by peer')))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 51, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/usr/local/lib/python3.8/dist-packages/torch/utils/data/_utils/fetch.py", line 51, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/saurabh/era_saurabh/ERA/capstone_part2/step1_pretrain/step1_dataset.py", line 41, in __getitem__
    image_load = Image.open(requests.get(img_url,stream=True).raw)
  File "/usr/local/lib/python3.8/dist-packages/requests/api.py", line 73, in get
    return request("get", url, params=params, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/requests/api.py", line 59, in request
    return session.request(method=method, url=url, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/requests/sessions.py", line 589, in request
    resp = self.send(prep, **send_kwargs)
  File "/usr/local/lib/python3.8/dist-packages/requests/sessions.py", line 703, in send
    r = adapter.send(request, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/requests/adapters.py", line 513, in send
    raise ProxyError(e, request=request)
requests.exceptions.ProxyError: HTTPConnectionPool(host='185.46.212.90', port=80): Max retries exceeded with url: http://images.cocodataset.org/train2017/000000247487.jpg (Caused by ProxyError('Cannot connect to proxy.', ConnectionResetError(104, 'Connection reset by peer')))


In [None]:
### this is for running in local ###
# Export the HTTP(S) proxy environment variables for this process.
import os

try:
    os.environ['HTTP_PROXY'] = 'http://185.46.212.90:80'
    os.environ['HTTPS_PROXY'] = 'http://185.46.212.90:80'
    print("proxy_exported")
except Exception as e:
    # Still best-effort, but report the failure instead of hiding it behind a
    # bare `except:` (which would also swallow KeyboardInterrupt/SystemExit).
    print("Could not set proxy:", e)

In [None]:
import torch
from step1_network import CLIPPhi2Model, train_model
from step1_dataset import collate_fn, llavadataset
from torch.utils.data import random_split, DataLoader
import torch.nn as nn
from transformers import AutoTokenizer
import pickle
import os

In [None]:
# Use the GPU when PyTorch reports CUDA support; otherwise stay on the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(f"Using device: {device}")

In [None]:
# Unpickling
# NOTE(review): pickle.load can run arbitrary code from the file -- only load
# a pickle that comes from a trusted source.
with open("coco_dataset_pickle", mode="rb") as pickle_file:
    coco_unpickle = pickle.load(pickle_file)

In [None]:
# Preview only the first few entries: rendering the whole object would flood
# the cell output and bloat the saved notebook. Slicing is already relied on
# when the data loaders are built from coco_unpickle[0:100].
coco_unpickle[:5]

In [None]:
# Pretrained model identifiers passed to the dataset and the tokenizer.
clip_model_name = "openai/clip-vit-base-patch32"
phi_model_name = "microsoft/phi-2"

# Batch sizes for the train / validation data loaders.
train_batch_size = 2
val_batch_size = 4

tokenizer = AutoTokenizer.from_pretrained(
    phi_model_name,
    trust_remote_code=True,
    use_cache=True,
)

In [None]:
# Write the tokenizer files to ./saved_tokenizer; the call's return value is
# left as the cell's last expression so the notebook still displays it.
tokenizer_dir = "saved_tokenizer"
tokenizer.save_pretrained(tokenizer_dir)

In [None]:
# Show which device the following cells will run on.
print(device)

In [None]:
# Training-loop settings. The trailing comments are carried over from the
# original cell -- presumably the values for a full-length run (TODO confirm).
max_steps, model_save_step = 100, 10   # 100000, 1000
model_val_step, log_step = 2, 2        # 1000, 1000
max_token_filter = 35                  # 35 -- memory management restriction

# model
MModalGPT = CLIPPhi2Model().to(device)

In [None]:
# data loaders
def _make_loader(split, batch_size):
    """Build a shuffled DataLoader over the first 100 dataset entries for `split`."""
    dataset = llavadataset(coco_unpickle[0:100], phi_model_name, clip_model_name, split, tokenizer)
    return DataLoader(
        dataset,
        collate_fn=collate_fn,
        batch_size=batch_size,
        num_workers=2,
        shuffle=True,
        pin_memory=True,
    )


train_dataloader = _make_loader('train', train_batch_size)
val_dataloader = _make_loader('val', val_batch_size)



In [None]:
# Hand Adam only the parameters that still require gradients.
trainable_params = [param for param in MModalGPT.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=1e-5)

In [None]:
# Set the float32 matmul precision to 'medium', then start training with the
# settings defined in the cells above.
torch.set_float32_matmul_precision('medium')
train_model(
    MModalGPT,
    train_dataloader,
    val_dataloader,
    optimizer,
    device,
    max_steps,
    model_save_step,
    model_val_step,
    log_step,
    max_token_filter,
    tokenizer,
)