In [13]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from collections import defaultdict

In [3]:
# !pip install opencv-python

In [10]:
import cv2 

In [17]:
import gensim

In [18]:
print("version:", gensim.__version__)

version: 4.3.3


In [5]:
import gensim.downloader as api

# Download pretrained model (~1.6GB)
# word2vec = api.load("word2vec-google-news-300") 

In [6]:
import os
from pathlib import Path

# Get current working directory (where notebook is run)
PROJECT_ROOT = Path().absolute()  # or Path.cwd()
TRAIN_PATH = PROJECT_ROOT / os.path.join(PROJECT_ROOT, "data", "raw", "Flicker8k_Dataset", "train")
TEST_PATH = PROJECT_ROOT / os.path.join(PROJECT_ROOT, "data", "raw", "Flicker8k_Dataset", "test")
VAL_PATH = PROJECT_ROOT / os.path.join(PROJECT_ROOT, "data", "raw", "Flicker8k_Dataset", "val")
descriptions_path = PROJECT_ROOT / os.path.join(PROJECT_ROOT, "data", "raw", "Flicker8k_Dataset", "Flickr8k.token.txt")

Extract features from images and caption words:

In [7]:
# def extract_image_features(image):
#     """Color histograms + edge features"""
#     hist = cv2.calcHist([image], [0, 1, 2], None, [8, 8, 8], [0, 256, 0, 256, 0, 256])
#     edges = cv2.Canny(image, 100, 200)
#     return np.concatenate([hist.flatten(), edges.flatten()])

# def word_features(prev_word, image_feats):
#     """Combine linguistic and visual features"""
#     return np.hstack([
#         image_feats,
#         word2vec[prev_word] if prev_word in word2vec else np.zeros(100)
#     ])

Training Data Prep

In [8]:
train_dir, val_dir = '/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/data/raw/Flicker8k_Dataset/train', '/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/data/raw/Flicker8k_Dataset/val'
test_dir = '/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/data/raw/Flicker8k_Dataset/test'
tokens_dir = '/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/data/raw/Flicker8k_Dataset/Flickr8k.token.txt'

Train MEMM (Logistic Regression)

Generate descriptions

In [2]:
import os
import numpy as np
from collections import defaultdict
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
from gensim.models import KeyedVectors
import joblib  # For caching
from datetime import datetime

In [7]:
import sklearn

In [8]:
print(sklearn.__version__)

1.6.1


In [10]:
# Initialize components with memoization
class FeatureProcessor:
    def __init__(self):
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)
        self._is_fitted = False
        self.cache_path = "./feature_cache"
        os.makedirs(self.cache_path, exist_ok=True)
        
    def normalize(self, features):
        if not self._is_fitted:
            features = self.scaler.fit_transform(features)
            features = self.pca.fit_transform(features)
            self._is_fitted = True
        else:
            features = self.scaler.transform(features)
            features = self.pca.transform(features)
        return features

# Initialize at global scope
fp = FeatureProcessor()

In [11]:
class MEMMCaptionGenerator:
    def __init__(self, cache_path="./feature_cache"):
        self.model = LogisticRegression(
            max_iter=1000,
            C=0.1,
            penalty='l2',
            solver='saga',
            n_jobs=-1,
            verbose=1
        )
        self.vec = DictVectorizer(sparse=False)
        self.word2idx = {}
        self.idx2word = {}
        self.word2vec = None  # Initialize as None, will be loaded later
        self.cache_path = cache_path
        os.makedirs(self.cache_path, exist_ok=True)
        
        # Initialize feature processor
        from sklearn.preprocessing import StandardScaler
        from sklearn.decomposition import PCA
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)

    def train(self, train_images, train_captions, val_images=None, val_captions=None, patience=3):
        """Train the MEMM model with optional early stopping"""
        # Load word embeddings if not already loaded
        if self.word2vec is None:
            self.load_word_embeddings()
        
        # Create vocabulary
        all_words = [word for caps in train_captions for cap in caps for word in cap]
        self.word2idx = {w: i for i, w in enumerate(set(all_words))}
        self.idx2word = {i: w for w, i in self.word2idx.items()}
        
        # Prepare features
        X, y = [], []
        for img_path, caps in zip(train_images, train_captions):
            img_feats = self.extract_image_features(img_path)
            for cap in caps:
                for i in range(1, len(cap)):
                    features = self.word_features(img_path, cap[i-1], img_feats)
                    feat_dict = self.create_feature_dict(cap[i-1], img_feats, features)
                    X.append(feat_dict)
                    y.append(self.word2idx[cap[i]])
        
        # Vectorize and train
        X_vec = self.vec.fit_transform(X)
        
        if val_images:
            return self.train_with_early_stopping(X_vec, y, val_images, val_captions, patience)
        else:
            self.model.fit(X_vec, y)
            return self

    def train_with_early_stopping(self, X_vec, y, val_images, val_captions, patience):
        """Helper method for early stopping during training"""
        best_bleu = -1
        no_improve = 0
        best_weights = None
        
        for epoch in range(100):  # Max epochs
            self.model.fit(X_vec, y)
            
            # Validate
            val_bleu = self.evaluate(val_images, val_captions)
            print(f"Epoch {epoch+1}: Val BLEU = {val_bleu:.4f}")
            
            # Early stopping check
            if val_bleu > best_bleu:
                best_bleu = val_bleu
                no_improve = 0
                best_weights = self.model.coef_.copy()
            else:
                no_improve += 1
                if no_improve >= patience:
                    print(f"Early stopping at epoch {epoch+1}")
                    self.model.coef_ = best_weights
                    break
        
        return self

    def evaluate(self, val_images, val_captions):
        """Evaluate model on validation set"""
        hypotheses = []
        references = []
        
        for img_path, caps in zip(val_images, val_captions):
            gen_caption = self.generate_caption(img_path)
            hypotheses.append(gen_caption.split())
            references.append(caps)
        
        from nltk.translate.bleu_score import corpus_bleu, SmoothingFunction
        smooth = SmoothingFunction().method1
        return corpus_bleu(references, hypotheses, smoothing_function=smooth)

In [12]:
!pip install torchvision


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [57]:
!pip install -U torch==2.3.1
!pip install torchvision==0.17.2


Collecting torch==2.3.1
  Downloading torch-2.3.1-cp312-none-macosx_11_0_arm64.whl.metadata (26 kB)
Downloading torch-2.3.1-cp312-none-macosx_11_0_arm64.whl (61.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 MB[0m [31m57.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: torch
  Attempting uninstall: torch
    Found existing installation: torch 2.7.0
    Uninstalling torch-2.7.0:
      Successfully uninstalled torch-2.7.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchvision 0.22.0 requires torch==2.7.0, but you have torch 2.3.1 which is incompatible.[0m[31m
[0mSuccessfully installed torch-2.3.1

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[3

In [9]:
import torchvision
print(torchvision.__version__)

0.22.0


In [13]:
import os
import numpy as np
import cv2
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction import DictVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from gensim.scripts.glove2word2vec import glove2word2vec
from gensim.models import KeyedVectors
import torch
# import torchvision.models as models
from torchvision.models import mobilenet_v2
import torchvision.transforms.v2 as transforms
from PIL import Image
from collections import defaultdict

In [24]:
import time

In [29]:
import gc  # Garbage collection
import time
import urllib.request
import zipfile


class MEMMCaptionGenerator:
    def __init__(self, glove_path=None, cache_path="./feature_cache", device='cpu',
                 batch_size=32, use_pca=True, pca_components=0.95, embedding_dim=100):
        self.model = LogisticRegression(
            max_iter=1000,
            C=0.1,
            penalty='l2',
            solver='saga',
            n_jobs=-1,
            verbose=0
        )
        self.vec = DictVectorizer(sparse=True)
        self.word2idx = {}
        self.idx2word = {}
        self.glove_path = glove_path
        self.word_embeddings = {}
        self.embedding_dim = embedding_dim  # Default embedding dimension
        self.cache_path = cache_path
        self.device = device
        self.batch_size = batch_size
        self.use_pca = use_pca
        os.makedirs(self.cache_path, exist_ok=True)

        # Feature processing
        self.scaler = StandardScaler()
        if use_pca:
            self.pca = PCA(n_components=pca_components)
        self.is_fitted = False

        # CNN feature extractor - load only when needed
        self.cnn_model = None
        self.transform = None

    def _load_cnn_model(self):
        """Lazy loading of CNN model to save memory"""
        if self.cnn_model is None:
            print("Loading CNN model...")
            self.cnn_model = mobilenet_v2(pretrained=True)
            self.cnn_model = torch.nn.Sequential(*list(self.cnn_model.children())[:-1])
            self.cnn_model.eval()
            self.cnn_model.to(self.device)
            self.transform = transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                    std=[0.229, 0.224, 0.225])
            ])

    def download_glove_embeddings(self, dimension=100):
        """Download GloVe embeddings if not available"""
        glove_dir = "./glove"
        os.makedirs(glove_dir, exist_ok=True)

        # Set file paths
        zip_path = os.path.join(glove_dir, "glove.6B.zip")
        glove_path = os.path.join(glove_dir, f"glove.6B.{dimension}d.txt")

        # Check if file already exists
        if os.path.exists(glove_path):
            print(f"GloVe embeddings already exist at {glove_path}")
            self.glove_path = glove_path
            return glove_path

        # Download if needed
        if not os.path.exists(zip_path):
            print("Downloading GloVe embeddings...")
            url = "https://nlp.stanford.edu/data/glove.6B.zip"
            try:
                urllib.request.urlretrieve(url, zip_path)
                print(f"Downloaded to {zip_path}")
            except Exception as e:
                print(f"Error downloading GloVe embeddings: {e}")
                print("Please download manually from https://nlp.stanford.edu/data/glove.6B.zip")
                print("and extract the files to ./glove/ directory")
                return None

        # Extract if needed
        if not os.path.exists(glove_path):
            print(f"Extracting {dimension}d GloVe embeddings...")
            try:
                with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                    zip_ref.extract(f"glove.6B.{dimension}d.txt", glove_dir)
                print(f"Extracted to {glove_path}")
            except Exception as e:
                print(f"Error extracting GloVe embeddings: {e}")
                return None

        self.glove_path = glove_path
        return glove_path

    def load_glove_embeddings(self):
        """Load GloVe embeddings directly from file"""
        if not self.word_embeddings:
            # Try to download if path not provided or file doesn't exist
            if not self.glove_path or not os.path.exists(self.glove_path):
                self.glove_path = self.download_glove_embeddings(dimension=self.embedding_dim)

            if not self.glove_path or not os.path.exists(self.glove_path):
                print("WARNING: No GloVe embeddings available. Using random embeddings instead.")
                return

            print(f"Loading GloVe embeddings from {self.glove_path}...")
            self.word_embeddings = {}
            try:
                with open(self.glove_path, 'r', encoding='utf-8') as f:
                    for line in f:
                        values = line.strip().split()
                        word = values[0]
                        vector = np.array(values[1:], dtype='float32')
                        self.word_embeddings[word] = vector

                        # Set embedding dimension based on first vector
                        if self.embedding_dim is None:
                            self.embedding_dim = len(vector)

                print(f"Loaded {len(self.word_embeddings)} word vectors with dimension {self.embedding_dim}")
            except Exception as e:
                print(f"Error loading GloVe embeddings: {e}")
                print("Using random embeddings instead.")
        else:
            print("Embeddings already loaded")

    def get_feature_cache_key(self, img_path, prev_word):
        """Generate a unique cache key for features"""
        img_name = os.path.basename(img_path)
        return f"{img_name}_{prev_word}.pkl"

    def extract_image_features(self, image_path):
        """Extract image features with caching"""
        # Check if features are already cached
        cache_file = os.path.join(self.cache_path, f"{os.path.basename(image_path)}_features.pkl")
        if os.path.exists(cache_file):
            return joblib.load(cache_file)

        # Load CNN model if not already loaded
        self._load_cnn_model()

        # Load image
        img = Image.open(image_path).convert('RGB')

        # CNN features
        img_tensor = self.transform(img).unsqueeze(0).to(self.device)
        with torch.no_grad():
            cnn_features = self.cnn_model(img_tensor).squeeze().cpu().numpy().flatten()

        # Traditional features - simplified to reduce computation
        img_cv = cv2.imread(image_path)
        if img_cv is None:
            raise ValueError(f"Could not load image at {image_path}")

        # Simplified color histogram (reduced bins)
        hsv = cv2.cvtColor(img_cv, cv2.COLOR_BGR2HSV)
        hist_hsv = cv2.calcHist([hsv], [0, 1], None, [8, 8], [0, 180, 0, 256]).flatten()

        # Simplified edge features
        gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
        edges = cv2.Canny(gray, 50, 150)
        edge_hist = cv2.calcHist([edges], [0], None, [16], [0, 256]).flatten()

        features = {
            'cnn_features': cnn_features,
            'color_hist': hist_hsv,
            'edge_feats': edge_hist
        }

        # Cache the features
        joblib.dump(features, cache_file)

        return features

    def create_feature_dict(self, prev_word, img_feats, word_embed):
        """Create feature dictionary for vectorization - optimized version"""
        # Create a dictionary with prev_word as a categorical feature
        feature_dict = {'prev_word': prev_word}

        # Add numerical features with proper naming - use fewer features
        # For CNN features - use only a subset to reduce dimensionality
        cnn_step = max(1, len(img_feats['cnn_features']) // 100)  # Sample only ~100 CNN features
        for i, val in enumerate(img_feats['cnn_features'][::cnn_step]):
            feature_dict[f'cnn_{i}'] = float(val)

        # For color histogram - use all
        for i, val in enumerate(img_feats['color_hist']):
            feature_dict[f'color_{i}'] = float(val)

        # For edge features - use all
        for i, val in enumerate(img_feats['edge_feats']):
            feature_dict[f'edge_{i}'] = float(val)

        # For word embeddings - use only a subset to reduce dimensionality
        embed_step = max(1, len(word_embed) // 50)  # Sample only ~50 embedding features
        for i, val in enumerate(word_embed[::embed_step]):
            feature_dict[f'embed_{i}'] = float(val)

        return feature_dict

    def get_word_embedding(self, word):
        """Get word embedding with fallback to zero vector"""
        if not self.word_embeddings:
            self.load_glove_embeddings()

        # If still no embeddings or word not found, use random embedding
        if not self.word_embeddings or word not in self.word_embeddings:
            return np.zeros(self.embedding_dim, dtype=np.float32)

        return self.word_embeddings[word]

    def train(self, train_images, train_captions, val_images=None, val_captions=None, patience=3):
        """Train the MEMM model with memory-efficient batching"""
        start_time = time.time()

        # Create vocabulary
        print("Building vocabulary...")
        all_words = set()
        for caps in train_captions:
            for cap in caps:
                all_words.update(cap)

        self.word2idx = {w: i for i, w in enumerate(all_words)}
        self.idx2word = {i: w for w, i in self.word2idx.items()}
        print(f"Vocabulary size: {len(self.word2idx)}")

        # Process in batches to save memory
        X_batches = []
        y_batches = []

        total_images = len(train_images)
        for batch_start in range(0, total_images, self.batch_size):
            batch_end = min(batch_start + self.batch_size, total_images)
            print(f"Processing batch {batch_start//self.batch_size + 1}/{(total_images-1)//self.batch_size + 1} "
                  f"(images {batch_start+1}-{batch_end}/{total_images})")

            X_batch = []
            y_batch = []

            # Process each image in the batch
            for img_idx in range(batch_start, batch_end):
                img_path = train_images[img_idx]
                caps = train_captions[img_idx]

                # Extract image features (cached)
                try:
                    img_feats = self.extract_image_features(img_path)
                except Exception as e:
                    print(f"Error processing image {img_path}: {e}")
                    continue

                # Process each caption
                for cap in caps:
                    for i in range(1, len(cap)):
                        prev_word = cap[i-1]
                        target_word = cap[i]

                        # Get word embedding
                        word_embed = self.get_word_embedding(prev_word)

                        # Create feature dictionary
                        feat_dict = self.create_feature_dict(prev_word, img_feats, word_embed)

                        X_batch.append(feat_dict)
                        y_batch.append(self.word2idx[target_word])

            # Vectorize this batch
            if X_batch:  # Only process if batch is not empty
                X_vec_batch = self.vec.fit_transform(X_batch) if not X_batches else self.vec.transform(X_batch)
                X_batches.append(X_vec_batch)
                y_batches.extend(y_batch)

                # Clear memory
                del X_batch, y_batch
                gc.collect()

        # Combine all batches
        print("Combining batches...")
        from scipy.sparse import vstack
        X_vec = vstack(X_batches) if all(hasattr(x, 'shape') for x in X_batches) else None
        y = np.array(y_batches)

        # Clear memory
        del X_batches, y_batches
        gc.collect()

        # Train the model
        print(f"Training model on {X_vec.shape[0]} examples with {X_vec.shape[1]} features...")
        self.model.fit(X_vec, y)

        print(f"Training completed in {(time.time() - start_time)/60:.2f} minutes")
        return self

    def generate_caption(self, image_path, beam_width=3, max_length=20):
        """Generate caption using beam search - simplified version"""
        # Extract image features
        img_feats = self.extract_image_features(image_path)

        # Initialize beam
        beam = [{'sequence': ['<start>'], 'score': 0.0}]

        for _ in range(max_length):
            if all(s['sequence'][-1] == '<end>' for s in beam):
                break

            new_beam = []

            for state in beam:
                if state['sequence'][-1] == '<end>':
                    new_beam.append(state)
                    continue

                # Get word embedding for previous word
                prev_word = state['sequence'][-1]
                word_embed = self.get_word_embedding(prev_word)

                # Create feature dictionary
                feat_dict = self.create_feature_dict(prev_word, img_feats, word_embed)

                # Predict next words
                feats = self.vec.transform([feat_dict])
                log_probs = self.model.predict_log_proba(feats)[0]

                # Get top candidates
                top_indices = np.argsort(log_probs)[-beam_width:]
                for idx in top_indices:
                    word = self.idx2word[idx]
                    new_score = state['score'] + log_probs[idx]
                    new_sequence = state['sequence'] + [word]
                    new_beam.append({
                        'sequence': new_sequence,
                        'score': new_score
                    })

            # Select top beam_width candidates
            beam = sorted(new_beam, key=lambda x: x['score'], reverse=True)[:beam_width]

        # Return best sequence
        best_sequence = max(beam, key=lambda x: x['score'])['sequence']
        return ' '.join(best_sequence[1:-1])  # Remove <start> and <end>

    def save_model(self, path):
        """Save model to disk"""
        print(f"Saving model to {path}...")
        model_data = {
            'model': self.model,
            'vec': self.vec,
            'word2idx': self.word2idx,
            'idx2word': self.idx2word,
            'scaler': self.scaler,
            'is_fitted': self.is_fitted,
            'embedding_dim': self.embedding_dim
        }
        if self.use_pca:
            model_data['pca'] = self.pca

        joblib.dump(model_data, path)
        print("Model saved successfully")

    def load_model(self, path):
        """Load model from disk"""
        print(f"Loading model from {path}...")
        model_data = joblib.load(path)
        self.model = model_data['model']
        self.vec = model_data['vec']
        self.word2idx = model_data['word2idx']
        self.idx2word = model_data['idx2word']
        self.scaler = model_data['scaler']
        self.is_fitted = model_data['is_fitted']
        self.embedding_dim = model_data.get('embedding_dim', 100)
        if 'pca' in model_data:
            self.pca = model_data['pca']
            self.use_pca = True
        print("Model loaded successfully")

In [None]:
# def load_image_captions(descriptions_path):
#     """Load {image_id.jpg: [captions]} dictionary"""
#     desc_dict = defaultdict(list)
#     with open(descriptions_path, 'r') as f:
#         for line in f:
#             img_file, caption = line.strip().split('\t', 1)
#             desc_dict[img_file].append(caption.split())
#     return desc_dict

In [15]:
tokens_dir = '/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/data/raw/Flickr8k_text/Flickr8k.token.txt'

In [22]:
import os
from collections import defaultdict

def prepare_data(image_dir, desc_dict):
    """Return (images, captions) pairs for a directory"""
    images, captions = [], []
    for img_file in os.listdir(image_dir):
        if img_file.endswith('.jpg') and img_file in desc_dict:
            images.append(os.path.join(image_dir, img_file))
            captions.append(desc_dict[img_file])  # All reference captions
    return images, captions

def load_image_captions(tokens_file):
    """Load {image_filename: [list of tokenized captions]} from tokens file"""
    captions = defaultdict(list)
    with open(tokens_file) as f:
        for line in f:
            if line.strip():  # Skip empty lines
                parts = line.strip().split('\t')
                if len(parts) == 2:  # Ensure proper format
                    img_file = parts[0].split('#')[0]  # Remove #0, #1 etc.
                    caption = parts[1].lower().split()  # Tokenized and lowercase
                    captions[img_file].append(caption)
    return dict(captions)

def get_image_paths(image_dir, captions_dict):
    """Get aligned (image_path, captions) pairs for a directory"""
    return [
        (os.path.join(image_dir, img_file), captions_dict[img_file])
        for img_file in os.listdir(image_dir)
        if img_file.endswith('.jpg') and img_file in captions_dict
    ]

# Load all captions from token file
all_captions = load_image_captions(tokens_dir)

# Organize into splits
train_pairs = get_image_paths(train_dir, all_captions)
val_pairs = get_image_paths(val_dir, all_captions)
test_pairs = get_image_paths(test_dir, all_captions)

# Separate into images and captions
train_images, train_captions = zip(*train_pairs) if train_pairs else ([], [])
val_images, val_captions = zip(*val_pairs) if val_pairs else ([], [])
test_images, test_captions = zip(*test_pairs) if test_pairs else ([], [])

# Convert tuples to lists (for easier modification)
train_images, train_captions = list(train_images), list(train_captions)
val_images, val_captions = list(val_images), list(val_captions)
test_images, test_captions = list(test_images), list(test_captions)

In [30]:
memm = MEMMCaptionGenerator(
    glove_path="glove.6B.100d.txt",
    cache_path="./feature_cache",
    batch_size=16,  # Process fewer images at once
    use_pca=True   # Skip PCA to simplify
)

# Train
memm.train(train_images, train_captions)

# Save model
memm.save_model("/Users/ruhwang/Desktop/AI/spring2025_courses/aipi540-dl/caption_generator/models/memm_caption_model.pkl")

Building vocabulary...
Vocabulary size: 7600
Processing batch 1/354 (images 1-16/5663)
Downloading GloVe embeddings...
Downloaded to ./glove/glove.6B.zip
Extracting 100d GloVe embeddings...
Extracted to ./glove/glove.6B.100d.txt
Loading GloVe embeddings from ./glove/glove.6B.100d.txt...
Loaded 400000 word vectors with dimension 100
Loading CNN model...
Processing batch 2/354 (images 17-32/5663)
Processing batch 3/354 (images 33-48/5663)
Processing batch 4/354 (images 49-64/5663)
Processing batch 5/354 (images 65-80/5663)
Processing batch 6/354 (images 81-96/5663)
Processing batch 7/354 (images 97-112/5663)
Processing batch 8/354 (images 113-128/5663)
Processing batch 9/354 (images 129-144/5663)
Processing batch 10/354 (images 145-160/5663)
Processing batch 11/354 (images 161-176/5663)
Processing batch 12/354 (images 177-192/5663)
Processing batch 13/354 (images 193-208/5663)
Processing batch 14/354 (images 209-224/5663)
Processing batch 15/354 (images 225-240/5663)
Processing batch 16/

: 