In [None]:
# Roll the model forward to forecast values past the last known sequence
future_days = 10
future_predictions = []

# Seed the rolling window with the final test sequence
current_input = X_test[-1]

for _ in range(future_days):
    # Predict one step from the current window, fed as a batch of one
    next_prediction = model.predict(np.reshape(current_input, (1, SEQ_LENGTH, 1)))[0]
    future_predictions.append(next_prediction)
    # Slide the window: drop the oldest value, push the newest prediction
    current_input = np.concatenate(
        (np.ravel(current_input[1:]), np.ravel(next_prediction))
    ).reshape(SEQ_LENGTH, 1)

# Undo the scaler's transform on the collected predictions
future_predictions = scaler.inverse_transform(future_predictions)

# Display predictions
print(f"Next {future_days} days predicted prices:", future_predictions, sep="\n")

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, LSTM, Embedding, Input, Dropout
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
import os

In [None]:
# Download and extract the Flickr8k dataset (images + captions)
!wget https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_Dataset.zip
!wget https://github.com/jbrownlee/Datasets/releases/download/Flickr8k/Flickr8k_text.zip
!unzip Flickr8k_Dataset.zip
!unzip Flickr8k_text.zip

# Define paths
image_folder = 'Flickr8k_Dataset/Flicker8k_Dataset'
caption_file = 'Flickr8k_text/Flickr8k.token.txt'

# Load and display the caption file
captions = open(caption_file, 'r').read()
print("Sample Captions:\n", captions.split("\n")[:5])

In [None]:
import re

# Parse captions and map image names to their captions
def load_captions(caption_file):
    """Read a caption file and group cleaned captions by image name.

    Each useful line has the form ``<image name>#<n><TAB><caption>``. The
    caption is lower-cased, stripped of every character that is not a letter,
    digit or space, and wrapped in ``startseq`` / ``endseq`` markers.

    Args:
        caption_file: Path of the caption file to read.

    Returns:
        dict mapping each image name (the text before ``#``) to the list of
        its processed captions, in file order.

    Lines shorter than two characters, or without a tab separator, are skipped.
    """
    captions_dict = {}
    # Read from the path that was passed in rather than from a notebook global,
    # so the function works on a fresh kernel and for any caption file.
    with open(caption_file, 'r') as caption_fh:
        for raw_line in caption_fh:
            line = raw_line.rstrip("\n")
            if len(line) < 2:
                continue
            # Split on the first tab only, so a tab inside the caption text
            # cannot break the two-way unpacking.
            image_id, separator, caption = line.partition("\t")
            if not separator:
                continue
            image_id = image_id.split("#")[0]
            caption = re.sub(r"[^a-zA-Z0-9 ]", "", caption.lower())
            captions_dict.setdefault(image_id, []).append(
                "startseq " + caption + " endseq"
            )
    return captions_dict

# Build the image -> captions lookup and show the captions of its first image
captions_dict = load_captions(caption_file)
print("Sample Processed Captions:", captions_dict[list(captions_dict)[0]])

In [None]:
# Build the image feature extractor: VGG16 with ImageNet weights, with the
# output taken from its second-to-last layer instead of the final layer
base_model = VGG16(weights='imagenet')
cnn_model = Model(
    inputs=base_model.input,
    outputs=base_model.layers[-2].output,
)

def extract_features(image_path):
    """Return the flattened `cnn_model` output for the image at `image_path`.

    The image is loaded at 224x224, converted to an array, given a leading
    batch axis and run through VGG16's `preprocess_input` before prediction.
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    batch = tf.keras.applications.vgg16.preprocess_input(np.expand_dims(pixels, axis=0))
    return cnn_model.predict(batch).flatten()

# Run the extractor on the first image named in the captions
image_path = os.path.join(image_folder, list(captions_dict)[0])
features = extract_features(image_path)
print("Extracted Features Shape:", features.shape)

In [None]:
# Flatten the per-image caption lists and fit a tokenizer on every caption
all_captions = [text for image_captions in captions_dict.values() for text in image_captions]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(all_captions)
# One more than the number of distinct words in the fitted word index
vocab_size = 1 + len(tokenizer.word_index)

print("Vocabulary Size:", vocab_size)

# Prepare sequences
def create_sequences(tokenizer, captions, max_length):
    """Expand captions into (prefix, next-token) training pairs.

    Every caption is converted to token ids; for each position after the first,
    the tokens before it form one input prefix and the token at it is the
    target.

    Returns:
        (X_text, y_text): the prefixes post-padded to `max_length`, and an
        array holding the target token id of each prefix.
    """
    prefixes = []
    next_tokens = []
    for token_ids in tokenizer.texts_to_sequences(captions):
        for cut in range(1, len(token_ids)):
            prefixes.append(token_ids[:cut])
            next_tokens.append(token_ids[cut])
    padded_prefixes = pad_sequences(prefixes, maxlen=max_length, padding='post')
    return padded_prefixes, np.array(next_tokens)

# The longest caption, counted in whitespace-separated words, sets the padded length
max_length = max(map(len, map(str.split, all_captions)))
X_text, y_text = create_sequences(tokenizer, all_captions, max_length)
print("Text Sequences Shape:", X_text.shape, y_text.shape)

In [None]:
# Define the Image Captioning model: an image-feature branch and a caption-prefix
# branch are merged by element-wise addition and fed to a softmax over the vocabulary.
embedding_dim = 256

# Image feature input
# One 4096-value vector per sample (presumably the flattened output of
# extract_features — TODO confirm), projected down to 256 units.
image_input = Input(shape=(4096,))
image_features = Dense(256, activation='relu')(image_input)

# Caption input
# A sequence of max_length token ids. mask_zero=True makes the Embedding layer
# treat id 0 as padding; vocab_size is len(word_index) + 1, leaving room for it.
text_input = Input(shape=(max_length,))
text_features = Embedding(vocab_size, embedding_dim, mask_zero=True)(text_input)
text_features = LSTM(256)(text_features)

# Combine both inputs
# Both branches end in 256 units, so their outputs can be added element-wise.
decoder = tf.keras.layers.add([image_features, text_features])
output = Dense(vocab_size, activation='softmax')(decoder)

# Compile the model
# The sparse loss takes integer token ids as targets, so y needs no one-hot encoding.
model = Model(inputs=[image_input, text_input], outputs=output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()


In [None]:
# Placeholder for training (requires feature extraction for all images)
# image_features_dict = {image_id: extract_features(image_folder + '/' + image_id) for image_id in captions_dict.keys()}

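# Uncomment this code when features are ready.
# NOTE: model.fit needs X_image to have one row per training sequence in X_text
# (each image's features repeated for every caption prefix of that image), not
# one row per image as in the line below.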
# X_image = np.array([image_features_dict[image_id] for image_id in captions_dict.keys()])
# model.fit([X_image, X_text], y_text, epochs=20, batch_size=64)

In [None]:
# Generate caption for an image
def generate_caption(model, tokenizer, image_features, max_length):
    """Greedily decode a caption for a single image.

    Starting from ``startseq``, the caption so far is tokenized and padded, the
    model scores the next word, and the highest-scoring word is appended.
    Decoding stops at ``endseq``, at an id the tokenizer has no word for, or
    after ``max_length`` steps.

    Args:
        model: Captioning model whose ``predict`` takes
            ``[image batch, token batch]`` and returns next-word scores.
        tokenizer: Fitted tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        image_features: Features of one image, either a flat vector or already
            batched with a leading axis of 1.
        max_length: Padded sequence length, also the maximum number of
            decoding steps.

    Returns:
        The generated caption with the ``startseq`` / ``endseq`` markers removed.
    """
    # predict needs a leading batch axis on both inputs; a flat feature vector
    # is reshaped to (1, n_features), an already batched one is left unchanged.
    image_batch = np.asarray(image_features).reshape(1, -1)
    caption = 'startseq'
    for _ in range(max_length):
        seq = tokenizer.texts_to_sequences([caption])[0]
        seq = pad_sequences([seq], maxlen=max_length, padding='post')
        # verbose=0 keeps the per-step progress bar out of the cell output
        scores = model.predict([image_batch, seq], verbose=0)
        word_id = int(np.argmax(scores, axis=-1)[0])
        word = tokenizer.index_word.get(word_id)
        # An id without a word would leave the caption unchanged and repeat the
        # same prediction until max_length, so stop there as for 'endseq'.
        if word is None or word == 'endseq':
            break
        caption += ' ' + word
    return caption.replace('startseq', '').replace('endseq', '').strip()

# Test the model on a new image
# print("Generated Caption:", generate_caption(model, tokenizer, features, max_length))
