In [None]:
pip install tensorflow numpy matplotlib



In [None]:
import numpy as np
from PIL import Image
import os
from tensorflow.keras.preprocessing.text import Tokenizer
import tensorflow as tf

def load_images_and_labels(main_folder):
    images = []
    labels = []
    # Debug paths
    yes_folder = os.path.join(main_folder, 'yes')  # Lowercase 'yes'
    no_folder = os.path.join(main_folder, 'no')    # Lowercase 'no'
    print("Yes folder:", yes_folder, "Exists?", os.path.isdir(yes_folder))
    print("No folder:", no_folder, "Exists?", os.path.isdir(no_folder))

    # Load Yes (tumor) images
    if os.path.isdir(yes_folder):
        yes_files = os.listdir(yes_folder)
        print("Files in yes_folder:", yes_files)
        for filename in yes_files:
            if filename.endswith('.jpg'):
                img = Image.open(os.path.join(yes_folder, filename)).resize((128, 128)).convert('L')
                img = np.array(img) / 255.0
                images.append(img)
                labels.append('Yes')
    else:
        print("Yes folder not found!")

    # Load No (normal) images
    if os.path.isdir(no_folder):
        no_files = os.listdir(no_folder)
        print("Files in no_folder:", no_files)
        for filename in no_files:
            if filename.endswith('.jpg'):
                img = Image.open(os.path.join(no_folder, filename)).resize((128, 128)).convert('L')
                img = np.array(img) / 255.0
                images.append(img)
                labels.append('No')
    else:
        print("No folder not found!")

    return np.array(images), labels

def generate_reports(labels):
    reports = []
    for label in labels:
        if label == "Yes":
            reports.append("Findings: Tumor detected in brain tissue")
        elif label == "No":
            reports.append("Findings: No abnormalities observed in brain tissue")
    return reports

# Mount Drive (force remount if needed)
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Check contents
main_folder = '/content/drive/MyDrive/BrainMRIzip/BrainMRI'
print("Main folder contents:", os.listdir(main_folder))

# Load data
images, labels = load_images_and_labels(main_folder)
reports = generate_reports(labels)

# Tokenize reports
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reports)
report_sequences = tokenizer.texts_to_sequences(reports)
max_length = max(len(seq) for seq in report_sequences) if report_sequences else 0  # Avoid empty sequence error
report_padded = tf.keras.preprocessing.sequence.pad_sequences(report_sequences, maxlen=max_length)
vocab_size = len(tokenizer.word_index) + 1 if tokenizer.word_index else 0

print(f"Loaded {len(images)} images and {len(reports)} reports.")

Mounted at /content/drive
Main folder contents: ['no', 'yes']
Yes folder: /content/drive/MyDrive/BrainMRIzip/BrainMRI/yes Exists? True
No folder: /content/drive/MyDrive/BrainMRIzip/BrainMRI/no Exists? True
Files in yes_folder: ['Y117.JPG', 'Y100.JPG', 'Y13.jpg', 'Y14.jpg', 'Y112.JPG', 'Y114.JPG', 'Y116.JPG', 'Y11.jpg', 'Y105.jpg', 'Y111.JPG', 'Y147.JPG', 'Y104.jpg', 'Y113.JPG', 'Y12.jpg', 'Y1.jpg', 'Y101.jpg', 'Y107.jpg', 'Y102.jpg', 'Y115.JPG', 'Y10.jpg', 'Y106.jpg', 'Y120.JPG', 'Y109.JPG', 'Y108.jpg', 'Y146.JPG', 'Y103.jpg', 'Y186.jpg', 'Y170.JPG', 'Y185.jpg', 'Y183.jpg', 'Y159.JPG', 'Y157.JPG', 'Y166.JPG', 'Y18.JPG', 'Y187.jpg', 'Y188.jpg', 'Y182.JPG', 'Y181.jpg', 'Y20.jpg', 'Y154.jpg', 'Y15.jpg', 'Y148.JPG', 'Y22.jpg', 'Y156.JPG', 'Y169.jpg', 'Y153.jpg', 'Y163.JPG', 'Y167.JPG', 'Y193.JPG', 'Y161.JPG', 'Y184.JPG', 'Y21.jpg', 'Y168.jpg', 'Y164.JPG', 'Y155.JPG', 'Y16.JPG', 'Y195.JPG', 'Y19.JPG', 'Y17.jpg', 'Y2.jpg', 'Y162.jpg', 'Y194.jpg', 'Y158.JPG', 'Y165.JPG', 'Y180.jpg', 'Y160.JPG

In [None]:
import numpy as np
from PIL import Image
import os
from tensorflow.keras.preprocessing.text import Tokenizer
import tensorflow as tf
from tensorflow.keras import layers
import pickle

# Load images and labels
def load_images_and_labels(main_folder):
    images = []
    labels = []
    yes_folder = os.path.join(main_folder, 'yes')
    no_folder = os.path.join(main_folder, 'no')
    print("Yes folder:", yes_folder, "Exists?", os.path.isdir(yes_folder))
    print("No folder:", no_folder, "Exists?", os.path.isdir(no_folder))

    if os.path.isdir(yes_folder):
        yes_files = os.listdir(yes_folder)
        print("Files in yes_folder:", yes_files)
        for filename in yes_files:
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                img = Image.open(os.path.join(yes_folder, filename)).resize((128, 128)).convert('L')
                img = np.array(img) / 255.0
                images.append(img)
                labels.append('Yes')
    if os.path.isdir(no_folder):
        no_files = os.listdir(no_folder)
        print("Files in no_folder:", no_files)
        for filename in no_files:
            if filename.lower().endswith(('.jpg', '.jpeg', '.png')):
                img = Image.open(os.path.join(no_folder, filename)).resize((128, 128)).convert('L')
                img = np.array(img) / 255.0
                images.append(img)
                labels.append('No')
    return np.array(images), labels

def generate_reports(labels):
    reports = []
    for label in labels:
        if label == "Yes":
            reports.append("Findings: Tumor detected in brain tissue")
        elif label == "No":
            reports.append("Findings: No abnormalities observed in brain tissue")
    return reports

# Mount Drive
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Check contents
main_folder = '/content/drive/MyDrive/BrainMRIzip/BrainMRI'
print("Main folder contents:", os.listdir(main_folder))

# Load data
images, labels = load_images_and_labels(main_folder)
reports = generate_reports(labels)

# Tokenize reports
tokenizer = Tokenizer()
tokenizer.fit_on_texts(reports)
report_sequences = tokenizer.texts_to_sequences(reports)
max_length = max(len(seq) for seq in report_sequences) if report_sequences else 0
report_padded = tf.keras.preprocessing.sequence.pad_sequences(report_sequences, maxlen=max_length)
vocab_size = len(tokenizer.word_index) + 1 if tokenizer.word_index else 0

print(f"Loaded {len(images)} images and {len(reports)} reports.")

# Build CNN
def build_cnn():
    model = tf.keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(256, activation='relu')
    ])
    return model

# Build LSTM (fixed to output sequences)
def build_lstm(vocab_size, max_length):
    model = tf.keras.Sequential([
        layers.LSTM(128, return_sequences=True, input_shape=(max_length, 256)),
        layers.LSTM(128, return_sequences=True),  # Keep sequence output
        layers.TimeDistributed(layers.Dense(vocab_size, activation='softmax'))  # Predict word at each timestep
    ])
    return model

# Combine
cnn = build_cnn()
lstm = build_lstm(vocab_size, max_length)
image_input = layers.Input(shape=(128, 128, 1))
cnn_output = cnn(image_input)
cnn_output = layers.RepeatVector(max_length)(cnn_output)  # Shape: (batch, max_length, 256)
lstm_output = lstm(cnn_output)  # Shape: (batch, max_length, vocab_size)
model = tf.keras.Model(inputs=image_input, outputs=lstm_output)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
model.summary()



Mounted at /content/drive
Main folder contents: ['no', 'yes']
Yes folder: /content/drive/MyDrive/BrainMRIzip/BrainMRI/yes Exists? True
No folder: /content/drive/MyDrive/BrainMRIzip/BrainMRI/no Exists? True
Files in yes_folder: ['Y117.JPG', 'Y100.JPG', 'Y13.jpg', 'Y14.jpg', 'Y112.JPG', 'Y114.JPG', 'Y116.JPG', 'Y11.jpg', 'Y105.jpg', 'Y111.JPG', 'Y147.JPG', 'Y104.jpg', 'Y113.JPG', 'Y12.jpg', 'Y1.jpg', 'Y101.jpg', 'Y107.jpg', 'Y102.jpg', 'Y115.JPG', 'Y10.jpg', 'Y106.jpg', 'Y120.JPG', 'Y109.JPG', 'Y108.jpg', 'Y146.JPG', 'Y103.jpg', 'Y186.jpg', 'Y170.JPG', 'Y185.jpg', 'Y183.jpg', 'Y159.JPG', 'Y157.JPG', 'Y166.JPG', 'Y18.JPG', 'Y187.jpg', 'Y188.jpg', 'Y182.JPG', 'Y181.jpg', 'Y20.jpg', 'Y154.jpg', 'Y15.jpg', 'Y148.JPG', 'Y22.jpg', 'Y156.JPG', 'Y169.jpg', 'Y153.jpg', 'Y163.JPG', 'Y167.JPG', 'Y193.JPG', 'Y161.JPG', 'Y184.JPG', 'Y21.jpg', 'Y168.jpg', 'Y164.JPG', 'Y155.JPG', 'Y16.JPG', 'Y195.JPG', 'Y19.JPG', 'Y17.jpg', 'Y2.jpg', 'Y162.jpg', 'Y194.jpg', 'Y158.JPG', 'Y165.JPG', 'Y180.jpg', 'Y160.JPG

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(**kwargs)


In [None]:
# Train
images = images.reshape(-1, 128, 128, 1)
report_labels = report_padded  # Shape: (batch, max_length) - no extra dim needed
model.fit(images, report_labels, epochs=20, batch_size=16, validation_split=0.2)



Epoch 1/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 93ms/step - loss: 2.0936 - val_loss: 2.0254
Epoch 2/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 1.5841 - val_loss: 1.7834
Epoch 3/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - loss: 1.1037 - val_loss: 1.4875
Epoch 4/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.8183 - val_loss: 1.0903
Epoch 5/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.6192 - val_loss: 1.0984
Epoch 6/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.4685 - val_loss: 0.8027
Epoch 7/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.3042 - val_loss: 0.8218
Epoch 8/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.2374 - val_loss: 0.6763
Epoch 9/20
[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f58f1dd33d0>

In [1]:
# Save model and tokenizer
model.save('/content/brain_tumor_report_generator.h5')
with open('/content/tokenizer.pkl', 'wb') as f:
    pickle.dump(tokenizer, f)

from google.colab import files
files.download('/content/brain_tumor_report_generator.h5')
files.download('/content/tokenizer.pkl')

NameError: name 'model' is not defined

In [None]:
# Test in Colab (corrected)
test_img = Image.open('/content/drive/MyDrive/BrainMRIzip/BrainMRI/yes/Y1.jpg').resize((128, 128)).convert('L')
test_img = np.array(test_img) / 255.0
test_img = test_img.reshape(1, 128, 128, 1)
report_seq = model.predict(test_img)  # Shape: (1, max_length, vocab_size)
report_words = []
for timestep in report_seq[0]:
    word_idx = np.argmax(timestep)
    if word_idx > 0:  # Skip padding (0)
        try:
            word = list(tokenizer.word_index.keys())[list(tokenizer.word_index.values()).index(word_idx)]
            report_words.append(word)
        except ValueError:
            continue  # Skip if word_idx isn’t in word_index (shouldn’t happen but adds robustness)
report = ' '.join(report_words)
print("Test Report:", report)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Test Report: findings tumor detected in brain tissue
