In [1]:
pip install easyocr keras tensorflow scikit-learn numpy


Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
import pickle
import re

import easyocr
import numpy as np
from keras.models import load_model
from keras.utils import pad_sequences

# === Set your file paths ===
# Every artifact lives in one directory. Set the CANCER_DETECTION_DIR environment
# variable to point somewhere else without editing the notebook; when it is unset
# the original location below is used.
base_dir = os.environ.get(
    "CANCER_DETECTION_DIR",
    r"C:\Users\sagni\Downloads\Cancer Detection",
)

# Trained model plus the fitted preprocessing objects it is used with.
model_path = os.path.join(base_dir, "cancer_model.h5")
gene_encoder_path = os.path.join(base_dir, "gene_encoder.pickle")
tokenizer_path = os.path.join(base_dir, "tokenizer.pickle")
variation_encoder_path = os.path.join(base_dir, "variation_encoder.pickle")

# Image that is passed to OCR.
image_path = os.path.join(base_dir, "images.jpg")

# === Load all components ===
def _load_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    # NOTE: unpickling can execute arbitrary code; only load files you trust.
    with open(path, 'rb') as stream:
        return pickle.load(stream)


# The model is loaded first, then the three pickled preprocessing objects.
model = load_model(model_path)
tokenizer = _load_pickle(tokenizer_path)
gene_encoder = _load_pickle(gene_encoder_path)
variation_encoder = _load_pickle(variation_encoder_path)

# === Load and process the image ===
# Request CPU inference explicitly: easyocr.Reader defaults to gpu=True, so the
# call has to pass gpu=False to actually match the "use CPU" intent.
reader = easyocr.Reader(['en'], gpu=False)

# detail=0 requests the simple output form; the recognised pieces are joined
# with single spaces into one string for the regex and tokenizer steps below.
results = reader.readtext(image_path, detail=0)
text = " ".join(results)
print("📝 Extracted Text: ", text)

# An empty OCR result makes every later step fail, so say so at the source.
if not text.strip():
    print("⚠️ OCR returned no text - check the image path, resolution and contrast.")

# === Extract gene and variation using regex ===
# The OCR text must contain "Gene: <word>" and "Variation: <token>" labels
# (matched case-insensitively); the first occurrence of each is used.
gene_match = re.search(r'Gene\s*:\s*(\w+)', text, re.IGNORECASE)
variation_match = re.search(r'Variation\s*:\s*([\w\d\.\+\-]+)', text, re.IGNORECASE)

if gene_match and variation_match:
    gene = gene_match.group(1)
    variation = variation_match.group(1)
    print(f"🧬 Gene: {gene} | 🔬 Variation: {variation}")

    try:
        # Encode gene and variation.
        # NOTE(review): presumably these are scikit-learn LabelEncoders, whose
        # transform() raises ValueError for labels unseen at fit time - confirm.
        gene_encoded = gene_encoder.transform([gene])[0]
        variation_encoded = variation_encoder.transform([variation])[0]
    except ValueError as err:
        # Report an unknown gene/variation instead of ending the cell in a traceback.
        print(f"❌ Gene/variation not known to the fitted encoders: {err}")
    else:
        # Text preprocessing: the full OCR text is tokenized and padded.
        # NOTE(review): maxlen=100 presumably has to match the sequence length
        # the model was trained with - confirm.
        sequence = tokenizer.texts_to_sequences([text])
        padded_seq = pad_sequences(sequence, maxlen=100)

        # Make prediction from the three inputs, in the order text, gene, variation.
        prediction = model.predict([padded_seq, np.array([gene_encoded]), np.array([variation_encoded])])
        # Index of the largest output; whether class labels are 0- or 1-based
        # depends on how the model was trained - TODO confirm.
        predicted_class = np.argmax(prediction)

        print(f"✅ Predicted Cancer Class: {predicted_class}")
else:
    print("❌ Could not extract gene/variation from image text.")




📝 Extracted Text:  
❌ Could not extract gene/variation from image text.
