In [1]:
import os
import tempfile

import streamlit as st
from joblib import load
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.applications import EfficientNetB0
from transformers import DistilBertTokenizer, DistilBertModel
import numpy as np
from PIL import Image
import torch

# Directory that holds the trained fusion models. It defaults to the original
# development location but can be overridden with the FINFACT_MODEL_DIR
# environment variable so the app is runnable on other machines.
MODEL_DIR = os.environ.get('FINFACT_MODEL_DIR', r'C:\Users\laksh\Downloads\mlops')

# Streamlit re-executes this whole script on every widget interaction, so the
# heavy model loads are wrapped in st.cache_resource to run once per process.
# Fall back to a no-op decorator on Streamlit releases without cache_resource.
_cache_resource = getattr(st, 'cache_resource', None) or (lambda func: func)


@_cache_resource
def _load_fusion_models():
    """Load the early, late (text/image) and hybrid fusion models from MODEL_DIR.

    Returns:
        Tuple ``(early_model, late_model_text, late_model_image, hybrid_model)``.

    NOTE(security): joblib files are pickles; loading one executes arbitrary
    code, so MODEL_DIR must only contain trusted artifacts.
    """
    return (
        load(os.path.join(MODEL_DIR, 'early_model.joblib')),
        load(os.path.join(MODEL_DIR, 'late_model_text.joblib')),
        load(os.path.join(MODEL_DIR, 'late_model_image.joblib')),
        load_model(os.path.join(MODEL_DIR, 'hybrid_model.h5')),
    )


@_cache_resource
def _load_feature_extractors():
    """Load the pretrained DistilBERT tokenizer/model and EfficientNetB0 backbone.

    Returns:
        Tuple ``(tokenizer, text_model, image_model)``.
    """
    return (
        DistilBertTokenizer.from_pretrained('distilbert-base-uncased'),
        DistilBertModel.from_pretrained('distilbert-base-uncased'),
        EfficientNetB0(weights='imagenet', include_top=False, pooling='avg'),
    )


# Load models for different fusion techniques
early_model, late_model_text, late_model_image, hybrid_model = _load_fusion_models()

# Load pretrained models for feature extraction
tokenizer, text_model, image_model = _load_feature_extractors()

# Preprocess and predict functions
def get_text_features(text):
    """Embed ``text`` with the module-level DistilBERT tokenizer and model.

    The text is tokenized (with padding and truncation enabled) into PyTorch
    tensors, passed through ``text_model`` with gradient tracking disabled,
    and the model's ``last_hidden_state`` is averaged over dimension 1.

    Args:
        text: Input accepted by ``tokenizer`` (the app passes one claim string).

    Returns:
        NumPy array holding the dimension-1 mean of the last hidden state.
    """
    encoded = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        hidden_states = text_model(**encoded).last_hidden_state
        pooled = hidden_states.mean(dim=1)
        return pooled.numpy()

def get_image_features(image_path):
    """Extract a flat feature vector for one image with ``image_model``.

    The image is loaded at 224x224, converted to an array, divided by 255,
    given a leading batch axis and run through ``image_model.predict``; the
    model output is flattened before being returned.

    NOTE(review): the Keras EfficientNet applications document raw [0, 255]
    pixel input (rescaling is part of the model). The extra /255 is preserved
    because the downstream classifiers were presumably trained on features
    produced this way - confirm against the training pipeline.

    Args:
        image_path: Location of the image, as accepted by ``load_img``.

    Returns:
        1-D NumPy array (the flattened ``image_model`` output).
    """
    pixels = img_to_array(load_img(image_path, target_size=(224, 224)))
    batch = (pixels / 255.0)[np.newaxis, ...]
    embedding = image_model.predict(batch)
    return embedding.flatten()

def predict_early_fusion(claim, image_path):
    """Classify a claim by concatenating text and image features (early fusion).

    Args:
        claim: Claim text passed to ``get_text_features``.
        image_path: Path of the claim image passed to ``get_image_features``.

    Returns:
        Whatever ``early_model.predict`` returns for the single fused row.
    """
    # get_image_features returns a flattened 1-D vector while the text features
    # are 2-D; both are reshaped to a single row so that concatenation along
    # axis=1 is valid (mixing 1-D and 2-D inputs raises ValueError).
    text_features = np.asarray(get_text_features(claim)).reshape(1, -1)
    image_features = np.asarray(get_image_features(image_path)).reshape(1, -1)
    combined_features = np.concatenate([text_features, image_features], axis=1)
    return early_model.predict(combined_features)

def predict_late_fusion(claim, image_path):
    """Classify a claim by combining separate text and image models (late fusion).

    When both models expose ``predict_proba`` and share the same ``classes_``,
    their class probabilities are averaged (soft voting) and the label with
    the highest mean probability is returned. Otherwise the function falls
    back to averaging the two hard predictions and thresholding at 0.5.

    Args:
        claim: Claim text passed to ``get_text_features``.
        image_path: Path of the claim image passed to ``get_image_features``.

    Returns:
        The fused prediction as a Python ``int``.
    """
    # Estimators expect one row per sample; get_image_features returns a
    # flattened 1-D vector, so both inputs are reshaped to a single row.
    text_features = np.asarray(get_text_features(claim)).reshape(1, -1)
    image_features = np.asarray(get_image_features(image_path)).reshape(1, -1)

    text_classes = getattr(late_model_text, "classes_", None)
    image_classes = getattr(late_model_image, "classes_", None)
    can_soft_vote = (
        text_classes is not None
        and image_classes is not None
        and hasattr(late_model_text, "predict_proba")
        and hasattr(late_model_image, "predict_proba")
        and np.array_equal(text_classes, image_classes)
    )
    if can_soft_vote:
        # Averaged soft voting: mean of the per-class probabilities.
        text_proba = np.asarray(late_model_text.predict_proba(text_features))[0]
        image_proba = np.asarray(late_model_image.predict_proba(image_features))[0]
        mean_proba = (text_proba + image_proba) / 2
        return int(text_classes[int(np.argmax(mean_proba))])

    # Fallback: average the hard labels. Scalars are extracted explicitly
    # instead of relying on int() of a one-element array.
    text_pred = np.ravel(late_model_text.predict(text_features))[0]
    image_pred = np.ravel(late_model_image.predict(image_features))[0]
    return int((text_pred + image_pred) / 2 > 0.5)

def predict_hybrid_fusion(claim, image_path):
    """Classify a claim with the two-input hybrid Keras model.

    Args:
        claim: Claim text passed to ``get_text_features``.
        image_path: Path of the claim image passed to ``get_image_features``.

    Returns:
        Index of the largest value in the model output, as a Python ``int``.
    """
    # Both inputs must carry the same leading batch dimension. The image
    # features arrive flattened to 1-D, so each input is reshaped to one row.
    text_features = np.asarray(get_text_features(claim)).reshape(1, -1)
    image_features = np.asarray(get_image_features(image_path)).reshape(1, -1)
    prediction = hybrid_model.predict([text_features, image_features])
    # NOTE(review): argmax assumes one score per class; a single sigmoid
    # output would always yield 0 - confirm against the saved model.
    return int(np.argmax(prediction))

# Streamlit UI
# NOTE: Streamlit only renders these widgets when the script is launched with
# `streamlit run`; executed as a plain notebook cell the calls draw nothing.
st.title("FinFact - Claim Verification")
st.write("This app verifies financial claims using multimodal fusion techniques.")

# Text input for the claim
claim_text = st.text_area("Enter the Claim Text")

# Image upload input for the claim image
image_file = st.file_uploader("Upload an Image", type=["jpg", "jpeg", "png"])

# Option to select fusion technique; the mapping doubles as the dispatch table
# so the selectbox options and the available predictors cannot drift apart.
fusion_predictors = {
    "Early Fusion": predict_early_fusion,
    "Late Fusion": predict_late_fusion,
    "Hybrid Fusion": predict_hybrid_fusion,
}
fusion_technique = st.selectbox(
    "Choose a Fusion Technique",
    tuple(fusion_predictors)
)

# Show entered claim and image (a whitespace-only claim counts as empty)
if (claim_text or "").strip() and image_file:
    # Display the claim and image
    image = Image.open(image_file)
    st.write("### Claim:")
    st.write(claim_text)
    st.write("### Image:")
    st.image(image, caption="Claim Image", use_column_width=True)

    # Predict claim's truthfulness
    if st.button("Verify Claim"):
        # Write the upload to a uniquely named temporary file only when a
        # prediction is requested. A fixed shared filename would be overwritten
        # by concurrent sessions and rewritten on every script rerun. The
        # original extension is kept so the file content matches its name.
        suffix = os.path.splitext(image_file.name)[1] or '.jpg'
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as temp_image:
            temp_image.write(image_file.getbuffer())
            temp_image_path = temp_image.name

        try:
            # Predict using the selected fusion technique
            prediction = fusion_predictors[fusion_technique](claim_text, temp_image_path)
        finally:
            # Best-effort cleanup: a failure to delete the temp file must not
            # hide the prediction or the original prediction error.
            try:
                os.remove(temp_image_path)
            except OSError:
                pass

        # Predictors may return a scalar or a one-element array; compare on
        # the scalar label so the result does not depend on array truthiness.
        label = np.asarray(prediction).ravel()[0]

        # Display result
        result = "True" if label == 1 else "False" if label == 0 else "Not enough information"
        st.write("Prediction:", result)
else:
    st.write("Please enter a claim and upload an image to verify.")


TypeError: Descriptors cannot be created directly.
If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.
If you cannot immediately regenerate your protos, some other possible workarounds are:
 1. Downgrade the protobuf package to 3.20.x or lower.
 2. Set PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python (but this will use pure-Python parsing and will be much slower).

More information: https://developers.google.com/protocol-buffers/docs/news/2022-05-06#python-updates

In [None]:
!pip install streamlit

In [None]:
pip install --upgrade protobuf
