In [None]:
from flask import Flask, request, render_template_string, send_file
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os
import io

app = Flask(__name__)

# Directory of the BioBERT sequence-classification checkpoint to serve.
# The BIOBERT_MODEL_PATH environment variable overrides the location so the
# notebook can run on another machine without editing code; when it is unset
# the original local path below is used.
model_path = os.environ.get(
    "BIOBERT_MODEL_PATH",
    "D:/DATA FOR RESEARCH PROJECT/biobert_pacemaker_final-20250623T084209Z-1-001/biobert_pacemaker_final",
)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Put the model in evaluation mode (disables training-only layers such as
# dropout) because it is only used for inference here.
model.eval()

# Classification function
def classify_text(text):
    """Classify one text with the module-level ``tokenizer`` and ``model``.

    The text is tokenized (truncated to at most 512 tokens), passed through
    the model with gradient tracking disabled, and the logits are converted
    to probabilities with a softmax over dim 1.

    Returns:
        tuple: ``(label, confidence)``. ``label`` is "Adverse Event" when the
        arg-max index is 1 and "Not Adverse" otherwise; ``confidence`` is the
        probability at that index in the first row, rounded to 3 decimals.
    """
    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=512,
    )

    with torch.no_grad():
        logits = model(**encoded).logits
        probabilities = torch.softmax(logits, dim=1)

    # argmax runs over the flattened tensor; with the single input row used
    # here that index is the class index.
    top_index = probabilities.argmax().item()
    confidence = probabilities[0][top_index].item()

    if top_index == 1:
        label = "Adverse Event"
    else:
        label = "Not Adverse"
    return label, round(confidence, 3)

# Markup for the landing page: a multipart form that posts the selected file
# to /upload under the field name "file".
_UPLOAD_FORM_HTML = """
        <h2>BioBERT Classifier - Upload CSV</h2>
        <form action="/upload" method="post" enctype="multipart/form-data">
            <p>Select a CSV file (must contain 'FOI_TEXT' column):</p>
            <input type="file" name="file">
            <input type="submit" value="Upload and Classify">
        </form>
    """


# Home route with upload form
@app.route("/", methods=["GET", "POST"])
def home():
    """Return the CSV upload form for GET and POST requests to the root URL."""
    return _UPLOAD_FORM_HTML

# Handle CSV upload and return predictions
@app.route("/upload", methods=["POST"])
def upload():
    """Classify every row of an uploaded CSV and return the result as a download.

    Expects a multipart form field named ``file`` holding a CSV that has a
    ``FOI_TEXT`` column. Each value of that column is passed to
    ``classify_text`` and the results are added as ``Prediction`` and
    ``Confidence`` columns.

    Returns:
        A ``classified_results.csv`` attachment on success, or a plain message
        with HTTP status 400 when the upload is missing, has no file selected,
        cannot be parsed as CSV, or lacks the ``FOI_TEXT`` column.
    """
    file = request.files.get('file')
    # Submitting the form without choosing a file still sends a 'file' part,
    # but with an empty filename, so the key check alone is not enough.
    if file is None or not file.filename:
        return "No file uploaded", 400

    try:
        df = pd.read_csv(file, low_memory=False)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError):
        # Empty, malformed, or non-text uploads are a client error, not a
        # server crash.
        return "Uploaded file could not be read as CSV", 400

    if 'FOI_TEXT' not in df.columns:
        return "CSV must contain a column named 'FOI_TEXT'", 400

    # Classify each row. astype(str) converts missing values to the literal
    # text "nan", which is then classified like any other text.
    results = [classify_text(text) for text in df['FOI_TEXT'].astype(str)]
    df['Prediction'] = [label for label, _ in results]
    df['Confidence'] = [conf for _, conf in results]

    # Serialize in memory; send_file needs a binary stream, hence the encode.
    csv_bytes = df.to_csv(index=False).encode()

    return send_file(
        io.BytesIO(csv_bytes),
        mimetype='text/csv',
        as_attachment=True,
        download_name='classified_results.csv'
    )

# Start Flask's built-in server with its default settings when this code runs
# as the main module. The recorded output below shows the app being served as
# '__main__' on http://127.0.0.1:5000 with debug mode off; the call keeps
# running until it is interrupted (CTRL+C).
if __name__ == "__main__":
    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [09/Jul/2025 13:19:17] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [09/Jul/2025 13:20:34] "POST /upload HTTP/1.1" 200 -


In [5]:
import pandas as pd

# Read the complete dataset from pacemakerClean2.csv
df = pd.read_csv(
    "D:/DATA FOR RESEARCH PROJECT/Clean data/pacemakerClean2.csv",
    low_memory=False,
)

# Keep only the FOI_TEXT column, limited to the first 100 rows
df_subset = df.loc[:, ['FOI_TEXT']].iloc[:100]

# Write that subset to its own CSV file, without the index column
df_subset.to_csv(
    "D:/DATA FOR RESEARCH PROJECT/Clean data/foi_text_first100.csv",
    index=False,
)

print("First 100 foi_text columns extracted")


First 100 foi_text columns extracted


In [9]:
import pandas as pd

# Number of leading rows to export. The slice, the output file name, and the
# status message are all derived from this one value so they cannot drift
# apart when the count is changed.
N_ROWS = 500

# Load your full dataset
df = pd.read_csv("D:/DATA FOR RESEARCH PROJECT/Clean data/pacemakerClean2.csv", low_memory=False)

# Extract the first N_ROWS rows of only the FOI_TEXT column
df_subset = df[['FOI_TEXT']].head(N_ROWS)

# Save the subset to a new CSV file named after the row count
df_subset.to_csv(f"D:/DATA FOR RESEARCH PROJECT/Clean data/foi_text_first{N_ROWS}.csv", index=False)

print(f"First {N_ROWS} foi_text columns extracted")

First 500 foi_text columns extracted
