In [None]:
!pip install fastapi uvicorn nest-asyncio pyngrok transformers pdfminer.six PyPDF2


In [3]:
!pip install safetensors




In [None]:
!pip install python-multipart


In [5]:
# Mount Google Drive into the Colab runtime at /content/drive. The model and
# tokenizer directories loaded later in this notebook live under its MyDrive folder.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
import csv
import os
from getpass import getpass

import nest_asyncio
import torch
import uvicorn
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pdfminer.high_level import extract_text
from pydantic import BaseModel
from pyngrok import ngrok
from PyPDF2 import PdfReader
from transformers import (
    pipeline,
    AutoTokenizer,
    TFAutoModelForSeq2SeqLM,
    AutoModelForTokenClassification,
)



In [31]:
# Register the ngrok auth token without writing it into the notebook: read it
# from the NGROK_AUTHTOKEN environment variable, or prompt for it (input hidden)
# when the variable is not set.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_auth_token)

# Open a public tunnel to local port 8000, the port the API server binds to.
ngrok_tunnel = ngrok.connect(8000)
print(f"Public URL: {ngrok_tunnel.public_url}")


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
Public URL: https://6e7b-35-233-177-138.ngrok-free.app


In [None]:

# Create the FastAPI application that all endpoints below register on.
app = FastAPI()

# CORS policy: accept cross-origin requests from any origin, with any method
# and any header, so a separately hosted frontend can call this API.
# NOTE(review): a wildcard origin combined with allow_credentials=True is
# discouraged; list explicit origins or disable credentials once the frontend's
# needs are confirmed.
cors_options = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **cors_options)

# Saved model / tokenizer directories on the mounted Drive.
# NOTE(review): "text_summariztion_tf" is spelled without the second "a";
# confirm it matches the actual folder name on Drive.
summarization_model_path = "/content/drive/MyDrive/text_summarization"
summarization_tokenizer_path = "/content/drive/MyDrive/text_summariztion_tf"
ner_model_path = "/content/drive/MyDrive/NER/Model/NER"
ner_tokenizer_path = "/content/drive/MyDrive/NER/Model/NER_tf"

# --- Text summarization: TensorFlow seq2seq model wrapped in a pipeline ---
summarization_tokenizer = AutoTokenizer.from_pretrained(summarization_tokenizer_path)
summarization_model = TFAutoModelForSeq2SeqLM.from_pretrained(summarization_model_path)
summarization_pipeline = pipeline(
    task="summarization",
    framework="tf",
    tokenizer=summarization_tokenizer,
    model=summarization_model,
)

# --- Named entity recognition: PyTorch token-classification model ---
ner_tokenizer = AutoTokenizer.from_pretrained(ner_tokenizer_path)
ner_model = AutoModelForTokenClassification.from_pretrained(ner_model_path)
ner_pipeline = pipeline(
    task="ner",
    aggregation_strategy="simple",
    tokenizer=ner_tokenizer,
    model=ner_model,
)

# API Models
class TextRequest(BaseModel):
    """Request body for the text endpoints: a JSON object with one ``text`` field."""
    # Raw input text; consumed by the /summarization and /ner handlers.
    text: str


# Root endpoint
@app.get("/")
def root():
    """Return a fixed message confirming that the backend is reachable."""
    payload = {"message": "Backend is running!"}
    return payload

# Summarization endpoint
@app.post("/summarization")
def summarize_text(request: TextRequest):
    """Summarize ``request.text`` with the summarization pipeline.

    Args:
        request: Body carrying the text to summarize.

    Returns:
        ``{"summary": <str>}`` holding the first summary produced by the pipeline.

    Raises:
        HTTPException: 400 if the text is empty or whitespace-only;
            500 if the pipeline fails.
    """
    # Reject blank input up front (outside the try block, so the 400 is not
    # rewrapped as a 500) instead of asking the model to summarize nothing.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text must not be empty")

    try:
        summary = summarization_pipeline(
            request.text,
            max_length=130,
            min_length=70,
            do_sample=False,
            # Cut over-long inputs down to the tokenizer's maximum length
            # rather than letting them fail inside the model.
            truncation=True,
        )
        return {"summary": summary[0]["summary_text"]}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error during summarization: {str(e)}")

# NER endpoint
@app.post("/ner")
def ner_text(request: TextRequest):
    """Tag the tokens of ``request.text`` with named-entity labels.

    The text is tokenized (truncated to at most 128 tokens), run through
    ``ner_model``, and every token receives the label with the highest logit.

    Args:
        request: Body carrying the text to analyse.

    Returns:
        ``{"entities": [(token, label), ...]}`` containing only tokens whose
        label is not ``"O"``. Tokenizer special tokens are excluded.

    Raises:
        HTTPException: 500 if tokenization, inference or label lookup fails.
    """
    # Class index -> tag name.
    # NOTE(review): assumes this order matches the fine-tuned model's label
    # ids; confirm against ner_model.config.id2label.
    label_map = {
        0: "O",         # Outside of an entity
        1: "B-PER",     # Beginning of a person's name
        2: "I-PER",     # Inside a person's name
        3: "B-LOC",     # Beginning of a location
        4: "I-LOC",     # Inside a location
        5: "B-ORG",     # Beginning of an organization
        6: "I-ORG",     # Inside an organization
        7: "B-MISC",    # Beginning of a miscellaneous entity
        8: "I-MISC",    # Inside a miscellaneous entity
    }
    try:
        # Tokenize input
        inputs = ner_tokenizer(
            request.text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=128
        )

        # Inference only: skip autograd bookkeeping.
        with torch.no_grad():
            outputs = ner_model(**inputs)

        # Select batch row 0 explicitly; squeeze() would also drop the sequence
        # axis when the encoding holds a single token, breaking the iteration.
        predicted_ids = outputs.logits.argmax(-1)[0].tolist()
        tokens = ner_tokenizer.convert_ids_to_tokens(inputs["input_ids"][0].tolist())

        # Special tokens are tokenizer artefacts, not part of the user's text.
        special_tokens = set(ner_tokenizer.all_special_tokens)

        filtered_results = []
        for token, label_id in zip(tokens, predicted_ids):
            label = label_map[label_id]
            if label == "O" or token in special_tokens:
                continue
            # Drop the "▁" marker the tokenizer puts on some pieces
            # (presumably a SentencePiece-style word-boundary prefix).
            filtered_results.append((token.replace("▁", ""), label))
        return {"entities": filtered_results}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error during NER: {str(e)}")


# File upload endpoint (supports .txt and .pdf)
@app.post("/upload")
async def upload_file(file: UploadFile = File(...)):
    """Extract plain text from an uploaded ``.txt`` or ``.pdf`` file.

    Args:
        file: The uploaded file; its type is decided by the filename extension
            (matched case-insensitively).

    Returns:
        ``{"text": <str>}`` with the decoded file content (``.txt``, UTF-8) or
        the concatenated text of all pages (``.pdf``).

    Raises:
        HTTPException: 400 if the extension is neither ``.txt`` nor ``.pdf``
            (including a missing filename); 500 if reading or parsing fails.
    """
    # A missing filename is treated like an unknown extension, not a crash.
    filename = (file.filename or "").lower()
    try:
        if filename.endswith(".txt"):
            content = await file.read()
            text = content.decode("utf-8")
        elif filename.endswith(".pdf"):
            pdf_reader = PdfReader(file.file)
            # `or ""` keeps the join safe if a page yields no text.
            text = "".join(page.extract_text() or "" for page in pdf_reader.pages)
        else:
            raise HTTPException(status_code=400, detail="Unsupported file format")
        return {"text": text}
    except HTTPException:
        # Let deliberate HTTP errors (the 400 above) through unchanged instead
        # of rewrapping them as a 500 in the generic handler below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error processing file: {str(e)}")

# Health check endpoint for testing the backend
@app.get("/health")
def health_check():
    """Return a fixed status payload so clients can probe that the API is up."""
    status_payload = {"status": "Healthy!"}
    return status_payload

# Patch asyncio via nest_asyncio before starting uvicorn from inside the notebook.
nest_asyncio.apply()

# Serve the app on all interfaces at port 8000, the port the ngrok tunnel
# forwards to. This call blocks the cell until the server is stopped.
server_options = {"host": "0.0.0.0", "port": 8000}
uvicorn.run(app, **server_options)