In [1]:
import json
from typing import Dict
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
from pymongo import MongoClient
from bson import ObjectId
# import transformers
# import tensorflow
# import torch

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ASGI application object; the route decorators in later cells register on it.
app = FastAPI()

# MongoDB handles, resolved step by step:
#   local server -> "myDatabase" database -> "articles" collection.
client = MongoClient("mongodb://localhost:27017/")
db = client["myDatabase"]
articles_collection = db["articles"]

In [3]:
# Summarization pipeline built once at module level and shared by the request
# handlers; it loads the BART CNN checkpoint through the TensorFlow backend.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
    framework="tf",
)
class ArticleSourceLocation(BaseModel):
    """Geographic location of an article's source, as sent in the request body."""

    # Free-form place names; no format is enforced beyond "must be a string".
    country: str
    state: str
    city: str
    # Mapping of string keys to floats. The model itself does not constrain
    # which keys are present, but the /summarize_article handler reads the
    # 'lat' and 'lon' entries from it.
    coordinates: Dict[str, float]

class ArticleSource(BaseModel):
    """Publisher of an article: its domain plus a nested location record."""

    # Source domain as a plain string (presumably a hostname — TODO confirm).
    domain: str
    # Nested location model; validated recursively by pydantic.
    location: ArticleSourceLocation

class Article(BaseModel):
    """Request payload for /summarize_article: one article and its source."""

    # Identifier used as the upsert key when storing and as the lookup key
    # when reading the result back.
    uri: str
    title: str
    # Full article text; this is what gets passed to the summarizer.
    body: str
    # Kept as a plain string; no date parsing or format check is applied here.
    publication_datetime: str
    # Presumably a language code for the article text — TODO confirm.
    lang: str
    url: str
    # Nested publisher information (domain and location).
    source: ArticleSource





To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
All PyTorch model weights were used when initializing TFBartForConditionalGeneration.

All the weights of TFBartForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBartForConditionalGeneration for predictions without further training.


In [4]:
@app.post("/summarize_article")
async def summarize_article(article: Article):
    """Summarize an article and upsert it, together with its summary, into MongoDB.

    The article body is run through the module-level ``summarizer`` pipeline.
    The article fields plus the generated summary are then written to
    ``articles_collection`` keyed by ``uri``: a document with the same ``uri``
    has its fields overwritten, otherwise a new document is inserted.

    Args:
        article: Validated request payload.

    Returns:
        A dict with the article ``uri`` and the generated ``summary``.

    Raises:
        HTTPException: 422 if ``coordinates`` lacks a ``lat`` or ``lon`` entry;
            500 if the write neither matched an existing document nor
            inserted a new one.
    """
    # Validate the coordinates before the expensive model call, so a bad
    # payload is rejected as a client error rather than surfacing later as
    # an unhandled KeyError (HTTP 500).
    coordinates = article.source.location.coordinates
    missing_keys = [key for key in ("lat", "lon") if key not in coordinates]
    if missing_keys:
        raise HTTPException(
            status_code=422,
            detail=f"coordinates is missing required key(s): {', '.join(missing_keys)}",
        )

    # NOTE(review): the summarizer call and the MongoDB write below are both
    # synchronous calls made from inside an ``async def`` handler, so they
    # occupy the event loop while they run — consider offloading them.
    summary = summarizer(article.body, max_length=130, min_length=30, do_sample=False)[0]['summary_text']

    location = article.source.location
    article_doc = {
        "uri": article.uri,
        "title": article.title,
        "body": article.body,
        "publication_datetime": article.publication_datetime,
        "lang": article.lang,
        "url": article.url,
        "source": {
            "domain": article.source.domain,
            "location": {
                "country": location.country,
                "state": location.state,
                "city": location.city,
                # Only lat/lon are persisted; any extra keys are dropped.
                "coordinates": {
                    "lat": coordinates["lat"],
                    "lon": coordinates["lon"],
                },
            },
        },
        "summary": summary,
    }

    result = articles_collection.update_one(
        {"uri": article.uri},
        {"$set": article_doc},
        upsert=True,
    )

    # Success means the filter matched an existing document or a new one was
    # inserted. ``modified_count`` must not be used here: re-submitting an
    # unchanged article matches the document but modifies nothing, which
    # previously produced a spurious 500.
    if result.upserted_id is None and result.matched_count == 0:
        raise HTTPException(status_code=500, detail="Failed to save the article")

    return {"uri": article.uri, "summary": summary}


In [5]:
@app.get("/result/{uri}")
async def get_result(uri: str):
    """Return the stored title and summary for the article with the given ``uri``.

    Args:
        uri: Article identifier taken from the URL path.

    Returns:
        A dict holding the ``uri``, ``title`` and ``summary`` of the stored
        document.

    Raises:
        HTTPException: 404 when no document with that ``uri`` exists.
    """
    stored = articles_collection.find_one({"uri": uri})

    # Guard clause: bail out early when the lookup found nothing.
    if not stored:
        raise HTTPException(status_code=404, detail="Article not found")

    # Expose only the public fields, in a fixed order.
    return {field: stored[field] for field in ("uri", "title", "summary")}