In [None]:
import pymongo

import pandas as pd
from datetime import datetime
import plotly.express as px

In [None]:
# Connection settings for the MongoDB instance queried by this notebook.
conn_str = "mongodb://localhost:27017"
client = pymongo.MongoClient(
    host=conn_str,
    serverSelectionTimeoutMS=10_000,  # 10 000 ms = 10 s
)

In [None]:
# Handle to the "bigdata" database; the cells below query its collections.
db = client["bigdata"]

In [None]:
def get_trend(text, from_dt, to_dt):
    """Count, per day, the news documents matching a full-text search.

    Runs an aggregation on ``db.news`` that text-searches for ``text``,
    keeps only documents whose ``pub_date_day`` lies in the half-open range
    ``[from_dt, to_dt)``, counts the matches per ``pub_date_day`` and fills
    the days without any match with a count of 0.

    Args:
        text: Search string handed to MongoDB's ``$text`` operator; it is
            also used as the name of the count column in the result.
        from_dt: Inclusive lower bound of the date range (``datetime``).
        to_dt: Exclusive upper bound of the date range (``datetime``).

    Returns:
        pandas.DataFrame with exactly two columns, ``"date"`` and ``text``,
        ordered by date ascending. When the aggregation yields no documents
        the frame is empty but still carries both columns.
    """
    pipeline = [
        # `$text` has to live in the first stage. The date filter is placed
        # next to it because `$densify` only fills gaps inside its bounds and
        # does not drop documents lying outside them, so without this filter
        # matches outside the requested range would leak into the result.
        {"$match": {
            "$text": {
                "$search": text,
                "$caseSensitive": False
            },
            "pub_date_day": {"$gte": from_dt, "$lt": to_dt}
        }},

        # One output document per day, holding the number of matches.
        {"$group": {
            "_id": "$pub_date_day",
            "count": {"$sum": 1}
        }},

        # Insert a document for every day in the range that had no match.
        {"$densify": {
            "field": "_id",
            "range": {
                "bounds": [from_dt, to_dt],
                "step": 1,
                "unit": "day"
            }
        }},

        # Documents inserted by `$densify` carry no `count`; default it to 0.
        {"$set": {
            "count": {"$ifNull": ["$count", 0]},
        }},

        {"$sort": {"_id": 1}}
    ]

    records = list(db.news.aggregate(pipeline))

    # Select the two fields by name so the result does not depend on key
    # order and an empty result still produces a well-formed frame.
    df = pd.DataFrame(records, columns=["_id", "count"])
    df.columns = ["date", text]
    # No-op for populated results; gives the empty frame a datetime key too.
    df["date"] = pd.to_datetime(df["date"])

    return df

In [None]:
# Search terms to compare and the date range passed to get_trend.
topics = ['coronavirus', 'trump', 'china']
from_dt = datetime(2020, 1, 1)
to_dt = datetime(2020, 7, 1)

# Start from a frame that only holds the join key, then outer-join the
# per-topic counts onto it one topic at a time.
df = pd.DataFrame({'date': []})
for topic in topics:
    topic_counts = get_trend(topic, from_dt=from_dt, to_dt=to_dt)
    df = df.merge(topic_counts, on='date', how='outer')

# Smooth every topic column with a 7-row rolling mean; min_periods=1 lets the
# first rows average over however many values are available so far.
for column in df.columns.drop('date'):
    df[column] = df[column].rolling(7, min_periods=1).mean()

In [None]:
# Preview the last five rows of the smoothed trend table.
df.tail(5)

In [None]:
# One line per topic, plotted against the date column.
fig = px.line(data_frame=df, x='date', y=topics, template='plotly_white')

# Anchor the legend's top-left corner at the top-left of the plot area.
fig.update_layout(
    legend={
        'xanchor': 'left',
        'x': 0,
        'yanchor': 'top',
        'y': 1,
    }
)

fig.show()