In [1]:
import pandas as pd      
import os
import time
from pymongo import MongoClient
from dotenv import load_dotenv
from datetime import datetime,timezone
from pytrends.request import TrendReq
# Shared pytrends client; later cells call its methods through this module-level name.
pytrend = TrendReq()

In [2]:
#pytrend.build_payload(kw_list=['Taylor Swift'])
# Interest by Region
#df = pytrend.interest_by_region()
#df.head(10)

In [3]:
# Keyword list for the (currently commented-out) pytrends experiments in the nearby cells.
kw_list = ["Cardano", "ADA"]
#pytrend.build_payload(kw_list,timeframe='today 1-m')
#df = pytrend.interest_over_time()
#df

In [4]:
#df = pytrend.get_historical_interest(kw_list, year_start=2021, month_start=3, day_start=7, hour_start=0, year_end=2022, month_end=4, day_end=7, hour_end=22, cat=0, geo='', gprop='', sleep=10)

In [5]:
#df

In [6]:
# Fetch the realtime trending searches (pn='US') and preview the first rows.
df = pytrend.realtime_trending_searches(pn='US')
df.head()

Unnamed: 0,title,entityNames
0,"Newcastle United F.C., Wolverhampton Wanderers...","[Newcastle United F.C., Wolverhampton Wanderer..."
1,"Borussia Dortmund, VfB Stuttgart, Bundesliga","[Borussia Dortmund, VfB Stuttgart, Bundesliga]"
2,"Éric Zemmour, France, History of the Jews in F...","[Éric Zemmour, France, History of the Jews in ..."
3,"International Space Station, SpaceX, NASA","[International Space Station, SpaceX, NASA]"
4,"Kyle Schwarber, Philadelphia Phillies","[Kyle Schwarber, Philadelphia Phillies]"


In [7]:
# Load environment variables (MDB_CONNECTION_STRING is read below)
load_dotenv()

def connect_to_db():
    """Connect to MongoDB and return the ``project-02`` database handle.

    The connection string is read from the ``MDB_CONNECTION_STRING``
    environment variable. The connection is checked by running the
    ``serverStatus`` command; the server version it reports is printed.

    Returns:
        The ``project-02`` database of the newly created ``MongoClient``.

    Raises:
        Exception: any error raised while creating the client (printed first,
            then re-raised) or while running ``serverStatus``.
    """
    # Gets MongoDB Connection String
    MDB_CONNECTION_STRING = os.getenv('MDB_CONNECTION_STRING')

    # Function to connect to the Mongo DB
    def get_database():
        try:
            client = MongoClient(MDB_CONNECTION_STRING)
            return client["project-02"]
        except Exception as e:
            # Re-raise after printing: returning None here would only surface
            # later as an unrelated AttributeError on ``db.command``.
            print(e)
            raise

    # Connect to the db
    db = get_database()

    # Test Connection
    serverStatusResult = db.command("serverStatus")
    print(serverStatusResult["version"])
    return db
db = connect_to_db()

5.0.6


In [8]:
# Gets the search keywords of a coinpair from the database
def get_keywords(coinpair):
    """Return the ``keywords`` stored for a Binance coinpair as a list.

    Looks up the first document in the ``coinpairs`` collection whose
    ``pair`` equals ``coinpair`` and whose ``exchange`` is ``"binance"``.

    Args:
        coinpair: value matched against the document's ``pair`` field.

    Returns:
        ``list(...)`` of the document's ``keywords`` field.

    Raises:
        KeyError: if no matching document exists, or the matching document
            has no ``keywords`` field.
    """
    # A single document is needed, so ask for exactly one instead of loading
    # the whole cursor into a DataFrame.
    pair_doc = db["coinpairs"].find_one({"pair": coinpair, "exchange": "binance"})
    if pair_doc is None:
        raise KeyError(f"No binance coinpair {coinpair!r} found in 'coinpairs'")

    return list(pair_doc['keywords'])

# Gets the main coin name of a coinpair from the database
def get_main_coin(coinpair):
    """Return the ``main`` field stored for a Binance coinpair.

    Looks up the first document in the ``coinpairs`` collection whose
    ``pair`` equals ``coinpair`` and whose ``exchange`` is ``"binance"``.

    Args:
        coinpair: value matched against the document's ``pair`` field.

    Returns:
        The value of the document's ``main`` field.

    Raises:
        KeyError: if no matching document exists, or the matching document
            has no ``main`` field.
    """
    # A single document is needed, so ask for exactly one instead of loading
    # the whole cursor into a DataFrame.
    pair_doc = db["coinpairs"].find_one({"pair": coinpair, "exchange": "binance"})
    if pair_doc is None:
        raise KeyError(f"No binance coinpair {coinpair!r} found in 'coinpairs'")

    return pair_doc['main']

# Gets the coinpairs to process from the database
def get_coinpairs():
    """Return the ``pair`` value of every Binance document in ``coinpairs``.

    Returns:
        A list of ``pair`` values in cursor order. The list is empty when no
        document has ``exchange == "binance"``; documents without a ``pair``
        field are skipped.
    """
    # Get the coinpairs from the Database
    db_coinpairs = db["coinpairs"].find({"exchange": "binance"})

    # Read the field straight off the cursor: an empty result becomes an empty
    # list instead of a KeyError from a column-less DataFrame.
    return [doc["pair"] for doc in db_coinpairs if "pair" in doc]

# Save trends to Database
def update_trends_db(coin_name, trends_df):
    """Insert the rows of ``trends_df`` into the ``trends_<coin_name>`` collection.

    The frame's index is turned into a regular column first, so it is stored
    with each record. After inserting, a descending index on ``date`` is
    created on the collection.

    Args:
        coin_name: suffix of the target collection name (``"trends_" + coin_name``).
        trends_df: DataFrame whose rows are stored, one document per row.

    Returns:
        ``coin_name``. The function has no ``coinpair`` in scope, so returning
        that name (as the code once did) raised ``NameError`` after the insert
        had already happened.
    """
    # Resets the index so it is stored as a column of every record
    records = trends_df.reset_index().to_dict("records")

    # Nothing to write: skip the insert (insert_many rejects an empty list)
    # and do not create the collection as a side effect of indexing.
    if records:
        collection = db["trends_" + coin_name]
        collection.insert_many(records)
        collection.create_index([("date", -1)])

    return coin_name

In [9]:
# Processes the Google Trends frame: validates the keyword columns and drops partial rows
def process_trends(keywords, df, coinpair):
    """Return ``df`` without partial rows and without the ``isPartial`` column.

    All keyword columns are kept. Every entry of ``keywords`` must be a column
    of ``df``; a missing column raises ``KeyError`` so the caller can treat
    the fetch as failed.

    Args:
        keywords: column names expected in ``df``.
        df: DataFrame containing the keyword columns and an ``isPartial`` column.
        coinpair: unused; kept so existing callers keep working.

    Returns:
        A new DataFrame with the rows where ``isPartial`` is not ``True`` and
        with the ``isPartial`` column removed. ``df`` itself is not modified.

    Raises:
        KeyError: if a keyword column or the ``isPartial`` column is missing.
    """
    # Explicit check replacing the former per-keyword sum, whose result was
    # never used and whose only effect was an opaque KeyError.
    missing = [key for key in keywords if key not in df.columns]
    if missing:
        raise KeyError(f"Google Trends data has no column(s) for keyword(s): {missing}")

    # Boolean indexing returns a new frame, so the input stays untouched
    complete_rows = df[df['isPartial'] != True]

    # Drop Columns that are not needed
    return complete_rows.drop(columns=['isPartial'])

In [10]:
def populate_google_trends(coinpair, keywords, coin_name, months_back=24,
                           max_retries=10, retry_delay=3):
    """Fetch historical trends month by month and store each month in the DB.

    Walks from ``months_back`` months ago up to now in one-month windows. For
    each window it fetches the data through ``pytrend``, runs it through
    ``process_trends`` and saves the result with ``update_trends_db``.

    Args:
        coinpair: passed through to ``process_trends``.
        keywords: keyword list requested from Google Trends.
        coin_name: passed to ``update_trends_db`` to select the collection.
        months_back: number of one-month windows to pull (oldest first).
        max_retries: how many consecutive failed attempts are retried for one
            window before the last error is re-raised.
        retry_delay: seconds to wait before retrying a failed window.

    Raises:
        Exception: the last fetch/processing error once a window has failed
            more than ``max_retries`` times in a row; any error from
            ``update_trends_db`` immediately.
    """
    i = months_back
    failures = 0
    now_utc = datetime.now(timezone.utc)
    while i > 0:
        date_start = now_utc - pd.offsets.DateOffset(months=i)
        date_end = now_utc - pd.offsets.DateOffset(months=i-1)
        print(f"{date_start} - {date_end}")
        try:
            df = pytrend.get_historical_interest(
                keywords,
                year_start=date_start.year, month_start=date_start.month,
                day_start=date_start.day, hour_start=date_start.hour,
                year_end=date_end.year, month_end=date_end.month,
                # Must be the day of the month; passing the month number here
                # made every window end on the wrong day.
                day_end=date_end.day, hour_end=date_end.hour,
                cat=0, geo='', gprop='', sleep=0)
            trends_df = process_trends(keywords, df, coinpair)
        except Exception as exc:
            # ``Exception`` rather than a bare except, so Ctrl-C / kernel
            # interrupt still stops the loop.
            failures += 1
            if failures > max_retries:
                raise
            print(f"Error with Google Trends, retrying in {retry_delay} seconds.. ({exc!r})")
            time.sleep(retry_delay)
            continue

        # Persisting happens outside the retry: repeating a write that already
        # partly succeeded would insert duplicate rows.
        update_trends_db(coin_name, trends_df)
        failures = 0
        i -= 1

In [11]:
# Function to initialize the database with the coinpairs registered
def init_google_trends(db, coinpairs):
    """Pull Google Trends for every coinpair that has no trends collection yet.

    For each coinpair the main coin name is looked up; when the collection
    ``trends_<coin>`` is already listed by ``db`` the coin is skipped,
    otherwise its keywords are loaded and the trends are populated.

    Args:
        db: database handle providing ``list_collection_names()``.
        coinpairs: iterable of coinpairs to process.
    """
    for pair in coinpairs:
        main_coin = get_main_coin(pair)
        trends_collection = "trends_" + main_coin

        # Existing collections are left untouched
        if trends_collection in db.list_collection_names():
            print(f"Collection {trends_collection} found, skipping..")
            continue

        print(f"Pulling trends for {main_coin}")
        pair_keywords = get_keywords(pair)
        populate_google_trends(pair, pair_keywords, main_coin)

    print("Done pulling trends.")

In [12]:
# Load the Binance coinpairs from the database, then pull trends for every
# coin whose "trends_<coin>" collection does not exist yet.
coinpairs = get_coinpairs()
init_google_trends(db, coinpairs)

Collection trends_ADA found, skipping..
Collection trends_SOL found, skipping..
Collection trends_BTC found, skipping..
Collection trends_ETH found, skipping..
Collection trends_LUNA found, skipping..
Collection trends_DOGE found, skipping..
Collection trends_MATIC found, skipping..
Collection trends_MANA found, skipping..
Done pulling trends.
