In [None]:
import pandas as pd
import shutil
import os
import requests
from datetime import datetime
from dotenv import load_dotenv
from pathlib import Path
from zipfile import ZipFile
from os.path import exists

In [None]:
# Import our utils libraries
from utils.mongo import (
    connect_to_db,
    csv_to_mongo,
    update_fng,
    update_coinpairs,
    update_timeframes,
    get_keywords,
    get_main_coin,
    get_coinpairs
)
from utils.fear_and_greed import (
    get_fng
)
from utils.binance import (
    get_coinpair_kline
)

from utils.url_exists import (
    url_exists
)
from utils.google_trends import (
    process_trends,
    populate_google_trends
)

In [None]:
# Load environment variables (python-dotenv reads them from a .env file when one is found)
load_dotenv()

# Connect to the DB using the connection string stored in MDB_CONNECTION_STRING.
# NOTE(review): os.getenv returns None when the variable is unset; how
# connect_to_db reacts to None is not visible from this notebook - confirm.
db = connect_to_db(os.getenv('MDB_CONNECTION_STRING'))

5.0.6


In [None]:
def _download_and_extract(url, directory, filename):
    """Download the zip archive at ``url`` into ``directory``, extract it there, then delete the zip."""
    response = requests.get(url, allow_redirects=True, timeout=60)
    # Fail loudly on an HTTP error instead of writing an error page to disk
    # and crashing later with BadZipFile.
    response.raise_for_status()
    os.makedirs(directory, exist_ok=True)
    location = directory + "/" + filename
    try:
        with open(location, 'wb') as zip_file:
            zip_file.write(response.content)
        with ZipFile(location, 'r') as archive:
            # Extract next to the archive without touching the working directory
            archive.extractall(directory)
    finally:
        # The archive is only removed once it has been closed
        if os.path.exists(location):
            os.remove(location)


def process_binance_files(db, timeframe, coinpair, binance_timeframes_list):
    """Download the monthly Binance kline archives of one coin pair and upload them.

    Starting with the month before the current one and walking backwards, each
    monthly zip is downloaded from data.binance.vision into
    ``./<coinpair>_<timeframe>`` (relative to the current working directory),
    extracted, and the resulting csv is handed to ``csv_to_mongo`` with
    ``<coinpair>_<timeframe>`` as collection name. A csv that is already on
    disk is not downloaded again but is still uploaded.

    The walk stops at the first month whose archive URL does not exist, or
    once the year falls more than ``max_years`` (5) years behind the current
    year. The download directory is left in place; removing it is up to the
    caller.

    Args:
        db: database handle forwarded to ``csv_to_mongo``.
        timeframe: kline interval used in the URL and file names (e.g. '1d').
        coinpair: trading pair symbol used in the URL and file names.
        binance_timeframes_list: unused; kept so existing callers keep working.

    Raises:
        requests.HTTPError: if a download answers with an HTTP error status.
    """
    max_years = 5
    now = datetime.now()  # read the clock once so year and month are consistent
    oldest_year = now.year - max_years

    # Start with last month; in January that is December of the previous year
    # (a plain ``month - 1`` would give the non-existent month 0).
    if now.month == 1:
        year, month = now.year - 1, 12
    else:
        year, month = now.year, now.month - 1

    collection_name = coinpair + "_" + timeframe
    directory = "./" + collection_name

    while year >= oldest_year:
        stem = f"{coinpair}-{timeframe}-{year}-{month:02d}"
        filename = stem + '.zip'
        csv_filename = stem + '.csv'
        csv_path = directory + "/" + csv_filename
        url = 'https://data.binance.vision/data/spot/monthly/klines/' + coinpair + '/' + timeframe + '/' + filename
        print(url)

        # The first missing archive marks the end of the available history
        if not url_exists(url):
            break

        if exists(csv_path):
            print(f"File {csv_filename} exists, skipping download..")
        else:
            _download_and_extract(url, directory, filename)

        # Load csv and upload to mongodb
        csv_to_mongo(db, csv_path, collection_name)

        # Step one month back, rolling over the year boundary
        if month == 1:
            year, month = year - 1, 12
        else:
            month -= 1

In [None]:
def process(db, binance_timeframes_list, coinpair_list):
    """Create the kline collection of every (timeframe, coin pair) combination that is missing.

    For each combination the collection name is ``<pair>_<timeframe>``. When
    the database does not list that collection yet, ``process_binance_files``
    is called to download and upload the data, and the local download
    directory ``./<pair>_<timeframe>`` is removed afterwards.

    Args:
        db: database handle; must provide ``list_collection_names()``.
        binance_timeframes_list: iterable of timeframe strings (e.g. '1d').
        coinpair_list: iterable of mappings with at least a ``"pair"`` key.
    """
    for timeframe in binance_timeframes_list:
        for pair in coinpair_list:
            collection_name = pair["pair"] + "_" + timeframe
            if collection_name in db.list_collection_names():
                print(f"Collection {collection_name} found, skipping..")
                continue

            print(f"Creating collection: {collection_name}")
            process_binance_files(db, timeframe, pair["pair"], binance_timeframes_list)

            # The download directory only exists when at least one archive was
            # fetched; removing a missing directory would raise and abort the
            # remaining pairs/timeframes.
            path = f"./{collection_name}"
            if os.path.isdir(path):
                shutil.rmtree(path)

In [None]:
def init_google_trends(db, coinpairs):
    """Initialize the database with Google Trends data for the registered coin pairs.

    For every entry of ``coinpairs`` the main coin is looked up with
    ``get_main_coin``. When no ``trends_<coin>`` collection is listed by the
    database, the keywords are fetched with ``get_keywords`` and passed to
    ``populate_google_trends``; otherwise the coin pair is skipped.
    """
    for pair in coinpairs:
        main_coin = get_main_coin(db, pair)
        trends_collection = "trends_" + main_coin

        # Queried on every iteration: an earlier pair may have just created it
        already_present = trends_collection in db.list_collection_names()
        if already_present:
            print(f"Collection {trends_collection} found, skipping..")
            continue

        print(f"Pulling trends for {main_coin}")
        search_terms = get_keywords(db, pair)
        populate_google_trends(db, pair, search_terms, main_coin)

    print("Done pulling trends.")

In [None]:
# Choose what to import.
# Each entry describes one coin pair:
#   'pair'     - symbol used by process() to build the collection name
#                "<pair>_<timeframe>" and the Binance download URL
#   'main'     - not read in this cell; the whole dict is later passed to update_coinpairs()
#   'keywords' - not read in this cell; presumably the Google Trends search terms - confirm in utils.mongo
coinpair_list = [
    {'pair': 'ADABUSD', 'main': 'ADA', 'keywords': ['ADA','CARDANO']},
    {'pair': 'SOLBUSD', 'main': 'SOL', 'keywords': ['SOL','SOLANA']},
    {'pair': 'BTCBUSD', 'main': 'BTC', 'keywords': ['BTC','BITCOIN']},
    {'pair': 'ETHBUSD', 'main': 'ETH', 'keywords': ['ETH','ETHEREUM']},
    {'pair': 'LUNABUSD', 'main': 'LUNA', 'keywords': ['LUNA COIN','TERRA COIN']},
    {'pair': 'DOGEBUSD', 'main': 'DOGE', 'keywords': ['DOGECOIN','DOGE']},
    {'pair': 'MATICBUSD', 'main': 'MATIC', 'keywords': ['MATIC','POLYGON', 'POLYGON MATIC']},
    {'pair': 'MANABUSD', 'main': 'MANA', 'keywords': ['DECENTRALAND']},
    {'pair': 'SHIBBUSD', 'main': 'SHIB', 'keywords': ['SHIBA INU']},
]
# Kline intervals to import; each value becomes a path segment of the download URL
binance_timeframes_list = ['1m', '5m', '30m', '1h', '4h', '1d']

# Call the function to populate the database with the data downloaded from the Binance website as zip.
# Combinations whose collection already exists are skipped (see the output below).
process(db, binance_timeframes_list, coinpair_list)

Collection ADABUSD_1m found, skipping..
Collection SOLBUSD_1m found, skipping..
Collection BTCBUSD_1m found, skipping..
Collection ETHBUSD_1m found, skipping..
Collection LUNABUSD_1m found, skipping..
Collection DOGEBUSD_1m found, skipping..
Collection MATICBUSD_1m found, skipping..
Collection MANABUSD_1m found, skipping..
Collection SHIBBUSD_1m found, skipping..
Collection ADABUSD_5m found, skipping..
Collection SOLBUSD_5m found, skipping..
Collection BTCBUSD_5m found, skipping..
Collection ETHBUSD_5m found, skipping..
Collection LUNABUSD_5m found, skipping..
Collection DOGEBUSD_5m found, skipping..
Collection MATICBUSD_5m found, skipping..
Collection MANABUSD_5m found, skipping..
Collection SHIBBUSD_5m found, skipping..
Collection ADABUSD_30m found, skipping..
Collection SOLBUSD_30m found, skipping..
Collection BTCBUSD_30m found, skipping..
Collection ETHBUSD_30m found, skipping..
Collection LUNABUSD_30m found, skipping..
Collection DOGEBUSD_30m found, skipping..
Collection MATICBUSD

In [None]:
# Download the Fear and Greed index data and update it in the database.
# NOTE(review): the name fng_data_df suggests get_fng() returns a pandas
# DataFrame - confirm in utils.fear_and_greed.
fng_data_df = get_fng()
update_fng(db, fng_data_df)

Done downloading Fear and Greed Index


In [None]:
# Update the coin pairs and the timeframes in the database if needed.
# Every coin pair dict and every timeframe string configured above is passed
# to its update helper one at a time.
# NOTE(review): whether the helpers insert, upsert or overwrite is not visible
# from this notebook - confirm in utils.mongo.
for coinpair in coinpair_list:
    update_coinpairs(db, coinpair)
for timeframe in binance_timeframes_list:
    update_timeframes(db, timeframe)

In [None]:
# Get the coin pair list from the database (rather than reusing coinpair_list from above)
coinpairs = get_coinpairs(db)

# Populate Google Trends: coins whose "trends_<coin>" collection already exists are skipped
init_google_trends(db, coinpairs) 

Collection trends_ADA found, skipping..
Collection trends_SOL found, skipping..
Collection trends_BTC found, skipping..
Collection trends_ETH found, skipping..
Collection trends_LUNA found, skipping..
Collection trends_DOGE found, skipping..
Collection trends_MATIC found, skipping..
Collection trends_MANA found, skipping..
Pulling trends for SHIB
2020-04-10 03:32:09.772802+00:00 - 2020-05-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-05-10 03:32:09.772802+00:00 - 2020-06-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-06-10 03:32:09.772802+00:00 - 2020-07-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-07-10 03:32:09.772802+00:00 - 2020-08-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-08-10 03:32:09.772802+00:00 - 2020-09-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-09-10 03:32:09.772802+00:00 - 2020-10-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-10-10 03:32:09.772802+00:00 - 2020-11-10 03:32:09.772802+00:00 | ['SHIBA INU']
2020-11-10 03:32:09.772802+00:00 - 2020-12-10 03:32:09.772802+0