In [1]:
from pytrends.request import TrendReq
from pytrends import dailydata
from requests import ReadTimeout, ReadTimeoutError
import pymongo
import re
import time
import datetime
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
%matplotlib inline
%config InlineBackend.figure_format = 'svg'

In [11]:
def get_daily_pytrends_data(coin_symbol, coin_name, start_year, start_month, stop_year, stop_month,
                            geo='US', wait_time=5, pause=1):
    """Fetch daily Google Trends data for a coin and return it as one DataFrame.

    The search terms are the coin name and the coin symbol, each combined with
    ``price`` and ``coin``. Every term becomes one column of the result, aligned
    to the same daily calendar from the first day of the start month to the
    last day of the stop month.

    Parameters
    ----------
    coin_symbol : str
        Ticker symbol of the coin.
    coin_name : str
        Name of the coin.
    start_year, start_month : int or str
        First month (inclusive) of the requested range.
    stop_year, stop_month : int or str
        Last month (inclusive) of the requested range.
    geo : str, default 'US'
        Region forwarded to ``dailydata.get_daily_data``.
    wait_time : float, default 5
        ``wait_time`` forwarded to ``dailydata.get_daily_data``.
    pause : float, default 1
        Seconds to sleep after each search term.

    Returns
    -------
    pandas.DataFrame
        A ``date`` column followed by one column per unique search term.
        Days for which the API returned no value are filled with 0; a term
        whose query failed entirely is filled with -1.
    """
    # dict.fromkeys drops duplicates but keeps the order: when the coin name and
    # symbol are identical, each term is queried only once instead of twice.
    search_terms = list(dict.fromkeys([
        f'{coin_name} price',
        f'{coin_name} coin',
        f'{coin_symbol} price',
        f'{coin_symbol} coin',
    ]))

    # The last requested day is the day before the first day of the month
    # following the stop month (rolling over to January of the next year).
    first_day = datetime.datetime(int(start_year), int(start_month), 1)
    if int(stop_month) == 12:
        next_month_start = datetime.datetime(int(stop_year) + 1, 1, 1)
    else:
        next_month_start = datetime.datetime(int(stop_year), int(stop_month) + 1, 1)
    last_day = next_month_start - datetime.timedelta(days=1)
    date_index = pd.date_range(start=first_day, end=last_day, freq='1D', name='date')

    columns = []
    for search_term in search_terms:
        try:
            fetched = dailydata.get_daily_data(
                search_term, start_year, start_month, stop_year, stop_month,
                geo=geo, verbose=True, wait_time=wait_time,
            )
            series = fetched[search_term]
        except Exception as err:
            # Best effort: one failing term must not abort the whole download.
            # Catching Exception (not a bare except) lets Ctrl-C still stop the run.
            print(f"pytrends query for '{search_term}' failed ({err!r}); filling with -1")
            series = pd.Series(-1, index=date_index, name=search_term)
        time.sleep(pause)

        # Force every term onto exactly the requested calendar; days without
        # data become 0 and days outside the range are dropped.
        columns.append(series.reindex(date_index).fillna(0))

    df_all = pd.concat(columns, axis=1)
    df_all.index.name = 'date'
    return df_all.reset_index()

In [12]:
# cred = '../01.Original_data/credentials/mongodb.pw'
# data_cred = pd.read_csv(cred, index_col=0, header=None)
# USER = data_cred.loc['USER'].values[0]
# DB = data_cred.loc['DB'].values[0]
# PW = data_cred.loc['PW'].values[0]
# HOST = data_cred.loc['HOST'].values[0]

# client = pymongo.MongoClient(f"mongodb://{USER}:{PW}@{HOST}/{DB}") # defaults to port 27017

# db = client.coincaster
# # get the filenames of the coins
# coins = db.list_collection_names()

# # make sure to only capture csv
# coins = [f for f in coins if f.endswith('csv')]

# # Select date range
# start_year = 2014
# start_month = 11

# # stop_year = int(datetime.datetime.now().year)
# # stop_month = int(datetime.datetime.now().month)
# stop_year = 2014
# stop_month = 12

# ii = 0
# coin = coins[0]
# coin_symbol = re.findall(r'__(.+).csv', coin)[0]
# coin_name = re.findall(r'(.+)__', coin)[0]
# df_all = get_daily_pytrends_data(coin_symbol, coin_name, start_year, start_month, stop_year, stop_month)

    



In [None]:
# Read the MongoDB credentials from a local key/value file: the first column
# holds the key (USER, DB, PW, HOST), the second column holds its value.
cred = '../01.Original_data/credentials/mongodb.pw'
data_cred = pd.read_csv(cred, index_col=0, header=None)
USER = data_cred.loc['USER'].values[0]
DB = data_cred.loc['DB'].values[0]
PW = data_cred.loc['PW'].values[0]
HOST = data_cred.loc['HOST'].values[0]

# Pass user name and password as keyword arguments instead of formatting them
# into the URI: inside a URI they would have to be percent-escaped, and a
# password containing characters such as '@', ':', '/', '%' or '+' would be
# rejected or misread.
client = pymongo.MongoClient(
    f"mongodb://{HOST}/{DB}",  # defaults to port 27017
    username=str(USER),
    password=str(PW),
)

db = client.coincaster
# get the filenames of the coins
coins = db.list_collection_names()

# make sure to only capture csv
coins = [f for f in coins if f.endswith('csv')]

# Select date range
start_year = 2014
start_month = 1

# stop_year = int(datetime.datetime.now().year)
# stop_month = int(datetime.datetime.now().month)
stop_year = 2014
stop_month = 12

# Collection names look like "<coin name>__<coin symbol>.csv". The dot is escaped
# and the pattern anchored so that only a real ".csv" suffix is stripped.
coin_pattern = re.compile(r'^(.+)__(.+)\.csv$')

# From here on `db` is the target database for the uploads; selecting it once
# before the loop replaces the per-iteration rebinding.
db = client.coincaster_pytrends

# Index of the first coin to process; earlier coins are skipped
# (presumably already uploaded in a previous run -- confirm before re-running).
ii = 4
for jj in range(ii, len(coins)):
    coin = coins[jj]
    parsed = coin_pattern.match(coin)
    if parsed is None:
        print(f"Skipping {coin}: name does not match '<name>__<symbol>.csv'. This is jj {jj}")
        continue
    coin_name, coin_symbol = parsed.groups()
    df_all = get_daily_pytrends_data(coin_symbol, coin_name, start_year, start_month, stop_year, stop_month)

    # upload to database
    # NOTE: insert_many appends documents, so re-running this cell for a coin
    # that was already uploaded stores its records a second time.
    coll_name = f"{coin_name}__{coin_symbol}.pytrends"
    coll = db[coll_name]

    coll.insert_many(df_all.to_dict('records'))
    print(f"Coin {coin} is done. This is jj {jj}")

    



Paypex price:2014-01-01 2014-01-31
Paypex price:2014-02-01 2014-02-28
Paypex price:2014-03-01 2014-03-31
Paypex price:2014-04-01 2014-04-30
Paypex price:2014-05-01 2014-05-31
Paypex price:2014-06-01 2014-06-30
Paypex price:2014-07-01 2014-07-31
Paypex price:2014-08-01 2014-08-31
Paypex price:2014-09-01 2014-09-30
Paypex price:2014-10-01 2014-10-31
Paypex price:2014-11-01 2014-11-30
Paypex price:2014-12-01 2014-12-31
Paypex coin:2014-01-01 2014-01-31
Paypex coin:2014-02-01 2014-02-28
Paypex coin:2014-03-01 2014-03-31
Paypex coin:2014-04-01 2014-04-30
Paypex coin:2014-05-01 2014-05-31
Paypex coin:2014-06-01 2014-06-30
Paypex coin:2014-07-01 2014-07-31
Paypex coin:2014-08-01 2014-08-31
Paypex coin:2014-09-01 2014-09-30
Paypex coin:2014-10-01 2014-10-31
Paypex coin:2014-11-01 2014-11-30
Paypex coin:2014-12-01 2014-12-31
PAYX price:2014-01-01 2014-01-31
PAYX price:2014-02-01 2014-02-28
PAYX price:2014-03-01 2014-03-31
PAYX price:2014-04-01 2014-04-30
PAYX price:2014-05-01 2014-05-31
PAYX pr

In [None]:
# Display the trends frame currently bound to `df_all` (assigned inside the
# download loop, so it holds the data of the most recently processed coin).
df_all

## Plan of attack


- [ ] Read name and symbol from the collections of coincaster 
- [ ] Combine the coin name and the coin symbol with `coin` and `price` to build the search terms, and get daily data for each term
- [ ] Query the general cryptocurrency terms only once and store them in a separate collection. The search terms for that collection are `cryptocurrency`, `crypto`, and `crypto coin`
- [ ] Upload to the database
- [ ] Write a program that runs daily, fetches only the new data, and adds it to the database. Make sure the records stay in chronological order


In [166]:
# Plot the daily values of the 'DIVI price' search term over time.
# The original cell called plt.plot() without arguments, which draws nothing.
p = df_all['DIVI price']
fig, ax = plt.subplots()
ax.plot(df_all['date'], p)
ax.set(title='DIVI price', xlabel='date', ylabel='Google Trends value')
plt.show()

0        0.0
1        0.0
2        0.0
3        0.0
4        0.0
        ... 
2535     0.0
2536     0.0
2537     0.0
2538    42.0
2539     0.0
Name: DIVI price, Length: 2540, dtype: float64