## Get Current DF

In [1]:
import pandas as pd

In [2]:
# Load the previously saved dataset (keyword trend columns plus Price) from data.csv.
df = pd.read_csv('data.csv')

In [3]:
# Parse the 'date' column into datetimes and promote it to the index.
df = df.assign(date=pd.to_datetime(df['date'])).set_index('date')

In [4]:
# Positional slice: rows from the 61st-from-last up to (not including) the 30th-from-last.
df.iloc[-61:-30]

Unnamed: 0_level_0,Bitcoin,Bitcoin Crash,Blockchain,Crisis,Cryptocurrency,Nicehash,Hashing,GPU,GeForce,mining,Tesla,Price
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-03-01,22.0,0.68,23.14,61.0,21.45,9.24,55.25,65.45,36.75,53.68,51.46,44793.601562
2022-03-02,18.92,1.36,21.58,59.17,18.15,10.34,57.85,63.91,46.06,50.63,52.29,45077.578125
2022-03-03,17.6,0.68,22.62,56.12,17.16,14.52,59.8,64.68,41.65,46.97,52.29,44021.578125
2022-03-04,16.28,0.0,17.16,53.68,17.16,13.86,48.75,63.91,38.71,49.41,54.78,42479.613281
2022-03-05,15.62,0.76,13.52,44.53,15.84,16.72,39.65,61.6,37.73,43.31,51.46,39566.335938
2022-03-06,14.74,0.76,14.82,41.48,17.16,9.46,33.15,61.6,43.12,47.58,52.29,39640.175781
2022-03-07,17.38,1.4,18.46,54.29,17.82,5.94,61.1,60.06,38.71,46.97,62.25,39430.226562
2022-03-08,17.82,0.68,20.8,60.39,27.72,13.64,61.1,63.14,49.0,61.0,71.38,39304.441406
2022-03-09,20.24,3.44,23.14,59.17,33.0,11.0,65.0,65.45,48.02,57.34,78.85,42465.671875
2022-03-10,18.04,0.72,21.84,56.12,28.71,7.26,64.35,68.53,38.71,56.12,73.04,42004.726562


## Get today's date

In [5]:
from datetime import datetime

In [6]:
# Capture the current local date once and split it into the parts used as the fetch end point.
today = datetime.today()
stop_year, stop_mon, stop_day = today.year, today.month, today.day

## Get start date

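We start fetching from the month before the month of the last data point.
For example, if the last data point is in April, we fetch data from March 1st until today.
March is then present in both the stored data and the newly fetched data, so we use it to calculate the scaling factor between the two.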

In [7]:
# Step back one calendar month from the last stored observation;
# January wraps around to December of the previous year.
last_point = df.index[-1]
last_dt = last_point.to_pydatetime()
if last_dt.month > 1:
    start_year, start_mon = last_dt.year, last_dt.month - 1
else:
    start_year, start_mon = last_dt.year - 1, 12

## Get keywords from file

In [8]:
import pickle

In [9]:
# Load the readable keyword names (e.g. 'Bitcoin') from the local pickle file.
# pickle can execute arbitrary code while loading, so only open a file you trust.
with open("raw_keywords", "rb") as keyword_file:
    raw_keywords = pickle.load(keyword_file)

In [10]:
# Load the exact search terms (e.g. '/m/05p0rrx') that are sent to Google Trends.
# pickle can execute arbitrary code while loading, so only open a file you trust.
with open("exact_keywords", "rb") as keyword_file:
    exact_keywords = pickle.load(keyword_file)

In [12]:
# Map each exact search term to the readable name stored at the same position.
transform_dict = {
    exact: raw_keywords[position]
    for position, exact in enumerate(exact_keywords)
}
transform_dict

{'/m/05p0rrx': 'Bitcoin',
 'Bitcoin Crash': 'Bitcoin Crash',
 'Blockchain': 'Blockchain',
 'Crisis': 'Crisis',
 '/m/0vpj4_b': 'Cryptocurrency',
 '/g/11gf8fjrxc': 'Nicehash',
 '/m/03l5h': 'Hashing',
 '/m/022l1f': 'GPU',
 '/m/0132b8': 'GeForce',
 '/m/053rd': 'mining',
 '/m/0dr90d': 'Tesla'}

## Pytrends - Getting New Data

In [11]:
import pytrends
from pytrends.request import TrendReq
from pytrends import dailydata
# Shared TrendReq client; in this notebook it is used by the Debug cells further down.
# NOTE(review): tz=360 is presumably a UTC offset in minutes — confirm against the pytrends docs.
pytrend = TrendReq(hl='en-US', tz=360)
import time

In [12]:
# Fetch the daily series for every exact keyword, keeping only rows flagged as not partial.
df_list = []
for word in exact_keywords:
    daily = dailydata.get_daily_data(
        word=word,
        start_year=start_year,
        start_mon=start_mon,
        stop_year=stop_year,
        stop_mon=stop_mon,
        verbose=False,
    )
    # Element-wise comparison kept deliberately: rows where isPartial is missing are dropped too.
    complete_rows = daily['isPartial'] == False
    df_list.append(daily.loc[complete_rows, word])
    print(word + ' done\n')
    # Pause between keywords (presumably to stay under Google Trends rate limits).
    time.sleep(10)

/m/05p0rrx done

Bitcoin Crash done

Blockchain done

Crisis done

/m/0vpj4_b done

/g/11gf8fjrxc done

/m/03l5h done

/m/022l1f done

/m/0132b8 done

/m/053rd done

/m/0dr90d done



## Debug

In [36]:
# Date-range string 'YYYY-MM-DD YYYY-MM-DD' for pytrends, running from the first day of the
# start month up to today. The day is zero-padded like the months so that single-digit days
# (e.g. the 5th) still produce a well-formed date such as 2022-06-05.
timeframe = f"{start_year}-{start_mon:02}-01 {stop_year}-{stop_mon:02}-{stop_day:02}"
timeframe

'2022-03-01 2022-06-25'

In [45]:
# Debug payload for a single keyword; swap in ["Bitcoin"] to inspect a different series.
kw_list = ["Blockchain"]
pytrend.build_payload(
    kw_list,
    cat=0,
    timeframe=timeframe,
    geo='US',
    gprop='',
)

In [46]:
# Show the response for the payload built in the previous cell, including its isPartial column.
pytrend.interest_over_time()

Unnamed: 0_level_0,Blockchain,isPartial
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-03-01,77,False
2022-03-02,73,False
2022-03-03,72,False
2022-03-04,57,False
2022-03-05,42,False
...,...,...
2022-06-20,52,False
2022-06-21,61,False
2022-06-22,66,False
2022-06-23,52,False


In [14]:
# Put the per-keyword series side by side and relabel the columns with the readable names.
df_trends = pd.concat(df_list, axis=1).rename(columns=transform_dict)
df_trends

NameError: name 'df_list' is not defined

## Pytrends - Calculate Scaling Factor

In [None]:
# Stored rows that fall in the start (overlap) month, without the Price column.
stored_in_start_month = (df.index.year == start_year) & (df.index.month == start_mon)
original = df.loc[stored_in_start_month].drop(columns=['Price'])

In [None]:
original

In [None]:
# Freshly fetched rows for the same start (overlap) month.
fetched_in_start_month = (df_trends.index.year == start_year) & (df_trends.index.month == start_mon)
new = df_trends.loc[fetched_in_start_month]

In [None]:
new

In [None]:
# For each keyword column, print the date-aligned ratio of stored values to freshly fetched values.
for column in original.columns:
    ratio = original[column] / new[column]
    print(ratio)

## YFinance

In [None]:
import yfinance as yf

In [None]:
# Request BTC-USD price history at daily resolution for a ten-year period.
btc_ticker = yf.Ticker('BTC-USD')
df_price = btc_ticker.history(period='10y', interval='1d')

In [None]:
# Newer yfinance releases return a timezone-aware index, while the trends index appears to be
# timezone-naive; drop the timezone (keeping the wall-clock date) so that the later inner
# join on dates can find matching rows. This is a no-op when the index is already naive.
if getattr(df_price.index, 'tz', None) is not None:
    df_price.index = df_price.index.tz_localize(None)
# Name the index 'date' so it matches the index name of the trends data.
df_price.index.names = ['date']

In [None]:
# Keep only the daily high and expose it as a one-column frame named 'Price'.
df_price = df_price['High'].to_frame(name='Price')

In [None]:
df_price

## Join

In [None]:
# Combine trends and prices column-wise; the inner join keeps only dates present in both.
new_data = pd.concat(
    [df_trends, df_price],
    axis=1,
    join='inner',
)

## Inspect

In [None]:
df

In [None]:
new_data

In [None]:
# Ad-hoc check: fetch the literal keyword 'Bitcoin' for May-June 2022 and display the full result.
dailydata.get_daily_data(
    word='Bitcoin',
    start_year=2022,
    start_mon=5,
    stop_year=2022,
    stop_mon=6,
)