In [None]:
# Data Analysis Packages
from sklearn import preprocessing
import pandas as pd
import numpy as np
import pickle as pk

In [None]:
# Graphing Packages
import seaborn as sns
import matplotlib.pyplot as plt
# Apply matplotlib's built-in 'ggplot' style sheet to figures created after this point.
plt.style.use('ggplot')

# Import and Save Data

### Crypto Search Data (from Google Trends)

In [None]:
# Download Google Trends interest-over-time for Bitcoin and Ethereum and
# store it as both a pickle and a CSV.
from pytrends.request import TrendReq

# Google Trends request object; later cells reuse it under the name `pt`.
pt = TrendReq(hl="en-US", tz=360)

# Set the keywords & timeframe
keywords = ["Bitcoin", "Ethereum"]
pt.build_payload(keywords, timeframe="all")

# Get the interest over time
crypto_search = pt.interest_over_time()

# Save search data as a pickled pandas dataframe and a .csv file.
# The context manager closes the file handle as soon as the dump finishes.
with open("binary\\og_search_data.p", "wb") as fh:
    pk.dump(crypto_search, fh)
crypto_search.to_csv("csv\\og_search_data_relative.csv")

### Crypto Price Data (from Yahoo Finance)

In [None]:
# Import yfinance package
import yfinance as yf

# Daily BTC-USD history for the requested date window.
crypto_price = yf.download("BTC-USD", start="2009-01-01", end="2021-01-19", interval="1d")

# Save price data as a pickled pandas dataframe and a .csv file.
with open("binary\\og_price_data.p", "wb") as fh:
    pk.dump(crypto_price, fh)
# NOTE(review): this CSV is written into "binary\\" while the search CSV goes
# to "csv\\" — confirm which folder is intended.
crypto_price.to_csv("binary\\og_price_data.csv")

### Check that everything was imported and saved successfully

In [None]:
# Crypto search data: read back from disk to confirm the save worked.
# read_pickle opens and closes the file itself.
pd.read_pickle("binary\\og_search_data.p")

In [None]:
# BTC-USD price data: read back from disk to confirm the save worked.
# read_pickle opens and closes the file itself.
pd.read_pickle("binary\\og_price_data.p")

### Import the original datasets

In [None]:
# NOTE(review): these files are read from "og_data_binary\\", whereas the
# download cells write to "binary\\" — confirm the files are copied there.
# Bitcoin block data
og_block_data = pd.read_pickle("og_data_binary\\og_block_data.p")
# Crypto search data
og_search_data = pd.read_pickle("og_data_binary\\og_search_data.p")

### Match the date range for both datasets

In [None]:
# Truncate the search data to the rows at positions 60..204 (inclusive) of its
# index. NOTE(review): the positions are hard-coded — presumably chosen to match
# the date range of the bitcoin block dataset; confirm if either source changes.
new_search_data = og_search_data.truncate(
    before=og_search_data.index[60],
    after=og_search_data.index[204],
)

# Save the pickled pandas dataframe
with open("block_and_search\\new_search_data.p", "wb") as fh:
    pk.dump(new_search_data, fh)

In [None]:
# Drop the non-numerical 'host' column (the remaining columns are kept).
block_data = og_block_data.drop(columns=['host'])

# Collapse the block data to one record per key. For every row, the string
# value supplies the key (first space-separated token, then its last two
# '/'-separated fields); all non-string values form the record.
# If a row holds several strings, the last one wins.
dic = {}

for i in block_data.index:
    lst = []
    new_key = None
    for c in block_data.columns:
        k = block_data[c][i]
        if isinstance(k, str):
            var = k.split(" ")[0].split('/')
            new_key = var[-2] + '/' + var[-1]
        else:
            lst.append(k)

    # Keep the first record seen for each key. The lookup must use the same
    # key that is stored (new_key), otherwise the guard never triggers.
    # Rows without any string value cannot be keyed and are skipped.
    if new_key is not None and new_key not in dic:
        dic[new_key] = lst

# Convert the dictionary to a pandas dataframe and rename the columns.
# NOTE(review): dropping the last column name assumes the single string
# column is the last one in block_data — confirm against the source data.
new_block_data = pd.DataFrame.from_dict(data=dic, orient='index', columns=list(block_data.columns)[:-1])

# Save the pickled pandas dataframe
with open("block_and_search\\new_block_data.p", "wb") as fh:
    pk.dump(new_block_data, fh)

### Check that the data was cleaned and saved correctly

In [None]:
# Read the cleaned block data back from disk (read_pickle closes the file itself).
pd.read_pickle("block_and_search\\new_block_data.p")

In [None]:
import seaborn as sns

# Overlay one regression of every block metric against block height on a
# single set of axes.
fig, ax = plt.subplots(figsize=(20, 12))
ax.set_title("Block & Search Data Correlation")
sns.set_theme(color_codes=True)

# `test` is reused by the regression cells further down.
test = pd.read_pickle("block_and_search\\new_block_data.p")

for i in test.columns:
    if i != 'height':
        sns.regplot(x='height', y=i, data=test, ax=ax)

In [None]:
# Read the truncated search data back from disk (read_pickle closes the file itself).
pd.read_pickle("block_and_search\\new_search_data.p")

### Concatenate Block and Search Data

In [None]:
# Load binary files
new_block_data = pd.read_pickle("block_and_search\\new_block_data.p")
new_search_data = pd.read_pickle("block_and_search\\new_search_data.p")

# Match the indexes: the block rows take over the search data's index
# (set_index requires both frames to have the same number of rows).
new_block_data = new_block_data.set_index(new_search_data.index)

# Concatenate the block and search datasets side by side and drop the
# Google Trends 'isPartial' flag column.
block_and_search_concat = pd.concat([new_search_data, new_block_data], axis=1)
block_and_search_concat = block_and_search_concat.drop(columns=['isPartial'])

# Save the concatenated dataset
block_and_search_concat.to_csv("block_and_search\\block_and_search_concat.csv")
with open("block_and_search\\block_and_search_concat.p", "wb") as fh:
    pk.dump(block_and_search_concat, fh)

# Show the dataset
block_and_search_concat

In [None]:
import seaborn as sns

# One regression figure per block metric against block height, each saved to
# its own file. `test` is the block dataframe loaded in the earlier regplot cell.
sns.set_theme(color_codes=True)

for i in test.columns:
    if i not in ('height', 'block_reward_tips', 'block_reward'):
        # lmplot builds its own figure, so the 20x12 size is requested through
        # height/aspect rather than a separate plt.subplots() call, which
        # would only leave an empty extra figure behind.
        sns.lmplot(x="height", y=i, data=test, x_jitter=.05, height=12, aspect=20 / 12)
        plt.title("Price & Block Data Correlation")
        plt.savefig(f"block_and_search\\{i}_reg.jpg", dpi=300)
        # Release the figure once it is on disk so the loop does not pile up open figures.
        plt.close()

In [None]:
# Regression of size, volume and weight against block height: one figure
# and one output file per metric. `test` is the block dataframe loaded in
# the earlier regplot cell.
for metric in ("size", "volume", "weight"):
    fig, ax = plt.subplots(figsize=(20, 12))
    ax.set_title("Price & Block Data Correlation")
    sns.regplot(x='height', y=metric, data=test, ax=ax)
    sns.set_theme(color_codes=True)
    fig.savefig(f"block_and_search\\{metric}_reg.jpg", dpi=300)

### Calculate the correlation between the data points

In [None]:
# Load binary files
block_and_search_concat = pd.read_pickle("block_and_search\\block_and_search_concat.p")

# Pairwise Pearson correlation between all columns
block_and_search_corr = block_and_search_concat.corr(method='pearson')

# Save the correlation matrix
with open("block_and_search\\block_and_search_corr.p", "wb") as fh:
    pk.dump(block_and_search_corr, fh)

# Show the correlation matrix
block_and_search_corr

### Visualize the correlation as a heat map

In [None]:
# Annotated heat map of the block/search correlation matrix, saved as a JPEG.
fig, ax = plt.subplots(figsize=(20, 12))
sns.heatmap(block_and_search_corr, annot=True, fmt='.2%', ax=ax)
ax.set_title("Block & Search Data Correlation")
fig.savefig("block_and_search\\block_and_search_corr.jpg", dpi=300)

### Min-Max Scale the data so we can better visualize the change over time

In [None]:
# Load binary files
block_and_search_concat = pd.read_pickle("block_and_search\\block_and_search_concat.p")

# Rescale every column independently to the 0-100 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
scaled = min_max_scaler.fit_transform(block_and_search_concat)

# fit_transform returns a bare array, so re-attach the original labels
block_and_search_scaled = pd.DataFrame(
    scaled,
    index=block_and_search_concat.index,
    columns=block_and_search_concat.columns,
)

with open("block_and_search\\block_and_search_scaled.p", "wb") as fh:
    pk.dump(block_and_search_scaled, fh)
block_and_search_scaled

### Visualize Block and Search Data (Unscaled)

In [None]:
# Unscaled search interest for both coins, starting at the row at position 14
# of the combined frame. The size is passed to subplots() directly, because
# changing rcParams afterwards does not resize a figure that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
start = block_and_search_concat.index[14]
line1, = ax.plot(block_and_search_concat['Bitcoin'].truncate(before=start), label="BTC")
line2, = ax.plot(block_and_search_concat['Ethereum'].truncate(before=start), label="ETH")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line2], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Date")
ax.set_ylabel("y-axis")

# Add a title to the plot
ax.set_title("Block & Search Data Over Time")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Show the plot
plt.show()

### Visualize Block and Search Data (Scaled)

In [None]:
# Scaled Bitcoin search interest against scaled block size, starting at the
# row at position 14 of the scaled frame. The size is passed to subplots()
# directly, because changing rcParams afterwards does not resize a figure
# that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
start = block_and_search_scaled.index[14]
line1, = ax.plot(block_and_search_scaled['Bitcoin'].truncate(before=start), label="BTC")
line3, = ax.plot(block_and_search_scaled['size'].truncate(before=start), label="Size")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line3], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Date")
ax.set_ylabel("Relative Interest")

# Add a title to the plot
ax.set_title("Block & Search Data Over Time")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Show the plot
plt.show()

### Import the original dataset

In [None]:
# Bitcoin block data
og_block_data = pd.read_pickle("og_data_binary\\og_block_data.p")
# BTC-USD price data
og_price_data = pd.read_pickle("og_data_binary\\og_price_data.p")

### Match the date range for both datasets

In [None]:
# Collapse the block data to one record per key: each row's string value
# (first space-separated token) is the key and its non-string values are the
# record. If a row holds several strings, the last one wins.
dic = {}
for i in og_block_data.index:
    lst = []
    key = None
    for c in og_block_data.columns:
        k = og_block_data[c][i]
        if isinstance(k, str):
            key = k.split(" ")[0]
        else:
            lst.append(k)

    # Keep the first record seen for each key; rows without any string value
    # cannot be keyed and are skipped.
    if key is not None and key not in dic:
        dic[key] = lst

# Convert back to dataframe and rename columns.
# NOTE(review): the eight names assume every row has exactly eight
# non-string values in this order — confirm against the source data.
block_data_new = pd.DataFrame.from_dict(data=dic, orient='index', columns=['Height', 'Trans Volume', 'Stripped Size', 'Size', 'Weight', 'Avg. Transaction Fee', 'Block Reward', 'Block Reward Tips' ])

# Move the keys into an 'index' column, keep rows from position 2078 onwards
# and adopt the price data's index (set_index requires equal lengths).
# NOTE(review): 2078 is hard-coded — presumably the first row that overlaps
# the price data; confirm.
block_data_new = block_data_new.reset_index()
block_data_new = block_data_new.truncate(before=2078)
block_data_new = block_data_new.set_index(og_price_data.index)

# Save the dataframe
with open("price_and_block\\block_data_new.p", "wb") as fh:
    pk.dump(block_data_new, fh)

### Check that the data was cleaned and saved correctly

In [None]:
# Read the re-indexed block data back from disk (read_pickle closes the file itself).
pd.read_pickle("price_and_block\\block_data_new.p")

In [None]:
# Read the original price data back from disk (read_pickle closes the file itself).
pd.read_pickle("og_data_binary\\og_price_data.p")

### Concatenate Price and Block Data

In [None]:
# Load in pickled data
block_data_new = pd.read_pickle("price_and_block\\block_data_new.p")

# Put block and price columns side by side (`og_price_data` comes from the
# import cell of this section), then drop the 'index' and 'Height' columns.
price_and_block_concat = pd.concat([block_data_new, og_price_data], axis=1)
price_and_block_concat = price_and_block_concat.drop(columns=['index', 'Height'])

# Save the dataframe
with open("price_and_block\\price_and_block_concat.p", "wb") as fh:
    pk.dump(price_and_block_concat, fh)

# Show the dataframe
price_and_block_concat

### Calculate the correlation between the data points

In [None]:
# Load binary files
price_and_block_concat = pd.read_pickle("price_and_block\\price_and_block_concat.p")

# Pairwise Pearson correlation between all columns
price_and_block_corr = price_and_block_concat.corr(method='pearson')

# Save the correlation matrix
with open("price_and_block\\price_and_block_corr.p", "wb") as fh:
    pk.dump(price_and_block_corr, fh)

# Show the correlation matrix
price_and_block_corr

### Visualize the correlation as a heat map

In [None]:
# Annotated heat map of the price/block correlation matrix, saved as a JPEG.
fig, ax = plt.subplots(figsize=(30, 12))
sns.heatmap(price_and_block_corr, annot=True, fmt='.2%', ax=ax)
ax.set_title("Bitcoin Block & Price Data")
fig.savefig("price_and_block\\price_and_block_corr.jpg", dpi=300)

### Min-Max Scale the data so we can better visualize the change over time

In [None]:
# Load binary files
price_and_block_concat = pd.read_pickle("price_and_block\\price_and_block_concat.p")

# Rescale every column independently to the 0-100 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
scaled = min_max_scaler.fit_transform(price_and_block_concat)

# fit_transform returns a bare array, so re-attach the original labels
price_and_block_scaled = pd.DataFrame(
    scaled,
    index=price_and_block_concat.index,
    columns=price_and_block_concat.columns,
)

# Save the dataframe
with open("price_and_block\\price_and_block_scaled.p", "wb") as fh:
    pk.dump(price_and_block_scaled, fh)
price_and_block_scaled

### Visualize Price and Block data (Unscaled)

In [None]:
# Unscaled 'Volume' column over time. The size is passed to subplots()
# directly, because changing rcParams afterwards does not resize a figure
# that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
line2, = ax.plot(price_and_block_concat['Volume'], label="Transaction Volume")

# Create a legend for the plot
first_legend = ax.legend(handles=[line2], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Time")
ax.set_ylabel("y-axis")

# Add a title to the plot
ax.set_title("Price & Block Data Over Time")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Save the plot
fig.savefig("plots\\crypto_iot.jpg", dpi=300)

# Show the plot
plt.show()

### Visualize Price and Block data (Scaled)

In [None]:
# Scaled block size, trading volume and opening price on shared axes.
# Each legend label names the column it actually plots. The size is passed
# to subplots() directly, because changing rcParams afterwards does not
# resize a figure that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
line1, = ax.plot(price_and_block_scaled['Size'], label="Block Size")
line2, = ax.plot(price_and_block_scaled['Volume'], label="Trading Volume")
line3, = ax.plot(price_and_block_scaled['Open'], label="Open Price")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line2, line3], loc='upper left')

# Label the x and y axis (the scaler used for this frame maps to 0-100)
ax.set_xlabel("Time")
ax.set_ylabel("Price and Interest (Scaled from 0-100)")

# Add a title to the plot
ax.set_title("Crypto Price and Block Data")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Save the plot
fig.savefig("price_and_block\\price_and_block_scaled.jpg", dpi=300)

# Show the plot
plt.show()

# Search & Price Data

### Import the original datasets

In [None]:
# Crypto Search Data (read_pickle opens and closes the file itself)
og_search_data = pd.read_pickle("binary\\og_search_data.p")

In [None]:
# Fetch interest-over-time for five coins, reusing the Google Trends request
# object `pt` created in the first download cell.
keywords = ["Bitcoin", "Dogecoin", "Ethereum", "Binance Coin", "Litecoin"]
pt.build_payload(keywords, timeframe="all")

# Get the interest over time
crypto_search_data = pt.interest_over_time()

# Save the five-coin search data as a pickled pandas dataframe. The frame
# dumped here must be the one just fetched, not the earlier two-coin
# `crypto_search`.
with open("binary\\crypto_search_data.p", "wb") as fh:
    pk.dump(crypto_search_data, fh)

In [None]:
# Import yfinance package
import yfinance as yf

# Daily price history for five tickers in a single request.
crypto_price = yf.download("BTC-USD ETH-USD DOGE-USD BNB-USD LTC-USD", start="2009-01-01", end="2022-05-01", interval="1d")

# Save price data as a pickled pandas dataframe
with open("binary\\crypto_price_data.p", "wb") as fh:
    pk.dump(crypto_price, fh)
crypto_price

In [None]:
# Export the multi-ticker price table held in `crypto_price` to CSV.
crypto_price.to_csv('csv\\crypto_price_a.csv')

### Match the date range for both datasets

In [None]:
# Keep only observations dated 2014-10-01 or later, using one vectorised
# comparison on the index instead of dropping rows one at a time.
# NOTE(review): assumes a timezone-naive DatetimeIndex — confirm. The cutoff
# is presumably the first month-start covered by the price data; confirm.
first_kept = pd.Timestamp("2014-10-01")
crypto_search_data = crypto_search_data.loc[crypto_search_data.index >= first_kept]

# Drop the Google Trends 'isPartial' flag column
new_search_data = crypto_search_data.drop(columns='isPartial')

# Save the dataframe
with open("binary\\new_search_data.p", "wb") as fh:
    pk.dump(new_search_data, fh)

In [None]:
# Load binary file
crypto_price_data = pd.read_pickle("binary\\crypto_price_data.p")

# Keep only the rows dated on the first day of a month.
# NOTE(review): assumes a DatetimeIndex — confirm.
crypto_price_data = crypto_price_data.loc[crypto_price_data.index.day == 1]

# Save the dataframe. This overwrites the daily data stored under the same
# file name with the month-start subset.
with open("binary\\crypto_price_data.p", "wb") as fh:
    pk.dump(crypto_price_data, fh)

### Check that the data was cleaned and saved correctly

In [None]:
# Read the month-start price data back from disk (read_pickle closes the file itself).
pd.read_pickle("binary\\crypto_price_data.p")

In [None]:
# Read the trimmed search data back from disk (read_pickle closes the file itself).
pd.read_pickle("binary\\new_search_data.p")

### Concatenate Search and Price Data

In [None]:
# Preview the cleaned search data straight from disk, so this cell does not
# rely on a variable that is only assigned further down the notebook.
search_data = pd.read_pickle("binary\\new_search_data.p")
search_data

In [None]:
# Load the month-start price data and the trimmed search data
price_data = pd.read_pickle("binary\\crypto_price_data.p")
search_data = pd.read_pickle("binary\\new_search_data.p")

# Drop the most recent search row.
# NOTE(review): presumably so both frames end on the same month — confirm.
search_data = search_data.drop(search_data.index[-1])

# Keep only the price columns whose label contains "Open", selected with a
# boolean mask instead of dropping columns while iterating over them.
is_open_column = ["Open" in col for col in price_data.columns]
price_data = price_data.loc[:, is_open_column]

# Concatenate the two dataframes side by side
search_and_price_concat = pd.concat([search_data, price_data], axis=1)

# Add a running row counter (0, 1, 2, ...) as the 'Time' column
search_and_price_concat['Time'] = range(len(search_and_price_concat.index))

# Give every column a flat, readable name.
# NOTE(review): the order assumes five search columns followed by the Open
# prices for BNB, BTC, DOGE, ETH, LTC — confirm against the concatenated frame.
search_and_price_concat.columns = ['Bitcoin Searches', 'Dogecoin Searches', 'Ethereum Searches', 'Binance Coin Searches', 'Litecoin Searches', 'BNB-USD', 'BTC-USD', 'DOGE-USD','ETH-USD','LTC-USD','Time']

# Binance Coin is excluded from the rest of the analysis
search_and_price_concat = search_and_price_concat.drop(columns=['Binance Coin Searches', 'BNB-USD'])

# Save the dataframe
with open("binary\\search_and_price_concat.p", "wb") as fh:
    pk.dump(search_and_price_concat, fh)
search_and_price_concat

### Calculate the correlation between the data points

In [None]:
# Pairwise Pearson correlation between all search and price columns
search_and_price_corr = search_and_price_concat.corr(method='pearson')

# Save the correlation matrix
with open("binary\\search_and_price_corr.p", "wb") as fh:
    pk.dump(search_and_price_corr, fh)

### Visualize the correlation as a heat map

In [None]:
# Annotated heat map of the search/price correlation matrix, saved as a JPEG.
fig, ax = plt.subplots(figsize=(25, 12))
sns.heatmap(search_and_price_corr, annot=True, fmt='.2%', ax=ax)
ax.set_title("Crypto Price & Search Data")
fig.savefig("plots\\search_and_price_corr.jpg", dpi=300)

### Min-Max Scale the data so we can better visualize the change over time

In [None]:
# Rescale every column independently to the 0-100 range
from sklearn import preprocessing
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
scaled = min_max_scaler.fit_transform(search_and_price_concat)

# The scaler returns a bare array, so rebuild a frame with the original labels
search_and_price_scale = pd.DataFrame(
    scaled,
    index=search_and_price_concat.index,
    columns=search_and_price_concat.columns,
)

# Persist the scaled frame as CSV and as a pickle
search_and_price_scale.to_csv("csv\\search_and_price_scale.csv")
search_and_price_scale.to_pickle("binary\\search_and_price_scale.p")

# Expose the index as a regular 'dates' column (added after saving, so it is
# not part of the files on disk) and display the frame
search_and_price_scale['dates'] = search_and_price_scale.index
search_and_price_scale

### Visualize Price and Search data per coin (Scaled)

In [None]:
# Dogecoin: scaled search interest (solid) against scaled price (dashed).
fig, ax = plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

for column, line_kwargs in (("Dogecoin Searches", {}), ("DOGE-USD", {"linestyle": "dashed"})):
    sns.lineplot(x='dates', y=column, data=search_and_price_scale, label=column, ax=ax, **line_kwargs)

ax.set_title("Dogecoin Search & Price Plot")
ax.set_xlabel('Time')
ax.set_ylabel('y-axis')
ax.legend(loc='upper left')

# Save plot
fig.savefig("plots\\search_and_price_plot_doge.jpg", dpi=300)
plt.show()

In [None]:
# Litecoin: scaled search interest (solid) against scaled price (dashed).
fig, ax = plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

for column, line_kwargs in (("Litecoin Searches", {}), ("LTC-USD", {"linestyle": "dashed"})):
    sns.lineplot(x='dates', y=column, data=search_and_price_scale, label=column, ax=ax, **line_kwargs)

ax.set_title("Litecoin Search & Price Plot")
ax.set_xlabel('Time')
ax.set_ylabel('y-axis')
ax.legend(loc='upper left')

# Save plot
fig.savefig("plots\\search_and_price_plot_ltc.jpg", dpi=300)
plt.show()

In [None]:
# Ethereum: scaled search interest (solid) against scaled price (dashed),
# plotted over the 'Time' column.
fig, ax = plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

for column, line_kwargs in (("Ethereum Searches", {}), ("ETH-USD", {"linestyle": "dashed"})):
    sns.lineplot(x='Time', y=column, data=search_and_price_scale, label=column, ax=ax, **line_kwargs)

ax.set_title("Ethereum Search & Price Plot")
ax.set_xlabel('Time')
ax.set_ylabel('y-axis')
ax.legend(loc='upper left')

# Save plot
fig.savefig("plots\\search_and_price_plot_eth.jpg", dpi=300)
plt.show()

In [None]:
# Bitcoin: scaled search interest (solid) against scaled price (dashed),
# plotted over the 'Time' column.
fig, ax = plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

for column, line_kwargs in (("Bitcoin Searches", {}), ("BTC-USD", {"linestyle": "dashed"})):
    sns.lineplot(x='Time', y=column, data=search_and_price_scale, label=column, ax=ax, **line_kwargs)

ax.set_title("Bitcoin Search & Price Plot")
ax.set_xlabel('Time')
ax.set_ylabel('y-axis')
ax.legend(loc='upper left')

# Save plot
fig.savefig("plots\\search_and_price_plot_btc.jpg", dpi=300)
plt.show()

In [None]:
# Third-order polynomial trend lines (no scatter points) for scaled Bitcoin
# search interest and scaled BTC-USD price over the 'Time' column.
fig, ax = plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

for column in ('Bitcoin Searches', 'BTC-USD'):
    sns.regplot(
        x='Time',
        y=column,
        data=search_and_price_scale,
        label=column,
        scatter=False,
        order=3,
        ax=ax,
    )

ax.set_title("Search & Price Plot")
ax.set_xlabel('Time')
ax.set_ylabel('y-axis')
ax.legend(loc='upper left')

plt.show()

In [None]:
# Unscaled search interest, using the column names search_and_price_concat
# actually carries ("<coin> Searches"). Binance Coin is not plotted because
# its columns are dropped when that frame is built. The size is passed to
# subplots() directly, because changing rcParams afterwards does not resize
# a figure that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
line1, = ax.plot(search_and_price_concat['Bitcoin Searches'], label="Bitcoin Searches")
line2, = ax.plot(search_and_price_concat['Ethereum Searches'], label="Ethereum Searches")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line2], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Time")
ax.set_ylabel("y-axis")

# Add a title to the plot
ax.set_title("Bitcoin Price & Search Data")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Save the plot
fig.savefig("search_and_price\\search_and_price_data_unsacled.jpg", dpi=300)

# Show the plot
plt.show()


### Visualize Price and Search data (Scaled)

In [None]:
# Scaled Ethereum search interest against scaled BTC-USD price, using the
# flat column names search_and_price_scale actually carries. The size is
# passed to subplots() directly, because changing rcParams afterwards does
# not resize a figure that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
line1, = ax.plot(search_and_price_scale['Ethereum Searches'], label="Ethereum Searches")
line2, = ax.plot(search_and_price_scale['BTC-USD'], label="BTC-USD")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line2], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Time")
ax.set_ylabel("y-axis")

# Add a title to the plot
ax.set_title("Bitcoin Price & Search Data")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Save the plot
fig.savefig("search_and_price\\search_and_price_data_scaled.jpg", dpi=300)

# Show the plot
plt.show()

# Regression Plots

In [None]:
# Linear trends of scaled Dogecoin price and search interest over 'Time'.
# NOTE(review): this reads search_and_price_scale with its 'DOGE-USD' and
# 'Dogecoin Searches' columns, because no `test_scaled` frame is defined
# anywhere in the notebook — confirm this is the intended data.
fig, ax = plt.subplots(figsize=(20, 12))
ax.set_title("Dogecoin Search and Price correlation")
sns.set_theme(color_codes=True)
sns.regplot(x='Time', y='DOGE-USD', data=search_and_price_scale, label='DOGE-USD', ax=ax)
sns.regplot(x='Time', y='Dogecoin Searches', data=search_and_price_scale, label='Dogecoin Searches', ax=ax)
ax.legend(loc='upper left')

In [None]:
# Linear trend of unscaled Ethereum search interest over the running row
# counter. The counter column in search_and_price_concat is named 'Time' and
# the search column 'Ethereum Searches'.
fig, ax = plt.subplots(figsize=(20, 12))

ax = sns.regplot(x="Time", y="Ethereum Searches", data=search_and_price_concat, ax=ax)



# OTHER

In [None]:
# Plot BTC Volume
# NOTE(review): `btc` is not assigned anywhere in this notebook — it
# presumably should be a BTC-USD price frame with a 'Volume' column; confirm
# and load it before running this cell.
btc['Volume'].plot(figsize=(10, 6))
# Label the x and y axis
plt.xlabel("Date")
plt.ylabel("Volume")
# Save the plot
plt.savefig("plots\\btc_volume.png")

In [None]:
# Fresh Google Trends request object for the exchange comparison
pt = TrendReq(hl="en-US", tz=360)

# Keywords to compare over the full available history
keywords = ["Coinbase", "Binance"]
pt.build_payload(kw_list=keywords, timeframe="all")

# Interest over time for both exchanges
exch_iot = pt.interest_over_time()

# Write the result to CSV and display it
exch_iot.to_csv(path_or_buf="data\\exch_iot.csv")
exch_iot

In [None]:
# Search interest for both exchanges, starting at the row at position 145 of
# the index. The size is passed to subplots() directly, because changing
# rcParams afterwards does not resize a figure that already exists.
fig, ax = plt.subplots(figsize=(20, 12))
start = exch_iot.index[145]
line1, = ax.plot(exch_iot['Coinbase'].truncate(before=start), label="Coinbase")
line2, = ax.plot(exch_iot['Binance'].truncate(before=start), label="Binance")

# Create a legend for the plot
first_legend = ax.legend(handles=[line1, line2], loc='upper left')

# Label the x and y axis
ax.set_xlabel("Date")
ax.set_ylabel("Relative Interest")

# Add a title to the plot
ax.set_title("Crypto Exchange Interest Over Time")

# Default size for figures created later without an explicit figsize
plt.rcParams["figure.figsize"] = (20, 12)

# Save the plot
fig.savefig("plots\\exch_iot.jpg", dpi=300)

# Show the plot
plt.show()


In [None]:
# Plot BTC daily high price
# NOTE(review): `btc` is not assigned anywhere in this notebook — it
# presumably should be a BTC-USD price frame with a 'High' column; confirm
# and load it before running this cell.
btc['High'].plot(figsize=(10, 6))
# Label the x and y axis
plt.xlabel("Date")
plt.ylabel("Price")
# Save under its own file name so the volume plot is not overwritten
plt.savefig("plots\\btc_high.png")

In [None]:
# Keyword whose regional interest is requested
keyword = ["Bitcoin"]
pt.build_payload(keyword, timeframe="all")

# Interest by country (low-volume regions and geo codes included), wrapped in
# a DataFrame and ordered from highest to lowest "Bitcoin" value
ibr = pd.DataFrame(
    pt.interest_by_region("COUNTRY", inc_low_vol=True, inc_geo_code=True)
).sort_values("Bitcoin", ascending=False)

# Save the dataframe to a csv file and display it
ibr.to_csv("data\\ibr.csv")
ibr