# Block & Search Data

In [1]:
# Data Analysis Packages
from sklearn import preprocessing
import pandas as pd
import numpy as np
import pickle as pk

In [2]:
# Graphing Packages
import seaborn as sns
import matplotlib.pyplot as plt
plt.style.use('ggplot')

# Import and Save Data

In [None]:
# Placeholder cell: the unfinished `for i in range(10):` loop that used to be
# here had no body, so it raised a SyntaxError and stopped "Run All".

### Bitcoin Block Data (from Prof. Zhang)

In [None]:
# Import the BTC block dataset given to us by Prof. Zhang
file_location = '..\\dataset.csv'
btc_block_data = pd.read_csv(file_location, low_memory=False)

# Save the data as a pickled pandas dataframe; the `with` block closes the
# file handle as soon as the dump has finished
with open("binary\\og_block_data.p", "wb") as pickle_file:
    pk.dump(btc_block_data, pickle_file)

### New Bitcoin Block Data (from Blockchain.com)

In [None]:
# Import the newer BTC block dataset (Blockchain.com export); the first CSV
# column is used as the dataframe index
file_location = '..\\new_data.csv'
btc_block_data = pd.read_csv(file_location, low_memory=False, index_col=0)

# Save the data as a pickled pandas dataframe; the `with` block closes the
# file handle as soon as the dump has finished
with open("binary\\new_btc_data.p", "wb") as pickle_file:
    pk.dump(btc_block_data, pickle_file)

### Crypto Search Data (from Google Trends) Absolute

In [None]:
# Import the Google Trends request class. The former `import pytrends as pt`
# was dropped: the alias was overwritten by the request object on the next
# statement and never used as a module.
from pytrends.request import TrendReq

# Initialize a new Google Trends request object
pt = TrendReq(hl="en-US", tz=360)

# Set the keywords & timeframe (all keywords are sent in one payload)
keywords = ["Bitcoin", "Ethereum", "Dogecoin", "Binance Coin", "Litecoin"]
pt.build_payload(keywords, timeframe="all")

# Get the interest over time
crypto_search = pt.interest_over_time()

# Save search data as a pickled pandas dataframe and a .csv file
with open("binary\\og_search_data.p", "wb") as pickle_file:
    pk.dump(crypto_search, pickle_file)
crypto_search.to_csv("csv\\og_search_data.csv")


### Crypto Search Data (from Google Trends) Relative

In [None]:
# Initialize a new Google Trends request object
pt = TrendReq(hl="en-US", tz=360)

# Set the keywords & timeframe
keywords = ["Bitcoin", "Ethereum", "Dogecoin", "Binance Coin", "Litecoin"]

# Request each keyword in its own payload and collect the returned frames
keyword_frames = []
for keyword in keywords:
    pt.build_payload([keyword], timeframe="all")

    # Get the interest over time for this keyword only
    keyword_frames.append(pt.interest_over_time())

# Join the per-keyword frames side by side with a single concat instead of
# re-concatenating a growing frame on every iteration
crypto_search_relative = pd.concat(keyword_frames, axis=1)

# Save search data as a pickled pandas dataframe and a .csv file
with open("binary\\og_search_data_relative.p", "wb") as pickle_file:
    pk.dump(crypto_search_relative, pickle_file)

# Write the relative frame; this line previously wrote `crypto_search`
# (the absolute data) into the relative CSV
crypto_search_relative.to_csv("csv\\og_search_data_relative.csv")

### Check that everything was imported and saved successfully

In [None]:
# Bitcoin block data: reload the pickle (closing the file afterwards) and display it
with open("binary\\og_block_data.p", "rb") as pickle_file:
    saved_block_data = pk.load(pickle_file)
saved_block_data

In [None]:
# Crypto search data: reload the pickle (closing the file afterwards) and display it
with open("binary\\og_search_data.p", "rb") as pickle_file:
    saved_search_data = pk.load(pickle_file)
saved_search_data

In [None]:
# Relative crypto search data: reload the pickle (closing the file afterwards) and display it
with open("binary\\og_search_data_relative.p", "rb") as pickle_file:
    saved_search_data_relative = pk.load(pickle_file)
saved_search_data_relative

In [None]:
# New Bitcoin block data: reload the pickle (closing the file afterwards) and display it
with open("binary\\new_btc_data.p", "rb") as pickle_file:
    saved_new_btc_data = pk.load(pickle_file)
saved_new_btc_data

# Analysis

### Import the original datasets

In [None]:
# Bitcoin block data
with open("binary\\og_block_data.p", "rb") as pickle_file:
    og_block_data = pk.load(pickle_file)

# New Bitcoin block data. This previously loaded og_search_data_relative.p,
# which is the relative search data, not the block data saved to new_btc_data.p.
with open("binary\\new_btc_data.p", "rb") as pickle_file:
    new_btc_block_data = pk.load(pickle_file)

# Crypto search data
with open("binary\\og_search_data.p", "rb") as pickle_file:
    og_search_data = pk.load(pickle_file)

# Crypto search data relative
with open("binary\\og_search_data_relative.p", "rb") as pickle_file:
    og_search_data_relative = pk.load(pickle_file)


In [None]:
# Display the original block data; the `with` block closes the file handle
with open("binary/og_block_data.p", "rb") as pickle_file:
    og_block_data_preview = pk.load(pickle_file)
og_block_data_preview

### Match the date range for both datasets

In [None]:
# Drop the non-numerical 'host' column (every other column, including the
# height, is kept)
block_data = og_block_data.drop(columns=['host'])

# Maps "<second-to-last>/<last>" date field -> the non-string values of the
# first row seen for that key
dic = {}

for row in block_data.itertuples(index=False, name=None):
    lst = []
    new_key = None
    for k in row:
        if isinstance(k, str):
            # Keep the token before the first space, then join its last two
            # '/'-separated fields to form the grouping key
            date_fields = k.split(" ")[0].split('/')
            new_key = date_fields[-2] + '/' + date_fields[-1]
        else:
            lst.append(k)

    # The original tested `key` (the full token) while storing under
    # `new_key`, so the "already seen" guard did not match the stored keys and
    # later rows overwrote earlier ones. Test the key that is actually stored.
    # Rows without any string value are skipped instead of silently reusing
    # the key of the previous row.
    if new_key is not None and new_key not in dic:
        dic[new_key] = lst

# Convert the dictionary to a pandas dataframe and rename the columns.
# NOTE(review): the [:-1] slice assumes the string (date) column is the last
# column of block_data - confirm against the CSV layout.
new_block_data = pd.DataFrame.from_dict(data=dic, orient='index', columns=list(block_data.columns)[:-1])

# Save the pickled pandas dataframe
with open("binary\\new_block_data.p", "wb") as pickle_file:
    pk.dump(new_block_data, pickle_file)

Absolute Search Data

In [None]:
# Truncate the data to match the date range of the bitcoin block dataset
# (index positions 60 through 204, both included) and drop the 'isPartial'
# column, producing a new frame instead of mutating one in place
first_label = og_search_data.index[60]
last_label = og_search_data.index[204]
new_search_data = (
    og_search_data
    .truncate(before=first_label, after=last_label)
    .drop(columns='isPartial')
)

# Save the pickled pandas dataframe
with open("binary\\new_search_data.p", "wb") as pickle_file:
    pk.dump(new_search_data, pickle_file)

Relative Search Data

In [None]:
# Truncate the data to match the date range of the bitcoin block dataset
# (index positions 60 through 204, both included) and drop every column
# labelled 'isPartial', producing a new frame instead of mutating one in place
first_label = og_search_data_relative.index[60]
last_label = og_search_data_relative.index[204]
new_search_data_relative = (
    og_search_data_relative
    .truncate(before=first_label, after=last_label)
    .drop(columns='isPartial')
)

# Save the pickled pandas dataframe
with open("binary\\new_search_data_relative.p", "wb") as pickle_file:
    pk.dump(new_search_data_relative, pickle_file)

Block Data

### Check that the data was cleaned and saved correctly

In [None]:
# Cleaned block data: reload the pickle (closing the file afterwards) and display it
with open("binary\\new_block_data.p", "rb") as pickle_file:
    saved_new_block_data = pk.load(pickle_file)
saved_new_block_data

In [None]:
# Cleaned absolute search data: reload the pickle (closing the file afterwards) and display it
with open("binary\\new_search_data.p", "rb") as pickle_file:
    saved_new_search_data = pk.load(pickle_file)
saved_new_search_data

In [None]:
# Cleaned relative search data: reload the pickle (closing the file afterwards) and display it
with open("binary\\new_search_data_relative.p", "rb") as pickle_file:
    saved_new_search_data_relative = pk.load(pickle_file)
saved_new_search_data_relative

### Concatenate Block and Search Data

In [None]:
# Load binary files (each handle is closed as soon as the load finishes)
with open("binary\\new_block_data.p", "rb") as pickle_file:
    new_block_data = pk.load(pickle_file)
with open("binary\\new_search_data.p", "rb") as pickle_file:
    new_search_data = pk.load(pickle_file)

# Match the indexes: the block rows take over the search data's index, which
# only works when both frames have the same number of rows
new_block_data = new_block_data.set_index(new_search_data.index)
new_block_data.columns = ['Height', 'Transaction Volume', 'Stripped Size', 'Size', 'Weight',
                          'Avg. Transaction Fee', 'Block Reward', 'Block Reward Tips']

# Concatenate the block and search datasets side by side
block_and_search_concat = pd.concat([new_search_data, new_block_data], axis=1)

# Mark the search columns as such
search_column_names = {
    'Bitcoin': 'Bitcoin Searches',
    'Ethereum': 'Ethereum Searches',
    'Binance Coin': 'Binance Coin Searches',
    'Dogecoin': 'Dogecoin Searches',
    'Litecoin': 'Litecoin Searches',
}
block_and_search_concat = block_and_search_concat.rename(columns=search_column_names)

# Save the concatenated dataset
with open("binary\\block_and_search_concat.p", "wb") as pickle_file:
    pk.dump(block_and_search_concat, pickle_file)
block_and_search_concat.to_csv("csv\\block_and_search_concat.csv")

# Show the dataset
block_and_search_concat

In [None]:
# Keep the summary statistics in their own variable: assigning them back to
# `block_and_search_concat` replaced the full dataset with its describe()
# table for every cell that ran afterwards
block_and_search_describe = block_and_search_concat.describe()
block_and_search_describe.to_csv("csv\\price_and_block_concat_describe.csv")

In [None]:
# Load binary files (each handle is closed as soon as the load finishes)
with open("binary\\new_block_data.p", "rb") as pickle_file:
    new_block_data = pk.load(pickle_file)
with open("binary\\new_search_data_relative.p", "rb") as pickle_file:
    new_search_data_relative = pk.load(pickle_file)

# Match the indexes: the block rows take over the search data's index, which
# only works when both frames have the same number of rows
new_block_data = new_block_data.set_index(new_search_data_relative.index)
new_block_data.columns = ['Height', 'Transaction Volume', 'Stripped Size', 'Size',
                          'Weight', 'Avg. Transaction Fee', 'Block Reward', 'Block Reward Tips']

# Concatenate the block and search datasets side by side
block_and_search_concat_relative = pd.concat([new_search_data_relative, new_block_data], axis=1)

# Mark the search columns as such
search_column_names = {
    'Bitcoin': 'Bitcoin Searches',
    'Ethereum': 'Ethereum Searches',
    'Binance Coin': 'Binance Coin Searches',
    'Dogecoin': 'Dogecoin Searches',
    'Litecoin': 'Litecoin Searches',
}
block_and_search_concat_relative = block_and_search_concat_relative.rename(columns=search_column_names)

# Save the concatenated dataset
with open("binary\\block_and_search_concat_relative.p", "wb") as pickle_file:
    pk.dump(block_and_search_concat_relative, pickle_file)

# Write the relative frame; this line previously wrote
# `block_and_search_concat` (the absolute data) into the relative CSV
block_and_search_concat_relative.to_csv("csv\\block_and_search_concat_relative.csv")

# Show the dataset
block_and_search_concat_relative

### Calculate the correlation between the data points

In [None]:
# Load binary files (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_concat.p", "rb") as pickle_file:
    block_and_search_concat = pk.load(pickle_file)

# Pairwise Pearson correlation between all columns
block_and_search_corr = block_and_search_concat.corr(method='pearson')

# Save the correlation matrix
with open("binary\\block_and_search_corr.p", "wb") as pickle_file:
    pk.dump(block_and_search_corr, pickle_file)

# Show the correlation matrix
block_and_search_corr

### Visualize the correlation as a heat map

In [None]:
# Import data (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_corr.p", "rb") as pickle_file:
    block_and_search_corr = pk.load(pickle_file)

# Remove the non-Bitcoin search series from both the rows and the columns of
# the correlation matrix
other_coin_searches = ['Dogecoin Searches', 'Litecoin Searches',
                       'Ethereum Searches', 'Binance Coin Searches']
block_and_search_corr_temp = block_and_search_corr.drop(
    index=other_coin_searches,
    columns=other_coin_searches,
)

# Make the plot; cells are annotated as percentages with one decimal
plt.subplots(figsize=(20, 12))
sns.heatmap(data=block_and_search_corr_temp,
            annot=True,
            fmt='.1%')

# Add features
plt.title("Block & Search Data Correlation")

# Save the plot
plt.savefig("plots\\block_and_search_corr.jpg", dpi=300)

### Min-Max Scale the data so we can better visualize the change over time

In [None]:
# Load binary files (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_concat.p", "rb") as pickle_file:
    block_and_search_concat = pk.load(pickle_file)

# Scale every column to the 0-100 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
scaled = min_max_scaler.fit_transform(block_and_search_concat)

# Rebuild a dataframe with the original column names and index
block_and_search_scaled = pd.DataFrame(
    scaled,
    columns=block_and_search_concat.columns,
    index=block_and_search_concat.index,
)

# Add a running row counter (0, 1, 2, ...) as the 'Time' column; it is added
# after scaling, so it is not rescaled
block_and_search_scaled['Time'] = list(range(len(block_and_search_scaled.index)))

# Save the dataframe
with open("binary\\block_and_search_scaled.p", "wb") as pickle_file:
    pk.dump(block_and_search_scaled, pickle_file)

# Show the dataframe
block_and_search_scaled

### Visualize Search Data (Absolute)

In [None]:
# Import data (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_concat.p", "rb") as pickle_file:
    block_and_search_concat = pk.load(pickle_file)

# Plot
plt.subplots(figsize=(20, 12))

# Set theme
sns.set_theme(color_codes=True)

# One line per search series, drawn in this order; the legend label is the
# column name itself
search_columns = ['Bitcoin Searches', 'Ethereum Searches',
                  'Litecoin Searches', 'Dogecoin Searches']
for search_column in search_columns:
    sns.lineplot(x='date',
                 y=search_column,
                 data=block_and_search_concat,
                 label=search_column)

# Add features
plt.title("Crypto Search Graph")
plt.xlabel('Time')
plt.ylabel('Searches')
plt.legend(loc='upper left')

# Save
plt.savefig("plots\\crypto_search_lineplot.jpg", dpi=300)

# Show data
plt.show()

### Visualize Search Data (Relative)

In [None]:
# Import data (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_concat_relative.p", "rb") as pickle_file:
    block_and_search_concat_relative = pk.load(pickle_file)

# Plot
plt.subplots(figsize=(20, 12))
sns.set_theme(color_codes=True)

# One line per search series, drawn in this order. The data source is the
# relative frame loaded above; the calls previously plotted
# `block_and_search_concat` (the absolute data) left over from another cell.
search_columns = ['Bitcoin Searches', 'Ethereum Searches',
                  'Litecoin Searches', 'Dogecoin Searches']
for search_column in search_columns:
    sns.lineplot(x='Height',
                 y=search_column,
                 data=block_and_search_concat_relative,
                 label=search_column)

# Add features
plt.title("Crypto Search Graph")
plt.xlabel('Time')
plt.ylabel('y-axis')
plt.legend(loc='upper left')

# Save
plt.savefig("plots\\crypto_search_lineplot_relative.jpg", dpi=300)

# Show data
plt.show()

### Import and Prep New Block Data

In [None]:
# Import (the handle is closed as soon as the load finishes)
with open("binary\\new_btc_data.p", "rb") as pickle_file:
    new_btc_block_data = pk.load(pickle_file)

# Prep: add a running row counter (0, 1, 2, ...) as the 'Time' column
new_btc_block_data['Time'] = list(range(len(new_btc_block_data.index)))

# List the available column names
print(list(new_btc_block_data.columns))

In [None]:
# Scale every column (including 'Time', when present) to the 0-100 range
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 100))
scaled = min_max_scaler.fit_transform(new_btc_block_data)

# Rebuild a dataframe with the original column names and index
new_btc_block_data_scaled = pd.DataFrame(
    scaled,
    columns=new_btc_block_data.columns,
    index=new_btc_block_data.index,
)

# Save the dataframe; the `with` block closes the file handle
with open("binary\\new_btc_block_data_scaled.p", "wb") as pickle_file:
    pk.dump(new_btc_block_data_scaled, pickle_file)

# Show the dataframe
new_btc_block_data_scaled

### Visualize Block Data (Scaled)

In [None]:
# Third-order regression curves (no scatter points) of the scaled
# Blockchain.com block metrics against 'Time'
plt.subplots(figsize=(20, 12))
sns.set_theme(color_codes=True)

# (column to plot, legend label), in drawing order
regression_series = [
    ('avg-block-size', 'Size'),
    ('n-transactions-per-block', 'Trans. Volume'),
    ('transaction-fees', 'Trans. Fees'),
    ('fee-per-transaction', 'Fee Per Trans.'),
]

for metric_column, legend_label in regression_series:
    sns.regplot(x='Time',
                y=metric_column,
                data=new_btc_block_data_scaled,
                scatter=False,
                label=legend_label,
                order=3)

# Title, axis labels and legend
plt.title("New Block Data Regression Plot (Blockchain.com)")
plt.xlabel('Time')
plt.ylabel('Values')
plt.legend(loc='upper left')

# Write the figure to disk
plt.savefig("plots\\new_block_data_regression.jpg", dpi=300)

In [None]:
# Third-order regression curves (no scatter points) of the scaled
# Blockchain.com block metrics against 'Time', including the USD fee series
plt.subplots(figsize=(20, 12))
sns.set_theme(color_codes=True)

# (column to plot, legend label), in drawing order
regression_series = [
    ('avg-block-size', 'Size'),
    ('n-transactions-per-block', 'Transaction Volume'),
    ('transaction-fees', 'Transaction Fees BTC'),
    ('transaction-fees-usd', 'Transaction Fees USD'),
    ('fee-per-transaction', 'Fee Per Transaction BTC'),
]

for metric_column, legend_label in regression_series:
    sns.regplot(x='Time',
                y=metric_column,
                data=new_btc_block_data_scaled,
                scatter=False,
                label=legend_label,
                order=3)

# Title, axis labels and legend
plt.title("New Block Data Regression Plot (Blockchain.com)")
plt.xlabel('Time')
plt.ylabel('Values')
plt.legend(loc='upper left')

# Write the figure to disk
plt.savefig("plots\\new_block_data_regression_additional.jpg", dpi=300)

In [None]:
# Import (the handle is closed as soon as the load finishes)
with open("binary\\block_and_search_scaled.p", "rb") as pickle_file:
    block_and_search_scaled = pk.load(pickle_file)

# Plot third-order regression curves (no scatter points) against 'Height'
plt.subplots(figsize=(20, 12))
sns.set_theme(color_codes=True)

# (column to plot, legend label), in drawing order
regression_series = [
    ('Size', 'Size'),
    ('Transaction Volume', 'Trans. Volume'),
    ('Block Reward Tips', 'Trans. Fees'),
    ('Avg. Transaction Fee', 'Fee Per Trans.'),
]

for metric_column, legend_label in regression_series:
    sns.regplot(x='Height',
                y=metric_column,
                data=block_and_search_scaled,
                label=legend_label,
                scatter=False,
                order=3)

# Add features
plt.title("Block Data Regression Plot (Prof. Zhang)")
plt.xlabel('Time')
plt.ylabel('y-axis')
plt.legend(loc='upper left')

# Save plot
plt.savefig("plots\\block_data_regression.jpg", dpi=300)

In [None]:
# Redraw the correlation heat map on a wider canvas with two-decimal
# percentage annotations. Uses `block_and_search_corr_temp` from an earlier
# cell and writes to the same path as the first heat map
# ("plots\\block_and_search_corr.jpg"), so that file is overwritten.
heatmap_options = {"annot": True, "fmt": '.2%'}

plt.subplots(figsize=(25, 12))
sns.heatmap(block_and_search_corr_temp, **heatmap_options)
plt.title("Block & Search Data Correlation")
plt.savefig("plots\\block_and_search_corr.jpg", dpi=300)

In [None]:
# Third-order regression curves (no scatter points) of two block metrics and
# two search series against 'Height'; uses `block_and_search_scaled` loaded
# in an earlier cell
plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

# Block metrics: seaborn's default confidence band
for metric_column, legend_label in [('Size', 'Size'),
                                    ('Transaction Volume', 'Trans. Volume')]:
    sns.regplot(x='Height',
                y=metric_column,
                data=block_and_search_scaled,
                label=legend_label,
                scatter=False,
                order=3)

# Search series: `ci` is the size of the confidence interval as a number in
# [0, 100], not an on/off switch. The previous `ci=True` was read as 1, giving
# a 1% band; 95 matches the default used by the two curves above.
for search_column in ['Bitcoin Searches', 'Ethereum Searches']:
    sns.regplot(x='Height',
                y=search_column,
                data=block_and_search_scaled,
                label=search_column,
                scatter=False,
                ci=95,
                order=3)

plt.title("Block Data Regression")
plt.xlabel('Time')
plt.ylabel('y-axis')

plt.legend(loc='upper left')

# Save plot
plt.savefig("plots\\block_and_search_data_regression.jpg", dpi=300)
plt.show()

In [None]:
# Regression curves for two block metrics overlaid with dashed line plots of
# two search series, all against 'Height'; uses `block_and_search_scaled`
# loaded in an earlier cell
plt.subplots(figsize=(20, 12))

sns.set_theme(color_codes=True)

# Block metrics as third-order regression curves without scatter points
block_metric_series = [
    ('Size', 'Size'),
    ('Transaction Volume', 'Trans. Volume'),
]
for metric_column, legend_label in block_metric_series:
    sns.regplot(x='Height',
                y=metric_column,
                data=block_and_search_scaled,
                label=legend_label,
                scatter=False,
                order=3)

# Search series as dashed lines; the legend label is the column name
for search_column in ('Bitcoin Searches', 'Ethereum Searches'):
    sns.lineplot(x='Height',
                 y=search_column,
                 data=block_and_search_scaled,
                 label=search_column,
                 linestyle="dashed")

plt.title("Block Data Regression")
plt.xlabel('Time')
plt.ylabel('y-axis')

plt.legend(loc='upper left')

# Write the figure to disk, then display it
plt.savefig("plots\\block_and_search_data_regression_and_line.jpg", dpi=300)
plt.show()