# Cryptocurrency Analytics

## Import Libraries

In [1]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import networkx as nx
from pyvis import network as net
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as soup
import time
from IPython.display import display
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")


## Graph Network

### Prepare Data

In [12]:
# Load the Ether transactional network (columns: from_address, to_address, block_timestamp, value_ether).
# The file location can be overridden with the TRANSACTIONS_CSV environment variable so the notebook
# is not tied to a single machine; when it is unset the original author's path is used.
TRANSACTIONS_CSV = os.environ.get(
    'TRANSACTIONS_CSV',
    'C:/Users/JCRG/Desktop/Master/Dissertation/Data/Transactional_Network_Ether.csv',
)
transactional_data = pd.read_csv(TRANSACTIONS_CSV)
transactional_data.head()

Unnamed: 0,from_address,to_address,block_timestamp,value_ether
0,0x7ac34681f6aaeb691e150c43ee494177c0e2c183,0x7fb6fefe6c524d9cb06025583c0ad59aad2f6f5b,2018-07-11 11:57:20 UTC,1.0
1,0xf1cf296c11d3b010e6dd3d48b6c5aaed613e96f1,0x2ec2c8729da460eb9743e86c3d16713f6aa47325,2018-07-11 01:53:49 UTC,3.04768
2,0x8e04af7f7c76daa9ab429b1340e0327b5b835748,0x0000000000085d4780b73119b644ae5ecd22b376,2019-08-20 14:27:26 UTC,0.0
3,0x8f6147a13514d7cc7c1c85bc4fad8f7f1bc63b57,0x0d8775f648430679a709e98d2b0cb6250d2887ef,2019-05-30 03:04:18 UTC,0.0
4,0x7b6f730249555a85d39327b90f0080e9dc63daff,0x1cb9ede1a43ab0265d6c44be4f3db554abf822c5,2019-05-30 13:17:56 UTC,250.0


### Generate Network Output

In [13]:
def get_nametag(address, timeout=500, delay=1):
    """Scrape the public name tag of an Ethereum address from its Etherscan page.

    The name tag is taken as the text before the first '|' in the page's <title>.
    It makes it easier to recognise which entity an address belongs to, when
    Etherscan has a label for it.

    Parameters
    ----------
    address : str
        Address to look up; it is interpolated into the Etherscan address URL.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 500, the original value).
    delay : float, optional
        Seconds to pause after each request (default 1). Etherscan restricts
        clients that send requests too quickly.

    Returns
    -------
    str
        The stripped first '|'-separated field of the page title, or an empty
        string when the page has no <title> element.

    Raises
    ------
    urllib.error.URLError
        Propagated from ``urlopen`` when the request fails.
    """
    url = 'https://etherscan.io/address/%s' % (address)
    # Etherscan restricts access for web scraping, so a browser-like User-Agent is required.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36'})
    # The context manager closes the connection even if reading or parsing fails.
    with urlopen(req, timeout=timeout) as response:
        page_soup = soup(response.read(), "html.parser")
    title = page_soup.find('title')
    if title is None:
        name = ''
    else:
        # Work on the title text rather than slicing str(tag), so the result does not
        # depend on the exact whitespace Etherscan puts around the title.
        name = title.get_text().split('|')[0].strip()
    time.sleep(delay)  # throttle consecutive requests
    return name

In [15]:
def network(transactional_data, top_n=100):
    """Return the addresses with the greatest in-degree in the transfer network.

    A directed graph is built with one edge per distinct (from_address, to_address)
    pair, so an address's in-degree is the number of distinct senders that
    transferred to it. The per-pair transaction count is only used to order the
    edges before they are inserted into the graph; it is not stored as an edge
    attribute.

    Parameters
    ----------
    transactional_data : pandas.DataFrame
        Must contain 'from_address', 'to_address' and 'value_ether' columns.
    top_n : int, optional
        Number of highest in-degree addresses to keep (default 100).

    Returns
    -------
    pandas.DataFrame
        Columns 'Address', 'Count' (in-degree) and 'tag' (result of
        ``get_nametag`` for the address), sorted by 'Count' descending.
        One web request is issued per returned row.
    """
    # Collapse the transfers to one row per sender/receiver pair, most frequent pairs first.
    edges = (
        transactional_data[['from_address', 'to_address', 'value_ether']]
        .groupby(['from_address', 'to_address'])
        .count()
        .reset_index()
        .sort_values(by=['value_ether'], ascending=False)
    )
    # Create object with network data
    G = nx.from_pandas_edgelist(edges,
                                source='from_address',
                                target='to_address',
                                create_using=nx.DiGraph)
    # Naming the columns at construction keeps this working for an empty graph,
    # where a bare DataFrame would have no column to sort on.
    degrees = pd.DataFrame(list(G.in_degree()), columns=['Address', 'Count'])
    # .copy() so that adding the 'tag' column does not write into a slice of `degrees`.
    df = degrees.sort_values('Count', ascending=False).head(top_n).copy()
    # Create column with nametag
    df['tag'] = df['Address'].apply(get_nametag)
    return df

# Build the table of highest in-degree addresses. `network` looks up a name tag for every
# returned address via `get_nametag`, so this cell issues web requests and is slow to run.
df=network(transactional_data)
df.head()

Unnamed: 0,Address,Count,tag
1,0xdac17f958d2ee523a2206206994597c13d831ec7,1166,Tether: USDT Stablecoin
4,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,992,Centre: USD Coin
66,0x6b175474e89094c44da98b954eedeac495271d0f,550,Maker: Dai Stablecoin
81,0xd26114cd6ee289accf82350c8d8487fedb8a0c07,532,OMG Network: OMG Token
39,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,458,Uniswap V2: Router 2


## Insider Trading

### Import Listing Dates

In [114]:
# Attach the Binance listing date to each tagged address (left join on the name tag),
# then show only the addresses whose token has an actual listing date.
listings = df.merge(pd.read_csv('Listing_Dates.csv'), on='tag', how='left')
listings.columns = ['to_address', 'count', 'tag', 'listing_date']
listings.loc[listings['listing_date'].notna() & listings['listing_date'].ne('Not listed')]

Unnamed: 0,to_address,count,tag,listing_date
6,0x1f9840a85d5af5bf1d1762f925bdaddc4201f984,398,Uniswap Protocol: UNI token,17-Sep-20
7,0x514910771af9ca656af840dff83e8264ecf986ca,341,Chainlink: LINK Token,16-Jan-19
10,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,311,Wrapped BTC: WBTC Token,24-Jun-22
11,0x0d8775f648430679a709e98d2b0cb6250d2887ef,278,Basic Attention: BAT Token,4-Mar-19
12,0x6b3595068778dd592e39a122f4f5a5cf09c90fe2,272,SushiSwap: SUSHI Token,1-Sep-20
13,0xd533a949740bb3306d119cc777fa900ba034cd52,268,Curve.fi: CRV Token,15-Aug-20
14,0x7fc66500c84a76ad7e9c93437bfc5ac33e2ddae9,263,Aave: AAVE Token,15-Oct-20
17,0xc00e94cb662c3520282e6f5717214004a7f26888,252,Compound: COMP Token,25-Jun-19
18,0x7d1afa7b718fb893db30a3abc0cfc608aacfebb0,248,Polygon (Matic): Matic Token,26-Apr-19
21,0x0bc529c00c6401aef6d220be8c6ea1667f6ad93e,225,Ygov.finance: YFI Token,10-Aug-20


### Defining Function

In [45]:
def insider_trading(transactional_data, listings, window_days=30):
    """Select transfers that could have been made on insider knowledge of a listing.

    A transfer is returned when it was sent to a listed token's address inside the
    pre-listing window ``[listing_date - window_days, listing_date)`` by a sender
    that has no transfer to that same token earlier than the window.

    Parameters
    ----------
    transactional_data : pandas.DataFrame
        Transfers with 'from_address', 'to_address' and 'block_timestamp' columns.
    listings : pandas.DataFrame
        One row per address with 'to_address', 'tag' and 'listing_date' columns.
        Rows whose 'listing_date' is missing or equal to 'Not listed' are ignored.
    window_days : int, optional
        Length of the pre-listing window in days (default 30).

    Returns
    -------
    pandas.DataFrame
        The matching merged rows, with 'block_timestamp' truncated to the day,
        'listing_date' parsed to datetime, and the helper columns
        'date_before_listing', 'date_between_listing' and 'key'
        ('<from_address>-<tag>').
    """
    window = pd.Timedelta(days=window_days)

    # Inner join keeps only transfers sent to an address present in the listings table.
    df2 = transactional_data.merge(listings, on='to_address')
    has_listing = (df2['listing_date'] != 'Not listed') & df2['listing_date'].notna()
    # .copy() so the column assignments below do not write into a slice of the merge result.
    df2 = df2[has_listing].copy()

    # Configure time format for listing date
    df2['listing_date'] = pd.to_datetime(df2['listing_date'])
    # Parse the timestamps as UTC, drop the timezone and truncate to midnight so that
    # they compare day-by-day against the (timezone-naive) listing dates.
    df2['block_timestamp'] = (
        pd.to_datetime(df2['block_timestamp'], utc=True)
        .dt.tz_localize(None)
        .dt.normalize()
    )

    window_start = df2['listing_date'] - window
    # True for transfers made earlier than the pre-listing window.
    df2['date_before_listing'] = df2['block_timestamp'] < window_start
    # True for transfers inside the window. The start is inclusive so that a transfer made
    # exactly `window_days` before the listing is not lost between the two flags.
    df2['date_between_listing'] = (
        (df2['block_timestamp'] >= window_start)
        & (df2['block_timestamp'] < df2['listing_date'])
    )
    # Create column with key of address and tag for future filtering
    df2['key'] = df2['from_address'] + '-' + df2['tag']

    # Sender/token pairs that already had a transfer before the window are not suspicious.
    known_keys = set(df2.loc[df2['date_before_listing'], 'key'])

    df3 = df2[~df2['key'].isin(known_keys) & df2['date_between_listing']]
    return df3

# Run the detection on the full transfer table; the result is reused by the cells below.
df_insider_trading=insider_trading(transactional_data,listings)
df_insider_trading.head()

Unnamed: 0,from_address,to_address,block_timestamp,value_ether,count,tag,listing_date,date_before_listing,date_between_listing,key
7685,0x70cd5f374f496b299e7b3164eba4b32568a5cb79,0x0d8775f648430679a709e98d2b0cb6250d2887ef,2019-03-02,0.0,278,Basic Attention: BAT Token,2019-03-04,False,True,0x70cd5f374f496b299e7b3164eba4b32568a5cb79-Bas...
1184706,0xa205fd7344656c72fdc645b72faf5a3de0b3e825,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,2022-06-15,0.0,311,Wrapped BTC: WBTC Token,2022-06-24,False,True,0xa205fd7344656c72fdc645b72faf5a3de0b3e825-Wra...
1184708,0xa205fd7344656c72fdc645b72faf5a3de0b3e825,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,2022-06-15,0.0,311,Wrapped BTC: WBTC Token,2022-06-24,False,True,0xa205fd7344656c72fdc645b72faf5a3de0b3e825-Wra...
1184908,0xa205fd7344656c72fdc645b72faf5a3de0b3e825,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,2022-06-15,0.0,311,Wrapped BTC: WBTC Token,2022-06-24,False,True,0xa205fd7344656c72fdc645b72faf5a3de0b3e825-Wra...
1185748,0x3b69035f18e923ad2144f775a6156931f9b15d2d,0x2260fac5e5542a773aa44fbcfedf7c193bc2c599,2022-06-21,0.0,311,Wrapped BTC: WBTC Token,2022-06-24,False,True,0x3b69035f18e923ad2144f775a6156931f9b15d2d-Wra...


### Generate Results

In [None]:
# Number of flagged transfers per (sender, token) pair, largest first.
df_insider_trading_detail = (
    df_insider_trading
    .groupby(['from_address', 'tag'])
    .size()
    .reset_index(name='count')
    .sort_values(by=['count'], ascending=False)
)
df_insider_trading_detail

In [108]:
# Number of distinct tokens each sender transferred to inside the pre-listing window.
# `nunique()` with an explicit column name replaces `value_counts().groupby(level=0).count()`:
# on pandas >= 2.0 `value_counts()` names its result 'count', so the ['tag'] lookup below
# would raise KeyError.
df_insider_trading_addresses = (
    df_insider_trading
    .groupby('from_address')['tag']
    .nunique()
    .sort_values(ascending=False)
    .to_frame('tag')
)
# Keep only the senders that appear for more than 3 different tokens.
df_insider_trading_addresses = df_insider_trading_addresses[df_insider_trading_addresses['tag'] > 3]
df_insider_trading_addresses

Unnamed: 0_level_0,tag
from_address,Unnamed: 1_level_1
0x024bcbcaad82e67f721799e259ca60bc7d363419,7
0x0c5a2c72c009252f0e7312f5a1ab87de02be6fbe,6
0xcfc50541c3deaf725ce738ef87ace2ad778ba0c5,5
0x7ac34681f6aaeb691e150c43ee494177c0e2c183,5
0xecb6a3e0e99700b32bb03ba14727d99fe8e538cf,5
0x187e3534f461d7c59a7d6899a983a5305b48f93f,5
0xa4517a2b21f85f6a6a63601cea7fa4e34da92856,4
0x22fa8cc33a42320385cbd3690ed60a021891cb32,4
0x49a2dcc237a65cc1f412ed47e0594602f6141936,4
0x31a47094c6325d357c7331c621d6768ba041916e,4


In [110]:
# For every token, count how many of the selected senders transferred to it inside the
# insider-trading time frame, most frequent tokens first.
flagged_transfers = df_insider_trading_addresses.merge(df_insider_trading, on='from_address', how='left')
# One row per (token, sender) pair, then the number of pairs per token.
sender_token_pairs = flagged_transfers.groupby(['tag_y', 'from_address']).size()
df_coins_insider_trading = (
    sender_token_pairs
    .groupby(level='tag_y')
    .size()
    .sort_values(ascending=False)
    .reset_index(name='count')
)
df_coins_insider_trading

Unnamed: 0,tag_y,count
0,Curve.fi: CRV Token,14
1,Ygov.finance: YFI Token,14
2,SushiSwap: SUSHI Token,10
3,Compound: Comptroller,9
4,Balancer: BAL Token,5
5,Aave: AAVE Token,4
6,UMA: UMA Token,4
7,ENS: ENS Token,3
8,Synthetix: Proxy SNX Token,2
9,Abracadabra.money: SPELL Token,1
