<a href="https://colab.research.google.com/github/sunshineluyao/UTXO/blob/main/UTXO_data_analysis_Task_1_with_extended_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import pandas as pd
import datetime

In [None]:
!pip install chart_studio

Collecting chart_studio
[?25l  Downloading https://files.pythonhosted.org/packages/ca/ce/330794a6b6ca4b9182c38fc69dd2a9cbff60fd49421cb8648ee5fee352dc/chart_studio-1.1.0-py3-none-any.whl (64kB)
[K     |█████                           | 10kB 14.4MB/s eta 0:00:01[K     |██████████▏                     | 20kB 14.7MB/s eta 0:00:01[K     |███████████████▎                | 30kB 7.7MB/s eta 0:00:01[K     |████████████████████▍           | 40kB 3.1MB/s eta 0:00:01[K     |█████████████████████████▍      | 51kB 3.8MB/s eta 0:00:01[K     |██████████████████████████████▌ | 61kB 4.3MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 3.1MB/s 
Installing collected packages: chart-studio
Successfully installed chart-studio-1.1.0


## Import Data from Google Drive and Data Wrangling

In [None]:
# Colab-only helper used to attach the user's Google Drive to the runtime file system
from google.colab import drive

# Mount Drive at /content/drive; the CSV paths read and written below live under this mount point
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
# Load the first extract (joint_2010.csv) from the mounted Drive and preview its first rows.
df_2010 = pd.read_csv(filepath_or_buffer='/content/drive/My Drive/joint_2010.csv')
df_2010.head(n=5)

Unnamed: 0,num,value,block_date,spent_block_date
0,21553,5000000000.0,1/9/2009,1/12/2009
1,1,5000000000.0,1/9/2009,
2,2,5000000000.0,1/9/2009,
3,3,5000000000.0,1/9/2009,
4,4,5000000000.0,1/9/2009,


In [None]:
# Load the second extract (joint_2011.csv) from the mounted Drive and preview its last rows.
df_2011 = pd.read_csv(filepath_or_buffer='/content/drive/My Drive/joint_2011.csv')
df_2011.tail(n=5)

Unnamed: 0,num,value,block_date,spent_block_date
1048570,415429,8496683.0,8/6/2011,8/8/2011
1048571,415965,120000000.0,8/6/2011,8/8/2011
1048572,416087,17000000.0,8/6/2011,8/8/2011
1048573,416508,58844100.0,8/6/2011,8/8/2011
1048574,420271,1222000000.0,8/6/2011,8/8/2011


In [None]:
# Stack the two extracts into one frame.
# ignore_index=True gives the result a fresh 0..n-1 index. Without it each file's own
# row labels are carried over, so labels repeat (both inputs are numbered from 0) and
# label-based lookups such as df.loc[0] become ambiguous.
merge = [df_2010, df_2011]
df = pd.concat(merge, ignore_index=True)
df.head()

Unnamed: 0,num,value,block_date,spent_block_date
0,21553,5000000000.0,1/9/2009,1/12/2009
1,1,5000000000.0,1/9/2009,
2,2,5000000000.0,1/9/2009,
3,3,5000000000.0,1/9/2009,
4,4,5000000000.0,1/9/2009,


In [None]:
# Generate the UTXO value in bitcoin units: UTXO = value / 10^8.
# Dividing by 1e8 (exactly representable as a float) rounds only once, whereas
# multiplying by 10**(-8) first rounds the factor itself and then the product.
# The helper columns `value` and `num` are no longer needed afterwards, and the index
# is renumbered with drop=True instead of creating an `index` column and deleting it.
df = (
    df.assign(UTXO=df['value'] / 1e8)
      .drop(columns=['value', 'num'])
      .reset_index(drop=True)
)
df.tail()

Unnamed: 0,block_date,spent_block_date,UTXO
1186094,8/6/2011,8/8/2011,0.084967
1186095,8/6/2011,8/8/2011,1.2
1186096,8/6/2011,8/8/2011,0.17
1186097,8/6/2011,8/8/2011,0.588441
1186098,8/6/2011,8/8/2011,12.22


In [None]:
# Convert block_date and spent_block_date from month/day/year strings to datetime
# objects, then print the resulting dtypes and non-null counts.
df = df.assign(
    block_date=pd.to_datetime(df['block_date'], format='%m/%d/%Y'),
    spent_block_date=pd.to_datetime(df['spent_block_date'], format='%m/%d/%Y'),
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1186099 entries, 0 to 1186098
Data columns (total 3 columns):
 #   Column            Non-Null Count    Dtype         
---  ------            --------------    -----         
 0   block_date        1186099 non-null  datetime64[ns]
 1   spent_block_date  1164547 non-null  datetime64[ns]
 2   UTXO              1186099 non-null  float64       
dtypes: datetime64[ns](2), float64(1)
memory usage: 27.1 MB


# Task 1: Calculate Daily UTXO


 For each date, calculate the total UTXO generated on that date

In [None]:
# Order the rows by creation date (block_date) and renumber the index from zero.
df = df.sort_values('block_date').reset_index(drop=True)
df.tail()

Unnamed: 0,block_date,spent_block_date,UTXO
1186094,2011-08-06,2011-08-07,137.097721
1186095,2011-08-06,2011-08-07,0.029247
1186096,2011-08-06,2011-08-07,0.53
1186097,2011-08-06,2011-08-07,0.091483
1186098,2011-08-06,2011-08-08,12.22


In [None]:
# Total UTXO created on each block_date.
# The UTXO column is selected *before* aggregating: summing the whole frame would also
# try to add up the datetime columns, which recent pandas versions reject with a
# TypeError instead of silently dropping them, and it does needless work either way.
# reset_index() keeps the grouping variable (block_date) as a regular column.
df_newborn = (
    df.groupby('block_date')['UTXO']
      .sum()
      .reset_index()
      .rename(columns={"UTXO": "UTXO_newborn"})
)
df_newborn.tail()

Unnamed: 0,block_date,UTXO_newborn
935,2011-08-02,85296.723708
936,2011-08-03,113896.630558
937,2011-08-04,88667.639105
938,2011-08-05,147648.11074
939,2011-08-06,77115.643571


In [None]:
# Save the daily newborn totals to Drive (with to_csv's defaults the row index is written as the first, unnamed column)
df_newborn.to_csv('/content/drive/My Drive/df_newborn.csv')

 For each date, calculate the total UTXO spent on that date

In [None]:
# Order the rows by the date on which each output was spent and renumber the index from zero.
df = df.sort_values('spent_block_date').reset_index(drop=True)
df.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-09,2009-01-12,50.0
1,2009-01-12,2009-01-12,29.0
2,2009-01-12,2009-01-12,1.0
3,2009-01-12,2009-01-12,40.0
4,2009-01-12,2009-01-12,28.0


In [None]:
# Total UTXO spent on each spent_block_date.
# The UTXO column is selected *before* aggregating: summing the whole frame would also
# try to add up the datetime columns, which recent pandas versions reject with a
# TypeError instead of silently dropping them, and it does needless work either way.
# Rows with a missing spent_block_date form no group, because groupby drops
# missing keys by default.
# reset_index() keeps the grouping variable (spent_block_date) as a regular column.
df_dead = (
    df.groupby('spent_block_date')['UTXO']
      .sum()
      .reset_index()
      .rename(columns={"UTXO": "UTXO_dead"})
)
df_dead.tail()

Unnamed: 0,spent_block_date,UTXO_dead
3791,2020-10-11,1050.0
3792,2020-10-14,64.859446
3793,2020-10-15,0.487548
3794,2020-10-16,5.51
3795,2020-10-17,50.0


In [None]:
# Save the daily spent totals to Drive (with to_csv's defaults the row index is written as the first, unnamed column)
df_dead.to_csv('/content/drive/My Drive/df_dead.csv')

 Merge the two dataframes to get the time series of newborn and dead UTXO for each date from 2009-01-09 to 2011-08-06

In [None]:
# Number of calendar days (both endpoints included) covered by the analysis window.
len(pd.date_range(start='2009-01-09', end='2011-08-06'))

940

In [None]:
# Build the daily calendar that the newborn/dead totals are joined onto.
# The frame is created directly from the date range, so its length always follows the
# start/end dates; no hand-counted row total or throw-away zero column is needed.
df_UTXO = pd.DataFrame({'date': pd.date_range(start='2009-01-09', end='2011-08-06')})
df_UTXO.tail()

Unnamed: 0,date
935,2011-08-02
936,2011-08-03
937,2011-08-04
938,2011-08-05
939,2011-08-06


In [None]:
# Left-join the daily newborn totals onto the calendar, matching date to block_date;
# calendar dates with no matching block_date keep a missing UTXO_newborn.
df_UTXO = pd.merge(
    df_UTXO,
    df_newborn,
    how='left',
    left_on='date',
    right_on='block_date',
)
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn
0,2009-01-09,2009-01-09,700.0
1,2009-01-10,2009-01-10,3050.0
2,2009-01-11,2009-01-11,4650.0
3,2009-01-12,2009-01-12,4879.0
4,2009-01-13,2009-01-13,6150.0


In [None]:
# Left-join the daily spent totals onto the calendar, matching date to spent_block_date;
# calendar dates with no matching spent_block_date keep a missing UTXO_dead.
df_UTXO = pd.merge(
    df_UTXO,
    df_dead,
    how='left',
    left_on='date',
    right_on='spent_block_date',
)
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn,spent_block_date,UTXO_dead
0,2009-01-09,2009-01-09,700.0,NaT,
1,2009-01-10,2009-01-10,3050.0,NaT,
2,2009-01-11,2009-01-11,4650.0,NaT,
3,2009-01-12,2009-01-12,4879.0,2009-01-12,179.0
4,2009-01-13,2009-01-13,6150.0,NaT,


In [None]:
# The join keys duplicate the `date` column once the merges are done, so remove them.
df_UTXO = df_UTXO.drop(columns=['block_date', 'spent_block_date'])

In [None]:
# Keep the three working columns in a fixed order and replace missing totals
# (dates with nothing created or nothing spent) with zero.
df_UTXO = df_UTXO.loc[:, ['date', 'UTXO_newborn', 'UTXO_dead']].fillna(value=0)
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead
0,2009-01-09,700.0,0.0
1,2009-01-10,3050.0,0.0
2,2009-01-11,4650.0,0.0
3,2009-01-12,4879.0,179.0
4,2009-01-13,6150.0,0.0


Calculate the daily net new UTXO: Net_New = UTXO_newborn - UTXO_dead

In [None]:
# Daily net change: UTXO created that day minus UTXO spent that day.
df_UTXO['Net_New'] = df_UTXO['UTXO_newborn'].sub(df_UTXO['UTXO_dead'])
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New
0,2009-01-09,700.0,0.0,700.0
1,2009-01-10,3050.0,0.0,3050.0
2,2009-01-11,4650.0,0.0,4650.0
3,2009-01-12,4879.0,179.0,4700.0
4,2009-01-13,6150.0,0.0,6150.0


Calculate the cumulative sum of the daily net new UTXO to get the total unspent UTXO as of each date

In [None]:
# Running total of the daily net change, appended as the UTXO_Cum column.
df_UTXO = df_UTXO.assign(UTXO_Cum=df_UTXO['Net_New'].cumsum())
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum
0,2009-01-09,700.0,0.0,700.0,700.0
1,2009-01-10,3050.0,0.0,3050.0,3750.0
2,2009-01-11,4650.0,0.0,4650.0,8400.0
3,2009-01-12,4879.0,179.0,4700.0,13100.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0


In [None]:
# Show the column names of the daily summary frame
df_UTXO.columns

Index(['date', 'UTXO_newborn', 'UTXO_dead', 'Net_New', 'UTXO_Cum'], dtype='object')

In [None]:
# Print dtypes and non-null counts of the daily summary frame before plotting
df_UTXO.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 940 entries, 0 to 939
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          940 non-null    datetime64[ns]
 1   UTXO_newborn  940 non-null    float64       
 2   UTXO_dead     940 non-null    float64       
 3   Net_New       940 non-null    float64       
 4   UTXO_Cum      940 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 44.1 KB


In [None]:
import plotly.graph_objects as go
from plotly.offline import iplot

# One scatter trace per daily series: (legend name, df_UTXO column, drawing mode).
fig0 = go.Figure(data=[
    go.Scatter(x=df_UTXO['date'], y=df_UTXO[column], mode=draw_mode, name=legend_name)
    for legend_name, column, draw_mode in (
        ('UTXO Newborn', 'UTXO_newborn', 'lines'),
        ('UTXO Dead', 'UTXO_dead', 'lines+markers'),
        ('UTXO Netnew', 'Net_New', 'markers'),
    )
])

fig0.update_layout(
    title='Daily Newborn and Dead UTXO',
    xaxis_title='Date',
    yaxis_title='UTXO',
)
# NOTE(review): a logarithmic axis cannot display zero or negative values, so any day
# with UTXO_dead == 0 or Net_New <= 0 is not drawn - confirm this is intended.
fig0.update_yaxes(type="log")

# Quick-zoom buttons (1 month, 6 months, year-to-date, 1 year, everything) shown
# above the plot, combined with a range slider underneath the date axis.
range_buttons = [
    dict(count=count, label=label, step=step, stepmode=stepmode)
    for count, label, step, stepmode in (
        (1, "1m", "month", "backward"),
        (6, "6m", "month", "backward"),
        (1, "YTD", "year", "todate"),
        (1, "1y", "year", "backward"),
    )
]
range_buttons.append(dict(step="all"))

fig0.update_layout(
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    )
)

iplot(fig0)

In [None]:
import plotly.graph_objects as go
from plotly.offline import iplot

# Single marker trace: the running total of unspent UTXO (UTXO_Cum) against the date.
fig1 = go.Figure(data=[
    go.Scatter(x=df_UTXO['date'], y=df_UTXO['UTXO_Cum'], mode='markers', name='UTXO Cum')
])
fig1.update_layout(
    title='The Accumulated UTXO',
    xaxis_title='Date',
    yaxis_title='UTXO',
)

# Quick-zoom buttons (1 month, 6 months, year-to-date, 1 year, everything) shown
# above the plot, combined with a range slider underneath the date axis.
range_buttons = [
    dict(count=count, label=label, step=step, stepmode=stepmode)
    for count, label, step, stepmode in (
        (1, "1m", "month", "backward"),
        (6, "6m", "month", "backward"),
        (1, "YTD", "year", "todate"),
        (1, "1y", "year", "backward"),
    )
]
range_buttons.append(dict(step="all"))

fig1.update_layout(
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    )
)

iplot(fig1)