<a href="https://colab.research.google.com/github/sunshineluyao/UTXO/blob/main/UTXO_data_analysis_Task_1_START_2020_11_17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [77]:
import numpy as np
import pandas as pd
import datetime

In [78]:
!pip install chart_studio



## Import Data from Google Drive and Data Wrangling

In [79]:
# Colab helper that exposes Google Drive to the notebook runtime
from google.colab import drive

# Attach Drive at the location used by the CSV paths in the later cells
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [80]:
import numpy as np
import pandas as pd

# Load the raw extract from Drive; the CSV column named 'Unnamed: 0' becomes the index.
joint_2010_path = '/content/drive/My Drive/UTXO/joint_2010.csv'
df_2010 = pd.read_csv(joint_2010_path, index_col='Unnamed: 0')
df_2010.head()

Unnamed: 0,value,block_date,spent_block_date
0,5000000000,2009-01-03,
21553,5000000000,2009-01-09,2009-01-12
1,5000000000,2009-01-09,
2,5000000000,2009-01-09,
3,5000000000,2009-01-09,


In [81]:
# Work on an independent copy: a plain `df = df_2010` only creates a second name for
# the same object, so column assignments on `df` would also modify the raw frame.
df = df_2010.copy()

In [82]:
# Convert `value` into bitcoin units: UTXO = value / 10^8
# (presumably `value` is denominated in satoshis — confirm against the data source).
# Dividing by 1e8, which is exactly representable as a float, yields the correctly
# rounded result; multiplying by the inexact float 10**(-8) can be off in the last digit.
# `assign` builds a new frame, so the input frame is not modified in place.
df = (
    df
    .assign(UTXO=df['value'] / 1e8)
    .drop(columns=['value'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-03,,50.0
1,2009-01-09,2009-01-12,50.0
2,2009-01-09,,50.0
3,2009-01-09,,50.0
4,2009-01-09,,50.0


In [83]:
# Parse both date columns from 'YYYY-MM-DD' strings into datetime objects
for date_col in ('block_date', 'spent_block_date'):
    df[date_col] = pd.to_datetime(df[date_col], format='%Y-%m-%d')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   block_date        137525 non-null  datetime64[ns]
 1   spent_block_date  115972 non-null  datetime64[ns]
 2   UTXO              137525 non-null  float64       
dtypes: datetime64[ns](2), float64(1)
memory usage: 3.1 MB


# Task 1: Calculate Daily UTXO


 For each date, calculate the total UTXO generated on that date

In [84]:
# Order the rows by creation date and renumber the index from 0
df = df.sort_values(by='block_date').reset_index(drop=True)
df.tail()

Unnamed: 0,block_date,spent_block_date,UTXO
137520,2010-12-31,2011-01-05,50.04
137521,2010-12-31,2011-01-05,11.93
137522,2010-12-31,2011-01-05,0.04
137523,2010-12-31,2011-01-04,0.01
137524,2010-12-31,2019-10-16,0.05


In [85]:
# Total UTXO created per block_date.
# Select the 'UTXO' column BEFORE aggregating: summing the whole frame would also try
# to sum the datetime column `spent_block_date`, which raises a TypeError on
# pandas >= 2.0 (older versions silently dropped such columns).
# reset_index() keeps the grouping variable as a regular column.
df_newborn = (
    df.groupby('block_date')['UTXO']
    .sum()
    .reset_index()
    .rename(columns={"UTXO": "UTXO_newborn"})
)
df_newborn.tail()

Unnamed: 0,block_date,UTXO_newborn
718,2010-12-27,58656.806641
719,2010-12-28,30374.153958
720,2010-12-29,86575.966836
721,2010-12-30,38568.962386
722,2010-12-31,29212.393351


 For each date, calculate the total UTXO spent on that date

In [86]:
# Order the rows by spend date and renumber the index from 0
df = df.sort_values(by='spent_block_date').reset_index(drop=True)
df.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-09,2009-01-12,50.0
1,2009-01-12,2009-01-12,29.0
2,2009-01-12,2009-01-12,1.0
3,2009-01-12,2009-01-12,40.0
4,2009-01-12,2009-01-12,28.0


In [87]:
# Total UTXO spent per spent_block_date (rows with no spend date are left out by groupby).
# Select the 'UTXO' column BEFORE aggregating: summing the whole frame would also try
# to sum the datetime column `block_date`, which raises a TypeError on pandas >= 2.0
# (older versions silently dropped such columns).
# reset_index() keeps the grouping variable as a regular column.
df_dead = (
    df.groupby('spent_block_date')['UTXO']
    .sum()
    .reset_index()
    .rename(columns={"UTXO": "UTXO_dead"})
)
df_dead.tail()

Unnamed: 0,spent_block_date,UTXO_dead
2115,2020-09-22,50.0
2116,2020-10-01,50.0
2117,2020-10-11,1050.0
2118,2020-10-14,50.0
2119,2020-10-17,50.0


 Merge the two DataFrames to get the time series of newborn and dead UTXO for each date from 2009-01-09 to 2011-08-06

In [88]:
# Number of calendar days in the analysis window (both end dates included)
size = len(pd.date_range(start='2009-01-09', end='2011-08-06'))

In [89]:
# Calendar frame with one row per day of the analysis window.
# Built directly from the date range instead of overwriting a float-zero placeholder
# column, so the frame's length can never disagree with the range.
# NOTE(review): the raw data contains a row dated 2009-01-03, which lies before this
# window and is therefore not matched by the left merges below — confirm this is intended.
df_UTXO = pd.DataFrame({'date': pd.date_range(start='2009-01-09', end='2011-08-06')})
df_UTXO.tail()

Unnamed: 0,date
935,2011-08-02
936,2011-08-03
937,2011-08-04
938,2011-08-05
939,2011-08-06


In [90]:
# Left-join the daily newborn totals onto the calendar so that every date is kept
df_UTXO = pd.merge(
    df_UTXO,
    df_newborn,
    how='left',
    left_on='date',
    right_on='block_date',
)
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn
0,2009-01-09,2009-01-09,700.0
1,2009-01-10,2009-01-10,3050.0
2,2009-01-11,2009-01-11,4650.0
3,2009-01-12,2009-01-12,4879.0
4,2009-01-13,2009-01-13,6150.0


In [91]:
# Left-join the daily dead totals onto the calendar so that every date is kept
df_UTXO = pd.merge(
    df_UTXO,
    df_dead,
    how='left',
    left_on='date',
    right_on='spent_block_date',
)
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn,spent_block_date,UTXO_dead
0,2009-01-09,2009-01-09,700.0,NaT,
1,2009-01-10,2009-01-10,3050.0,NaT,
2,2009-01-11,2009-01-11,4650.0,NaT,
3,2009-01-12,2009-01-12,4879.0,2009-01-12,179.0
4,2009-01-13,2009-01-13,6150.0,NaT,


In [92]:
# The right-hand join keys duplicate `date` where matched, so remove them
df_UTXO = df_UTXO.drop(columns=['block_date', 'spent_block_date'])

In [93]:
# Keep the three working columns in a fixed order; dates without a matching
# newborn/dead total are NaN after the left merges and count as zero.
df_UTXO = df_UTXO.loc[:, ['date', 'UTXO_newborn', 'UTXO_dead']].fillna(0)
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead
0,2009-01-09,700.0,0.0
1,2009-01-10,3050.0,0.0
2,2009-01-11,4650.0,0.0
3,2009-01-12,4879.0,179.0
4,2009-01-13,6150.0,0.0


Calculate the daily net new UTXO: `Net_New = UTXO_newborn - UTXO_dead`

In [94]:
# Daily net change: amount created minus amount spent on the same date
df_UTXO['Net_New'] = df_UTXO['UTXO_newborn'].sub(df_UTXO['UTXO_dead'])
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New
0,2009-01-09,700.0,0.0,700.0
1,2009-01-10,3050.0,0.0,3050.0
2,2009-01-11,4650.0,0.0,4650.0
3,2009-01-12,4879.0,179.0,4700.0
4,2009-01-13,6150.0,0.0,6150.0


Calculate the cumulative sum of the daily net new UTXO to get the total unspent UTXO as of each date

In [95]:
# Running total of the daily net change, accumulated row by row in date order
df_UTXO['UTXO_Cum'] = df_UTXO.Net_New.cumsum()
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum
0,2009-01-09,700.0,0.0,700.0,700.0
1,2009-01-10,3050.0,0.0,3050.0,3750.0
2,2009-01-11,4650.0,0.0,4650.0,8400.0
3,2009-01-12,4879.0,179.0,4700.0,13100.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0


In [96]:
# Display the column names of the assembled daily UTXO table
df_UTXO.columns

Index(['date', 'UTXO_newborn', 'UTXO_dead', 'Net_New', 'UTXO_Cum'], dtype='object')

In [97]:
# Print dtypes and non-null counts of the assembled daily UTXO table
df_UTXO.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 940 entries, 0 to 939
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          940 non-null    datetime64[ns]
 1   UTXO_newborn  940 non-null    float64       
 2   UTXO_dead     940 non-null    float64       
 3   Net_New       940 non-null    float64       
 4   UTXO_Cum      940 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 44.1 KB


In [98]:
import plotly.graph_objects as go
from plotly.offline import iplot

# Daily newborn, dead and net-new UTXO plotted against the date
fig0 = go.Figure()
fig0.add_trace(
    go.Scatter(
        x=df_UTXO['date'],
        y=df_UTXO['UTXO_newborn'],
        mode='lines',
        name='UTXO Newborn',
    )
)
fig0.add_trace(
    go.Scatter(
        x=df_UTXO['date'],
        y=df_UTXO['UTXO_dead'],
        mode='lines+markers',
        name='UTXO Dead',
    )
)
fig0.add_trace(
    go.Scatter(
        x=df_UTXO['date'],
        y=df_UTXO['Net_New'],
        mode='markers',
        name='UTXO Netnew',
    )
)

fig0.update_layout(
    title='Daily Newborn and Dead UTXO',
    xaxis_title='Date',
    yaxis_title='UTXO',
)
# NOTE(review): a log axis cannot show zero or negative values, so days where
# UTXO_dead is 0 or Net_New is <= 0 are presumably not drawn — confirm this is intended.
fig0.update_yaxes(type="log")

# Quick-zoom buttons shown above the plot, plus a range slider below it
fig0_range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig0.update_layout(
    xaxis=dict(
        rangeselector=dict(buttons=fig0_range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    )
)

iplot(fig0)

In [99]:
import plotly.graph_objects as go
from plotly.offline import iplot

# Accumulated (running-total) UTXO plotted against the date
fig1 = go.Figure()
fig1.add_trace(
    go.Scatter(
        x=df_UTXO['date'],
        y=df_UTXO['UTXO_Cum'],
        mode='markers',
        name='UTXO Cum',
    )
)
fig1.update_layout(
    title='The Accumulated UTXO',
    xaxis_title='Date',
    yaxis_title='UTXO',
)

# Quick-zoom buttons shown above the plot, plus a range slider below it
fig1_range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]
fig1.update_layout(
    xaxis=dict(
        rangeselector=dict(buttons=fig1_range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    )
)

iplot(fig1)

# Save the Datasets

### 1. The Datasets for Final Products

In [100]:
# Persist the daily UTXO table to Drive; the row index is written as the first CSV column
df_UTXO.to_csv(
    '/content/drive/My Drive/UTXO/df_UTXO.csv',
    index=True,
)

### 2. The Datasets for calculating Final Products in the future dates

Note: we need the dead-UTXO dataset for dates beyond the current block-date range, which ends on 2010-12-31

In [102]:
# Show the latest spend dates present in the dead-UTXO table
df_dead.tail()

Unnamed: 0,spent_block_date,UTXO_dead
2115,2020-09-22,50.0
2116,2020-10-01,50.0
2117,2020-10-11,1050.0
2118,2020-10-14,50.0
2119,2020-10-17,50.0


In [103]:
# Keep only the spend dates strictly after 2010-12-31
df_dead = df_dead.loc[df_dead['spent_block_date'] > '2010-12-31']

In [104]:
# Show the first spend dates that remain after the filter
df_dead.head()

Unnamed: 0,spent_block_date,UTXO_dead
468,2011-01-01,11299.459153
469,2011-01-02,19522.082666
470,2011-01-03,18643.645624
471,2011-01-04,5385.99775
472,2011-01-05,12522.985279


In [105]:
# Persist the remaining dead-UTXO rows to Drive; the row index is written as the first CSV column
df_dead.to_csv(
    '/content/drive/My Drive/UTXO/df_dead1.csv',
    index=True,
)