<a href="https://colab.research.google.com/github/sunshineluyao/UTXO/blob/main/UTXO_Data_analysis_Revised.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import datetime

# Import Data from Google Drive and Data Wrangling

In [2]:
# Importing drive method from colab for accessing google drive
from google.colab import drive

In [3]:
# Mounting drive
# This will require authentication : Follow the steps as guided
drive.mount('/content/drive')

Mounted at /content/drive


Note：Read data from the csv in drive: you have put the csv data in Google Drive folder "UTXO." The data contains all UTXO that is generated by 2010-12-31. The information includes


*   the value of the UTXO （in naive coin)
*   the data that the UTXO was generated
*   the data that the UTXO was spent. "NaN" if not spent by 2020-10-12




In [16]:
import pandas as pd
df_2010=pd.read_csv('/content/drive/My Drive/UTXO/joint_2010.csv',index_col='Unnamed: 0')
df_2010.head()

Unnamed: 0,value,block_date,spent_block_date
0,5000000000,2009-01-03,
21553,5000000000,2009-01-09,2009-01-12
1,5000000000,2009-01-09,
2,5000000000,2009-01-09,
3,5000000000,2009-01-09,


Generate the UTXO value in bitcoin unit, which = $value/10^{8}$

In [17]:
df_2010['UTXO'] = df_2010['value']*10**(-8)
df_2010.head()

Unnamed: 0,value,block_date,spent_block_date,UTXO
0,5000000000,2009-01-03,,50.0
21553,5000000000,2009-01-09,2009-01-12,50.0
1,5000000000,2009-01-09,,50.0
2,5000000000,2009-01-09,,50.0
3,5000000000,2009-01-09,,50.0


In [18]:
# drop value and reset index
df_2010=df_2010.reset_index()
df_2010 = df_2010.drop(['value','index'], axis = 1)
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-03,,50.0
1,2009-01-09,2009-01-12,50.0
2,2009-01-09,,50.0
3,2009-01-09,,50.0
4,2009-01-09,,50.0


In [19]:
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   block_date        137525 non-null  object 
 1   spent_block_date  115972 non-null  object 
 2   UTXO              137525 non-null  float64
dtypes: float64(1), object(2)
memory usage: 3.1+ MB


# Change the block_date and spent_block_date to datatime object

In [27]:
df_2010['block_date'] = pd.to_datetime(df_2010['block_date'], format='%Y/%m/%d')
df_2010['spent_block_date'] = pd.to_datetime(df_2010['spent_block_date'], format='%Y/%m/%d')
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   block_date        137525 non-null  datetime64[ns]
 1   spent_block_date  115972 non-null  datetime64[ns]
 2   UTXO              137525 non-null  float64       
dtypes: datetime64[ns](2), float64(1)
memory usage: 3.1 MB


# Task 1: Calculate Daily UTXO


# For each date, calculate the total UTXO generated on that date

In [28]:
#sort the data by blockdate
df_2010 = df_2010.sort_values(by = 'block_date')
df_2010.reset_index(drop = True, inplace = True)
df_2010.head()


Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-03,NaT,50.0
1,2009-01-09,NaT,50.0
2,2009-01-09,NaT,50.0
3,2009-01-09,NaT,50.0
4,2009-01-09,NaT,50.0


In [38]:
### use reset_index() to keep the grouping variable as a column
df_newborn = df_2010.groupby(df_2010['block_date']).sum()['UTXO'].reset_index()
df_newborn.rename(columns={"UTXO": "UTXO_newborn"}, inplace=True)
df_newborn.head()

Unnamed: 0,block_date,UTXO_newborn
0,2009-01-03,50.0
1,2009-01-09,700.0
2,2009-01-10,3050.0
3,2009-01-11,4650.0
4,2009-01-12,4879.0


# For each date, calculate the total UTXO spent on that date

In [39]:
#sort the data by spent_block_date
df_2010 = df_2010.sort_values(by = 'spent_block_date')
df_2010.reset_index(drop = True, inplace = True)
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-09,2009-01-12,50.0
1,2009-01-12,2009-01-12,29.0
2,2009-01-12,2009-01-12,1.0
3,2009-01-12,2009-01-12,40.0
4,2009-01-12,2009-01-12,28.0


In [40]:
### use reset_index() to keep the grouping variable as a column
df_dead = df_2010.groupby(df_2010['spent_block_date']).sum()['UTXO'].reset_index()
df_dead.rename(columns={"UTXO": "UTXO_dead"}, inplace=True)
df_dead.head()

Unnamed: 0,spent_block_date,UTXO_dead
0,2009-01-12,179.0
1,2009-01-14,61.0
2,2009-01-15,500.0
3,2009-01-16,200.0
4,2009-01-18,150.0


# Merge the two dataframe to get the time series for newborn and dead UTXO for each date from 2009-01-12 to 2010-12-31

In [57]:
np.size(pd.date_range(start='2009-01-09', end='2010-12-31'))

722

In [58]:
df_UTXO=df = pd.DataFrame(np.zeros((722, 1)))
df_UTXO.columns=['date']
df_UTXO['date']=pd.date_range(start='2009-01-09', end='2010-12-31')
df_UTXO.head()

Unnamed: 0,date
0,2009-01-09
1,2009-01-10
2,2009-01-11
3,2009-01-12
4,2009-01-13


In [59]:
### merge
df_UTXO=df_UTXO.merge(df_newborn, how='left',left_on='date',right_on='block_date')
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn
0,2009-01-09,2009-01-09,700.0
1,2009-01-10,2009-01-10,3050.0
2,2009-01-11,2009-01-11,4650.0
3,2009-01-12,2009-01-12,4879.0
4,2009-01-13,2009-01-13,6150.0


In [60]:
### merge
df_UTXO=df_UTXO.merge(df_dead, how='left',left_on='date',right_on='spent_block_date')
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn,spent_block_date,UTXO_dead
0,2009-01-09,2009-01-09,700.0,NaT,
1,2009-01-10,2009-01-10,3050.0,NaT,
2,2009-01-11,2009-01-11,4650.0,NaT,
3,2009-01-12,2009-01-12,4879.0,2009-01-12,179.0
4,2009-01-13,2009-01-13,6150.0,NaT,


In [61]:
df_UTXO = df_UTXO.drop(['block_date','spent_block_date'], axis = 1)

In [62]:
df_UTXO=df_UTXO[['date','UTXO_newborn','UTXO_dead']].fillna(0)
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead
0,2009-01-09,700.0,0.0
1,2009-01-10,3050.0,0.0
2,2009-01-11,4650.0,0.0
3,2009-01-12,4879.0,179.0
4,2009-01-13,6150.0,0.0


#Calculate the Daily Net new UTXO =UTXO_newborn-UTXO_dead

In [63]:
df_UTXO['Net_New']=df_UTXO['UTXO_newborn']-df_UTXO['UTXO_dead']
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New
0,2009-01-09,700.0,0.0,700.0
1,2009-01-10,3050.0,0.0,3050.0
2,2009-01-11,4650.0,0.0,4650.0
3,2009-01-12,4879.0,179.0,4700.0
4,2009-01-13,6150.0,0.0,6150.0


Calculate the Accumulated Sum of Net new UTXO to get the total UTXO not spent for each date

In [65]:
df_UTXO['UTXO_Cum']=df_UTXO['Net_New'].cumsum()
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum
0,2009-01-09,700.0,0.0,700.0,700.0
1,2009-01-10,3050.0,0.0,3050.0,3750.0
2,2009-01-11,4650.0,0.0,4650.0,8400.0
3,2009-01-12,4879.0,179.0,4700.0,13100.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0


In [66]:
df_UTXO.columns

Index(['date', 'UTXO_newborn', 'UTXO_dead', 'Net_New', 'UTXO_Cum'], dtype='object')

In [82]:
import plotly.graph_objects as go
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_newborn,
                    mode='lines',
                    name='UTXO Newborn'))

fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_dead,
                    mode='lines+markers',
                    name='UTXO Dead'))

fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.Net_New,
                    mode='markers', name='UTXO Netnew'))
fig.update_layout(title='Daily Newborn and Dead UTXO',
                   xaxis_title='Date',
                   yaxis_title='UTXO')


fig.show()

In [81]:
import plotly.graph_objects as go
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_Cum,
                    mode='markers', name='UTXO Cum'))
fig.update_layout(title='The Accumulated UTXO',
                   xaxis_title='Date',
                   yaxis_title='UTXO')
fig.show()

#Task 2: Calculate the life expectancy of every UTXO

In [None]:
import pandas as pd
df2 = pd.read_csv('joint_2010.csv')
df2 = df2.drop(['num', 'value'], axis = 1)
df2.insert(3,'Life Expectancy','')

from datetime import datetime
df2['Life Expectancy'] = (pd.to_datetime(df2['spent_block_date']) - pd.to_datetime(df2['block_date'])).map(lambda x:x.days)
df2['block_date'] = pd.to_datetime(df2['block_date'], format='%m/%d/%Y')
df2['spent_block_date'] = pd.to_datetime(df2['spent_block_date'], format='%m/%d/%Y')

df2['temp'] = df2['UTXO'] * df2['Life Expectancy']
df2

Unnamed: 0,UTXO,block_date,spent_block_date,Life Expectancy,temp
0,50.00,2009-01-09,2009-01-12,3.0,150.00
1,50.00,2009-01-09,NaT,,
2,50.00,2009-01-09,NaT,,
3,50.00,2009-01-09,NaT,,
4,50.00,2009-01-09,NaT,,
...,...,...,...,...,...
137519,0.05,2010-12-31,2017-07-30,2403.0,120.15
137520,0.05,2010-12-31,2017-07-30,2403.0,120.15
137521,5.23,2010-12-31,2017-10-16,2481.0,12975.63
137522,0.05,2010-12-31,2017-12-15,2541.0,127.05


In [None]:
df2 = df2.sort_values(by = 'spent_block_date')
df2.reset_index(drop = True, inplace = True)

df2_sum = df2.groupby(df2['spent_block_date']).sum()['UTXO']
df2_sumblock = df2.groupby(df2['spent_block_date']).sum()['temp']

d = {'UTXO': df2_sum, 'temp': df2_sumblock}
df2_life_exp = pd.DataFrame(data = d)
df2_life_exp["sum_spent"] = df2_life_exp["UTXO"]
df2_life_exp["sum_temp"] = df2_life_exp["temp"]
df2_life_exp["sum_spent"] = df2_life_exp["sum_spent"].cumsum()
df2_life_exp["sum_temp"] = df2_life_exp["sum_temp"].cumsum()
df2_life_exp['Average Life Expectancy'] = df2_life_exp['sum_temp'] / df2_life_exp['sum_spent']
df2_life_exp["date"] = df2_life_exp.index
df2_life_exp.reset_index(drop = True, inplace = True)

df2_life_exp

Unnamed: 0,UTXO,temp,sum_spent,sum_temp,Average Life Expectancy,date
0,179.0,150.0,1.790000e+02,1.500000e+02,0.837989,2009-01-12
1,61.0,72.0,2.400000e+02,2.220000e+02,0.925000,2009-01-14
2,500.0,600.0,7.400000e+02,8.220000e+02,1.110811,2009-01-15
3,200.0,450.0,9.400000e+02,1.272000e+03,1.353191,2009-01-16
4,150.0,300.0,1.090000e+03,1.572000e+03,1.442202,2009-01-18
...,...,...,...,...,...,...
2115,50.0,181950.0,1.182703e+07,1.635586e+09,138.292222,2020-09-22
2116,50.0,189100.0,1.182708e+07,1.635775e+09,138.307626,2020-10-01
2117,1050.0,3858850.0,1.182813e+07,1.639634e+09,138.621592,2020-10-11
2118,50.0,181350.0,1.182818e+07,1.639815e+09,138.636338,2020-10-14


In [None]:
import plotly.express as px
fig2 = px.line(df2_life_exp, x = 'date', y = 'Average Life Expectancy', title = 'Life Expectancy Distribution')
fig2.show()

#Task 3: Calculate the distribution of Life Expectancy
Sorted by day, month, as well as the year, and print a plot by every time slot.


In [None]:
import pandas as pd
df3 = pd.read_csv('joint_2010.csv')
df3 = df3.drop(['num', 'value'], axis = 1)


from datetime import datetime
df3['Life Expectancy'] = (pd.to_datetime(df3['spent_block_date']) - pd.to_datetime(df3['block_date'])).map(lambda x:x.days)
df3['block_date'] = pd.to_datetime(df3['block_date'], format='%m/%d/%Y')
df3['spent_block_date'] = pd.to_datetime(df3['spent_block_date'], format='%m/%d/%Y')
df3 = df3.drop('spent_block_date', axis = 1)
df3

Unnamed: 0,UTXO,block_date,Life Expectancy
0,50.00,2009-01-09,3.0
1,50.00,2009-01-09,
2,50.00,2009-01-09,
3,50.00,2009-01-09,
4,50.00,2009-01-09,
...,...,...,...
137519,0.05,2010-12-31,2403.0
137520,0.05,2010-12-31,2403.0
137521,5.23,2010-12-31,2481.0
137522,0.05,2010-12-31,2541.0


In [None]:
df3 = df3.sort_values(by = 'block_date')
df3.reset_index(drop = True, inplace = True)
df3.insert(3,'< 1d','')
df3.insert(4,'1d ~ 1m','')
df3.insert(5,'1m ~ 1q','')
df3.insert(6,'1q ~ 6m','')
df3.insert(7,'6m ~ 1y','')
df3.insert(8,'1y ~ 2y','')
df3.insert(9,'2y ~ 3y','')
df3.insert(10, '> 3y', '')
df3

Unnamed: 0,UTXO,block_date,Life Expectancy,< 1d,1d ~ 1m,1m ~ 1q,1q ~ 6m,6m ~ 1y,1y ~ 2y,2y ~ 3y,> 3y
0,50.00,2009-01-09,3.0,,,,,,,,
1,50.00,2009-01-09,,,,,,,,,
2,50.00,2009-01-09,,,,,,,,,
3,50.00,2009-01-09,,,,,,,,,
4,50.00,2009-01-09,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
137519,50.04,2010-12-31,5.0,,,,,,,,
137520,11.93,2010-12-31,5.0,,,,,,,,
137521,0.04,2010-12-31,5.0,,,,,,,,
137522,0.01,2010-12-31,4.0,,,,,,,,


In [None]:
numrows = df3.shape[0]
for i in range(numrows):
  if df3['Life Expectancy'][i] < 1:
    df3['< 1d'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 31:
    df3['1d ~ 1m'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 92:
    df3['1m ~ 1q'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 184:
    df3['1q ~ 6m'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 366:
    df3['6m ~ 1y'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 711:
    df3['1y ~ 2y'][i] = df3['UTXO'][i]
  elif df3['Life Expectancy'][i] < 1096:
    df3['2y ~ 3y'][i] = df3['UTXO'][i]
  else:
    df3['> 3y'][i] = df3['UTXO'][i]
df3



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/i

Unnamed: 0,UTXO,block_date,Life Expectancy,< 1d,1d ~ 1m,1m ~ 1q,1q ~ 6m,6m ~ 1y,1y ~ 2y,2y ~ 3y,> 3y
0,50.00,2009-01-09,3.0,,50,,,,,,
1,50.00,2009-01-09,,,,,,,,,50
2,50.00,2009-01-09,,,,,,,,,50
3,50.00,2009-01-09,,,,,,,,,50
4,50.00,2009-01-09,,,,,,,,,50
...,...,...,...,...,...,...,...,...,...,...,...
137519,50.04,2010-12-31,5.0,,50.04,,,,,,
137520,11.93,2010-12-31,5.0,,11.93,,,,,,
137521,0.04,2010-12-31,5.0,,0.04,,,,,,
137522,0.01,2010-12-31,4.0,,0.01,,,,,,


In [None]:
df3 = df3.sort_values(by = 'block_date')
df3.reset_index(drop = True, inplace = True)
df3_sum = df3.groupby(df3['block_date']).sum()['UTXO']
df3_1 = df3.groupby(df3['block_date'])['< 1d'].apply(lambda x: (x != '').sum())
df3_2 = df3.groupby(df3['block_date'])['1d ~ 1m'].apply(lambda x: (x != '').sum())
df3_3 = df3.groupby(df3['block_date'])['1m ~ 1q'].apply(lambda x: (x != '').sum())
df3_4 = df3.groupby(df3['block_date'])['1q ~ 6m'].apply(lambda x: (x != '').sum())
df3_5 = df3.groupby(df3['block_date'])['6m ~ 1y'].apply(lambda x: (x != '').sum())
df3_6 = df3.groupby(df3['block_date'])['1y ~ 2y'].apply(lambda x: (x != '').sum())
df3_7 = df3.groupby(df3['block_date'])['2y ~ 3y'].apply(lambda x: (x != '').sum())
df3_8 = df3.groupby(df3['block_date'])['> 3y'].apply(lambda x: (x != '').sum())

d = {'UTXO': df3_sum, '< 1d': df3_1, '1d ~ 1m': df3_2, '1m ~ 1q': df3_3, '1q ~ 6m': df3_4, '6m ~ 1y': df3_5, '1y ~ 2y': df3_6, '2y ~ 3y': df3_7, '> 3y': df3_8}
df3_life_exp = pd.DataFrame(data = d)
df3_life_exp["date"] = df3_life_exp.index
df3_life_exp.reset_index(drop = True, inplace = True)

df3_life_exp

Unnamed: 0,UTXO,< 1d,1d ~ 1m,1m ~ 1q,1q ~ 6m,6m ~ 1y,1y ~ 2y,2y ~ 3y,> 3y,date
0,700.000000,0,1,0,0,0,0,0,13,2009-01-09
1,3050.000000,0,0,0,0,0,0,0,61,2009-01-10
2,4650.000000,0,0,0,0,0,0,1,92,2009-01-11
3,4879.000000,6,2,0,0,0,2,0,96,2009-01-12
4,6150.000000,0,7,0,0,0,0,3,113,2009-01-13
...,...,...,...,...,...,...,...,...,...,...
717,58656.796641,0,291,62,51,7,8,17,10,2010-12-27
718,30374.153958,0,347,85,68,9,7,32,16,2010-12-28
719,86575.966836,0,292,68,36,12,7,19,9,2010-12-29
720,38568.962386,0,320,61,64,24,8,24,10,2010-12-30


In [None]:
import plotly.offline as py     
import plotly.graph_objects as go

trace0 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["< 1d"], name = '< 1d')
trace1 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1d ~ 1m"], name = '1d ~ 1m')
trace2 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1m ~ 1q"], name = '1m ~ 1q')
trace3 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1q ~ 6m"], name = '1q ~ 6m')
trace4 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["6m ~ 1y"], name = '6m ~ 1y')
trace5 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1y ~ 2y"], name = '1y ~ 2y')
trace6 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["2y ~ 3y"], name = '2y ~ 3y')
trace7 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["> 3y"], name = '> 3y')

layout = go.Layout(title="Distribution", xaxis = dict(title="date"), yaxis = dict(title ="Average Life Expectancy") ) 
data = [trace0, trace1, trace2, trace3, trace4, trace5, trace6, trace7]
fig = go.Figure(layout = layout,data = data) 
py.iplot(fig)

#Task 4: Calculate the Distribution for UTXO (Bitcoin Age Distribution for the Bitcoin that are still Alive)

In [None]:
df4 = pd.read_csv('joint_2010.csv')
df4 = df4.drop(['num', 'value'], axis = 1)


from datetime import datetime
df4['Life Expectancy'] = (pd.to_datetime(df4['spent_block_date']) - pd.to_datetime(df4['block_date'])).map(lambda x:x.days)
df4['block_date'] = pd.to_datetime(df4['block_date'], format='%m/%d/%Y')
df4['spent_block_date'] = pd.to_datetime(df4['spent_block_date'], format='%m/%d/%Y')
df4['temp'] = df4['UTXO'] * df4['Life Expectancy']
df4

Unnamed: 0,UTXO,block_date,spent_block_date,Life Expectancy,temp
0,50.00,2009-01-09,2009-01-12,3.0,150.00
1,50.00,2009-01-09,NaT,,
2,50.00,2009-01-09,NaT,,
3,50.00,2009-01-09,NaT,,
4,50.00,2009-01-09,NaT,,
...,...,...,...,...,...
137519,0.05,2010-12-31,2017-07-30,2403.0,120.15
137520,0.05,2010-12-31,2017-07-30,2403.0,120.15
137521,5.23,2010-12-31,2017-10-16,2481.0,12975.63
137522,0.05,2010-12-31,2017-12-15,2541.0,127.05


In [None]:
for i in range(len(df4)):
  if df4['block_date'][i] != df4['spent_block_date'][i]:
    df4_sum = df4.groupby(df4['block_date']).sum()['UTXO']
    df4_sumblock = df4.groupby(df4['block_date']).sum()['temp']

d = {'UTXO': df4_sum, 'temp': df4_sumblock}
df4_life_exp = pd.DataFrame(data = d)
df4_life_exp["sum_spent"] = df4_life_exp["UTXO"]
df4_life_exp["sum_temp"] = df4_life_exp["temp"]

df4_life_exp["sum_spent"] = df4_life_exp["sum_spent"].cumsum()
df4_life_exp["sum_temp"] = df4_life_exp["sum_temp"].cumsum()
df4_life_exp['Average Life Expectancy'] = df4_life_exp['sum_temp'] / df4_life_exp['sum_spent']
df4_life_exp["date"] = df4_life_exp.index
df4_life_exp.reset_index(drop = True, inplace = True)

df4_life_exp

Unnamed: 0,UTXO,temp,sum_spent,sum_temp,Average Life Expectancy,date
0,700.000000,1.500000e+02,7.000000e+02,1.500000e+02,0.214286,2009-01-09
1,3050.000000,0.000000e+00,3.750000e+03,1.500000e+02,0.040000,2009-01-10
2,4650.000000,4.420000e+04,8.400000e+03,4.435000e+04,5.279762,2009-01-11
3,4879.000000,4.090200e+04,1.327900e+04,8.525200e+04,6.420062,2009-01-12
4,6150.000000,1.336000e+05,1.942900e+04,2.188520e+05,11.264193,2009-01-13
...,...,...,...,...,...,...
717,58656.796641,3.670062e+06,1.273132e+07,1.631246e+09,128.128641,2010-12-27
718,30374.153958,1.669728e+06,1.276169e+07,1.632916e+09,127.954520,2010-12-28
719,86575.966836,3.541419e+06,1.284827e+07,1.636457e+09,127.367953,2010-12-29
720,38568.962386,1.750750e+06,1.288683e+07,1.638208e+09,127.122610,2010-12-30


In [None]:
import plotly.express as px
fig4 = px.line(df4_life_exp, x = 'date', y = 'Average Life Expectancy', title = 'UTXO Life Expectancy Distribution')
fig4.show()