<a href="https://colab.research.google.com/github/sunshineluyao/UTXO/blob/main/UTXO_Data_analysis_Revised.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [195]:
import numpy as np
import pandas as pd
import datetime

# Import Data from Google Drive and Data Wrangling

In [196]:
# Importing drive method from colab for accessing google drive
from google.colab import drive

In [197]:
# Mounting drive
# This will require authentication : Follow the steps as guided
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Note：Read data from the csv in drive: you have put the csv data in Google Drive folder "UTXO." The data contains all UTXO that is generated by 2010-12-31. The information includes


*   the value of the UTXO （in naive coin)
*   the data that the UTXO was generated
*   the data that the UTXO was spent. "NaN" if not spent by 2020-10-12




In [198]:
import pandas as pd
df_2010=pd.read_csv('/content/drive/My Drive/UTXO/joint_2010.csv',index_col='Unnamed: 0')
df_2010.head()

Unnamed: 0,value,block_date,spent_block_date
0,5000000000,2009-01-03,
21553,5000000000,2009-01-09,2009-01-12
1,5000000000,2009-01-09,
2,5000000000,2009-01-09,
3,5000000000,2009-01-09,


Generate the UTXO value in bitcoin unit, which = $value/10^{8}$

In [199]:
df_2010['UTXO'] = df_2010['value']*10**(-8)
df_2010.head()

Unnamed: 0,value,block_date,spent_block_date,UTXO
0,5000000000,2009-01-03,,50.0
21553,5000000000,2009-01-09,2009-01-12,50.0
1,5000000000,2009-01-09,,50.0
2,5000000000,2009-01-09,,50.0
3,5000000000,2009-01-09,,50.0


In [200]:
# drop value and reset index
df_2010=df_2010.reset_index()
df_2010 = df_2010.drop(['value','index'], axis = 1)
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-03,,50.0
1,2009-01-09,2009-01-12,50.0
2,2009-01-09,,50.0
3,2009-01-09,,50.0
4,2009-01-09,,50.0


In [201]:
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   block_date        137525 non-null  object 
 1   spent_block_date  115972 non-null  object 
 2   UTXO              137525 non-null  float64
dtypes: float64(1), object(2)
memory usage: 3.1+ MB


# Change the block_date and spent_block_date to datatime object

In [202]:
df_2010['block_date'] = pd.to_datetime(df_2010['block_date'], format='%Y/%m/%d')
df_2010['spent_block_date'] = pd.to_datetime(df_2010['spent_block_date'], format='%Y/%m/%d')
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   block_date        137525 non-null  datetime64[ns]
 1   spent_block_date  115972 non-null  datetime64[ns]
 2   UTXO              137525 non-null  float64       
dtypes: datetime64[ns](2), float64(1)
memory usage: 3.1 MB


# Task 1: Calculate Daily UTXO


# For each date, calculate the total UTXO generated on that date

In [203]:
#sort the data by blockdate
df_2010 = df_2010.sort_values(by = 'block_date')
df_2010.reset_index(drop = True, inplace = True)
df_2010.head()


Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-03,NaT,50.0
1,2009-01-09,NaT,50.0
2,2009-01-09,NaT,50.0
3,2009-01-09,NaT,50.0
4,2009-01-09,NaT,50.0


In [204]:
### use reset_index() to keep the grouping variable as a column
df_newborn = df_2010.groupby(df_2010['block_date']).sum()['UTXO'].reset_index()
df_newborn.rename(columns={"UTXO": "UTXO_newborn"}, inplace=True)
df_newborn.head()

Unnamed: 0,block_date,UTXO_newborn
0,2009-01-03,50.0
1,2009-01-09,700.0
2,2009-01-10,3050.0
3,2009-01-11,4650.0
4,2009-01-12,4879.0


# For each date, calculate the total UTXO spent on that date

In [205]:
#sort the data by spent_block_date
df_2010 = df_2010.sort_values(by = 'spent_block_date')
df_2010.reset_index(drop = True, inplace = True)
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-09,2009-01-12,50.0
1,2009-01-12,2009-01-12,29.0
2,2009-01-12,2009-01-12,1.0
3,2009-01-12,2009-01-12,40.0
4,2009-01-12,2009-01-12,28.0


In [206]:
### use reset_index() to keep the grouping variable as a column
df_dead = df_2010.groupby(df_2010['spent_block_date']).sum()['UTXO'].reset_index()
df_dead.rename(columns={"UTXO": "UTXO_dead"}, inplace=True)
df_dead.head()

Unnamed: 0,spent_block_date,UTXO_dead
0,2009-01-12,179.0
1,2009-01-14,61.0
2,2009-01-15,500.0
3,2009-01-16,200.0
4,2009-01-18,150.0


# Merge the two dataframe to get the time series for newborn and dead UTXO for each date from 2009-01-12 to 2010-12-31

In [207]:
np.size(pd.date_range(start='2009-01-09', end='2010-12-31'))

722

In [208]:
df_UTXO=df = pd.DataFrame(np.zeros((722, 1)))
df_UTXO.columns=['date']
df_UTXO['date']=pd.date_range(start='2009-01-09', end='2010-12-31')
df_UTXO.head()

Unnamed: 0,date
0,2009-01-09
1,2009-01-10
2,2009-01-11
3,2009-01-12
4,2009-01-13


In [209]:
### merge
df_UTXO=df_UTXO.merge(df_newborn, how='left',left_on='date',right_on='block_date')
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn
0,2009-01-09,2009-01-09,700.0
1,2009-01-10,2009-01-10,3050.0
2,2009-01-11,2009-01-11,4650.0
3,2009-01-12,2009-01-12,4879.0
4,2009-01-13,2009-01-13,6150.0


In [210]:
### merge
df_UTXO=df_UTXO.merge(df_dead, how='left',left_on='date',right_on='spent_block_date')
df_UTXO.head()

Unnamed: 0,date,block_date,UTXO_newborn,spent_block_date,UTXO_dead
0,2009-01-09,2009-01-09,700.0,NaT,
1,2009-01-10,2009-01-10,3050.0,NaT,
2,2009-01-11,2009-01-11,4650.0,NaT,
3,2009-01-12,2009-01-12,4879.0,2009-01-12,179.0
4,2009-01-13,2009-01-13,6150.0,NaT,


In [211]:
df_UTXO = df_UTXO.drop(['block_date','spent_block_date'], axis = 1)

In [212]:
df_UTXO=df_UTXO[['date','UTXO_newborn','UTXO_dead']].fillna(0)
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead
0,2009-01-09,700.0,0.0
1,2009-01-10,3050.0,0.0
2,2009-01-11,4650.0,0.0
3,2009-01-12,4879.0,179.0
4,2009-01-13,6150.0,0.0


#Calculate the Daily Net new UTXO =UTXO_newborn-UTXO_dead

In [213]:
df_UTXO['Net_New']=df_UTXO['UTXO_newborn']-df_UTXO['UTXO_dead']
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New
0,2009-01-09,700.0,0.0,700.0
1,2009-01-10,3050.0,0.0,3050.0
2,2009-01-11,4650.0,0.0,4650.0
3,2009-01-12,4879.0,179.0,4700.0
4,2009-01-13,6150.0,0.0,6150.0


Calculate the Accumulated Sum of Net new UTXO to get the total UTXO not spent for each date

In [214]:
df_UTXO['UTXO_Cum']=df_UTXO['Net_New'].cumsum()
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum
0,2009-01-09,700.0,0.0,700.0,700.0
1,2009-01-10,3050.0,0.0,3050.0,3750.0
2,2009-01-11,4650.0,0.0,4650.0,8400.0
3,2009-01-12,4879.0,179.0,4700.0,13100.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0


In [215]:
df_UTXO.columns

Index(['date', 'UTXO_newborn', 'UTXO_dead', 'Net_New', 'UTXO_Cum'], dtype='object')

In [216]:
df_UTXO.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 722 entries, 0 to 721
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   date          722 non-null    datetime64[ns]
 1   UTXO_newborn  722 non-null    float64       
 2   UTXO_dead     722 non-null    float64       
 3   Net_New       722 non-null    float64       
 4   UTXO_Cum      722 non-null    float64       
dtypes: datetime64[ns](1), float64(4)
memory usage: 33.8 KB


In [217]:
import plotly.graph_objects as go
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_newborn,
                    mode='lines',
                    name='UTXO Newborn'))

fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_dead,
                    mode='lines+markers',
                    name='UTXO Dead'))

fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.Net_New,
                    mode='markers', name='UTXO Netnew'))
fig.update_layout(title='Daily Newborn and Dead UTXO',
                   xaxis_title='Date',
                   yaxis_title='UTXO')


fig.show()

In [218]:
import plotly.graph_objects as go
# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_UTXO.date, y=df_UTXO.UTXO_Cum,
                    mode='markers', name='UTXO Cum'))
fig.update_layout(title='The Accumulated UTXO',
                   xaxis_title='Date',
                   yaxis_title='UTXO')
fig.show()

#Task 2: The Weighted Average of Life Expectancy by UTXO Value

# We calculate the weighted average life expectancy for all UTXO that has been spent by each date during 2009-01-09 to 2010-12-31

#The weights are UTXO VALUE

In [219]:
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO
0,2009-01-09,2009-01-12,50.0
1,2009-01-12,2009-01-12,29.0
2,2009-01-12,2009-01-12,1.0
3,2009-01-12,2009-01-12,40.0
4,2009-01-12,2009-01-12,28.0


In [220]:
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 3 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   block_date        137525 non-null  datetime64[ns]
 1   spent_block_date  115972 non-null  datetime64[ns]
 2   UTXO              137525 non-null  float64       
dtypes: datetime64[ns](2), float64(1)
memory usage: 3.1 MB


In [221]:
from datetime import datetime
df_2010['Life_Expectancy'] =df_2010['spent_block_date']-df_2010['block_date']
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy
0,2009-01-09,2009-01-12,50.0,3 days
1,2009-01-12,2009-01-12,29.0,0 days
2,2009-01-12,2009-01-12,1.0,0 days
3,2009-01-12,2009-01-12,40.0,0 days
4,2009-01-12,2009-01-12,28.0,0 days


In [222]:
### only reserve the value of days
df_2010['Life_Expectancy']=df_2010['Life_Expectancy'].map(lambda x:x.days)
df_2010.head()


Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy
0,2009-01-09,2009-01-12,50.0,3.0
1,2009-01-12,2009-01-12,29.0,0.0
2,2009-01-12,2009-01-12,1.0,0.0
3,2009-01-12,2009-01-12,40.0,0.0
4,2009-01-12,2009-01-12,28.0,0.0


In [223]:
df_2010.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 137525 entries, 0 to 137524
Data columns (total 4 columns):
 #   Column            Non-Null Count   Dtype         
---  ------            --------------   -----         
 0   block_date        137525 non-null  datetime64[ns]
 1   spent_block_date  115972 non-null  datetime64[ns]
 2   UTXO              137525 non-null  float64       
 3   Life_Expectancy   115972 non-null  float64       
dtypes: datetime64[ns](2), float64(2)
memory usage: 4.2 MB


In [224]:
df_2010 = df_2010.sort_values(by = 'spent_block_date')


In [225]:
#https://stackoverflow.com/questions/49573844/pandas-cumulative-weighted-average
df_2010['WALE']=(df_2010['UTXO'].mul(df_2010['Life_Expectancy'])).cumsum().div(df_2010['UTXO'].cumsum())
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy,WALE
0,2009-01-09,2009-01-12,50.0,3.0,3.0
1,2009-01-12,2009-01-12,29.0,0.0,1.898734
2,2009-01-12,2009-01-12,1.0,0.0,1.875
3,2009-01-12,2009-01-12,40.0,0.0,1.25
4,2009-01-12,2009-01-12,28.0,0.0,1.013514


In [226]:
### for each date, we count until the last UTXO spent
df_WALE=df_2010.groupby('spent_block_date')['WALE'].max().reset_index()
df_WALE.head()

Unnamed: 0,spent_block_date,WALE
0,2009-01-12,3.0
1,2009-01-14,0.925
2,2009-01-15,1.338462
3,2009-01-16,1.353191
4,2009-01-18,1.463462


In [227]:
df_UTXO=df_UTXO.merge(df_WALE, how='left',left_on='date',right_on='spent_block_date')
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum,spent_block_date,WALE
0,2009-01-09,700.0,0.0,700.0,700.0,NaT,
1,2009-01-10,3050.0,0.0,3050.0,3750.0,NaT,
2,2009-01-11,4650.0,0.0,4650.0,8400.0,NaT,
3,2009-01-12,4879.0,179.0,4700.0,13100.0,2009-01-12,3.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0,NaT,


In [228]:
df_UTXO=df_UTXO.drop('spent_block_date',axis=1)
df_UTXO=df_UTXO.fillna(method='ffill')
df_UTXO=df_UTXO.dropna()
df_UTXO.head()

Unnamed: 0,date,UTXO_newborn,UTXO_dead,Net_New,UTXO_Cum,WALE
3,2009-01-12,4879.0,179.0,4700.0,13100.0,3.0
4,2009-01-13,6150.0,0.0,6150.0,19250.0,3.0
5,2009-01-14,6511.0,61.0,6450.0,25700.0,0.925
6,2009-01-15,6800.0,500.0,6300.0,32000.0,1.338462
7,2009-01-16,5600.0,200.0,5400.0,37400.0,1.353191


In [229]:
import plotly.express as px
fig2 = px.line(df_UTXO, x = 'date', y = 'WALE', title = 'The Weighted Average of Life Expectancy by UTXO Value')
fig2.show()

#Task 3: Calculate the distribution of Life Expectancy
Sorted by day, month, as well as the year, and print a plot by every time slot.


In [230]:
df_2010 = df_2010.sort_values(by = 'block_date')
df_2010.head()


Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy,WALE
115972,2009-01-03,NaT,50.0,,
0,2009-01-09,2009-01-12,50.0,3.0,3.0
115980,2009-01-09,NaT,50.0,,
115981,2009-01-09,NaT,50.0,,
115977,2009-01-09,NaT,50.0,,


In [231]:
df_2010.reset_index(drop = True, inplace = True)
df_2010.insert(5,'< 1d','')
df_2010.insert(6,'1d ~ 1m','')
df_2010.insert(7,'1m ~ 1q','')
df_2010.insert(8,'1q ~ 6m','')
df_2010.insert(9,'6m ~ 1y','')
df_2010.insert(10,'1y ~ 2y','')
df_2010.insert(11,'2y ~ 3y','')
df_2010.insert(12,'3y ~ 4y','')
df_2010.insert(13,'4y ~ 5y','')
df_2010.insert(14, '> 5y', '')
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy,WALE,< 1d,1d ~ 1m,1m ~ 1q,1q ~ 6m,6m ~ 1y,1y ~ 2y,2y ~ 3y,3y ~ 4y,4y ~ 5y,> 5y
0,2009-01-03,NaT,50.0,,,,,,,,,,,,
1,2009-01-09,2009-01-12,50.0,3.0,3.0,,,,,,,,,,
2,2009-01-09,NaT,50.0,,,,,,,,,,,,
3,2009-01-09,NaT,50.0,,,,,,,,,,,,
4,2009-01-09,NaT,50.0,,,,,,,,,,,,


In [244]:
numrows = df_2010.shape[0]
for i in range(numrows):
  if df_2010.loc[i,'Life_Expectancy'] < 1:
    df_2010.loc[i, '< 1d'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 30:
    df_2010.loc[i,'1d ~ 1m'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 90:
    df_2010.loc[i,'1m ~ 1q']= df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 182:
    df_2010.loc[i, '1q ~ 6m'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 365:
    df_2010.loc[i, '6m ~ 1y'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 365*2:
    df_2010.loc[i,'1y ~ 2y'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 365*3:
    df_2010.loc[i, '2y ~ 3y'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 365*4:
    df_2010.loc[i, '3y ~ 4y'] = df_2010.loc[i,'UTXO']
  elif df_2010.loc[i,'Life_Expectancy'] < 365*5:
    df_2010.loc[i, '4y ~ 5y'] = df_2010.loc[i,'UTXO']
  else:
    df_2010.loc[i,'> 5y'] = df_2010.loc[i,'UTXO']
df_2010.head()

Unnamed: 0,block_date,spent_block_date,UTXO,Life_Expectancy,WALE,< 1d,1d ~ 1m,1m ~ 1q,1q ~ 6m,6m ~ 1y,1y ~ 2y,2y ~ 3y,3y ~ 4y,4y ~ 5y,> 5y
0,2009-01-03,NaT,50.0,,,,,,,,,,,,50.0
1,2009-01-09,2009-01-12,50.0,3.0,3.0,,50.0,,,,,,,,
2,2009-01-09,NaT,50.0,,,,,,,,,,,,50.0
3,2009-01-09,NaT,50.0,,,,,,,,,,,,50.0
4,2009-01-09,NaT,50.0,,,,,,,,,,,,50.0


In [None]:
df3 = df3.sort_values(by = 'block_date')
df3.reset_index(drop = True, inplace = True)
df3_sum = df3.groupby(df3['block_date']).sum()['UTXO']
df3_1 = df3.groupby(df3['block_date'])['< 1d'].apply(lambda x: (x != '').sum())
df3_2 = df3.groupby(df3['block_date'])['1d ~ 1m'].apply(lambda x: (x != '').sum())
df3_3 = df3.groupby(df3['block_date'])['1m ~ 1q'].apply(lambda x: (x != '').sum())
df3_4 = df3.groupby(df3['block_date'])['1q ~ 6m'].apply(lambda x: (x != '').sum())
df3_5 = df3.groupby(df3['block_date'])['6m ~ 1y'].apply(lambda x: (x != '').sum())
df3_6 = df3.groupby(df3['block_date'])['1y ~ 2y'].apply(lambda x: (x != '').sum())
df3_7 = df3.groupby(df3['block_date'])['2y ~ 3y'].apply(lambda x: (x != '').sum())
df3_8 = df3.groupby(df3['block_date'])['> 3y'].apply(lambda x: (x != '').sum())

d = {'UTXO': df3_sum, '< 1d': df3_1, '1d ~ 1m': df3_2, '1m ~ 1q': df3_3, '1q ~ 6m': df3_4, '6m ~ 1y': df3_5, '1y ~ 2y': df3_6, '2y ~ 3y': df3_7, '> 3y': df3_8}
df3_life_exp = pd.DataFrame(data = d)
df3_life_exp["date"] = df3_life_exp.index
df3_life_exp.reset_index(drop = True, inplace = True)

df3_life_exp

In [None]:
import plotly.offline as py     
import plotly.graph_objects as go

trace0 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["< 1d"], name = '< 1d')
trace1 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1d ~ 1m"], name = '1d ~ 1m')
trace2 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1m ~ 1q"], name = '1m ~ 1q')
trace3 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1q ~ 6m"], name = '1q ~ 6m')
trace4 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["6m ~ 1y"], name = '6m ~ 1y')
trace5 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["1y ~ 2y"], name = '1y ~ 2y')
trace6 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["2y ~ 3y"], name = '2y ~ 3y')
trace7 = go.Scatter(x = df3_life_exp["date"], y = df3_life_exp["> 3y"], name = '> 3y')

layout = go.Layout(title="Distribution", xaxis = dict(title="date"), yaxis = dict(title ="Average Life Expectancy") ) 
data = [trace0, trace1, trace2, trace3, trace4, trace5, trace6, trace7]
fig = go.Figure(layout = layout,data = data) 
py.iplot(fig)

#Task 4: Calculate the Distribution for UTXO (Bitcoin Age Distribution for the Bitcoin that are still Alive)

In [None]:
df4 = pd.read_csv('joint_2010.csv')
df4 = df4.drop(['num', 'value'], axis = 1)


from datetime import datetime
df4['Life Expectancy'] = (pd.to_datetime(df4['spent_block_date']) - pd.to_datetime(df4['block_date'])).map(lambda x:x.days)
df4['block_date'] = pd.to_datetime(df4['block_date'], format='%m/%d/%Y')
df4['spent_block_date'] = pd.to_datetime(df4['spent_block_date'], format='%m/%d/%Y')
df4['temp'] = df4['UTXO'] * df4['Life Expectancy']
df4

In [None]:
for i in range(len(df4)):
  if df4['block_date'][i] != df4['spent_block_date'][i]:
    df4_sum = df4.groupby(df4['block_date']).sum()['UTXO']
    df4_sumblock = df4.groupby(df4['block_date']).sum()['temp']

d = {'UTXO': df4_sum, 'temp': df4_sumblock}
df4_life_exp = pd.DataFrame(data = d)
df4_life_exp["sum_spent"] = df4_life_exp["UTXO"]
df4_life_exp["sum_temp"] = df4_life_exp["temp"]

df4_life_exp["sum_spent"] = df4_life_exp["sum_spent"].cumsum()
df4_life_exp["sum_temp"] = df4_life_exp["sum_temp"].cumsum()
df4_life_exp['Average Life Expectancy'] = df4_life_exp['sum_temp'] / df4_life_exp['sum_spent']
df4_life_exp["date"] = df4_life_exp.index
df4_life_exp.reset_index(drop = True, inplace = True)

df4_life_exp

In [None]:
import plotly.express as px
fig4 = px.line(df4_life_exp, x = 'date', y = 'Average Life Expectancy', title = 'UTXO Life Expectancy Distribution')
fig4.show()