<a href="https://colab.research.google.com/github/mdowns23/EthereumPricePredictor/blob/main/Ethereum_Price_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mark Downs,
August 15, 2022

The purpose of this project is to gain insight into Ethereum's price fluctuations through data analysis and machine learning.

This project gets its data from Yahoo Finance and uses the pandas and prophet libraries for data analysis and machine learning.

This project is based on the following article: https://medium.com/bitgrit-data-science-publication/ethereum-price-prediction-with-python-3b3805e6e512 


In [51]:
!pip install pystan --quiet
!pip install prophet --quiet
!pip install yfinance --quiet

In [52]:
import pandas as pd
import yfinance as yf
from datetime import datetime
from datetime import timedelta
import plotly.graph_objects as go
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
import warnings

# Silence warnings raised through Python's `warnings` module for the rest of the session.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# pandas warnings that may point at real problems — confirm this is intended.
warnings.filterwarnings('ignore')

# Render floats in pandas output as dollar amounts with thousands separators
# and two decimals (e.g. $1,702.91).
pd.options.display.float_format = '${:,.2f}'.format

In [53]:
# Download window: a fixed start date up to the current date (YYYY-MM-DD).
start_date = '2016-01-01'
today = datetime.today().strftime('%Y-%m-%d')

# ETH-USD price history from Yahoo Finance for that window.
eth_data = yf.download('ETH-USD', start=start_date, end=today)

# Preview the most recent rows.
eth_data.tail()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-08-10,"$1,702.91","$1,869.40","$1,665.09","$1,851.74","$1,851.74",23512477984
2022-08-11,"$1,851.83","$1,927.94","$1,851.83","$1,881.22","$1,881.22",23826986482
2022-08-12,"$1,880.90","$1,957.55","$1,860.08","$1,957.25","$1,957.25",17168141904
2022-08-13,"$1,957.33","$2,013.76","$1,948.60","$1,981.34","$1,981.34",16038975216
2022-08-14,"$1,981.78","$2,022.79","$1,919.01","$1,936.80","$1,936.80",14062754456


In [54]:
# The first rows show where the downloaded history actually begins.
eth_data.head(n=5)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09,$308.64,$329.45,$307.06,$320.88,$320.88,893249984
2017-11-10,$320.67,$324.72,$294.54,$299.25,$299.25,885985984
2017-11-11,$298.59,$319.45,$298.19,$314.68,$314.68,842300992
2017-11-12,$314.69,$319.15,$298.51,$307.91,$307.91,1613479936
2017-11-13,$307.02,$328.42,$307.02,$316.72,$316.72,1041889984


In [55]:
# Summarise the index range, column dtypes and non-null counts.
eth_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1740 entries, 2017-11-09 to 2022-08-14
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       1740 non-null   float64
 1   High       1740 non-null   float64
 2   Low        1740 non-null   float64
 3   Close      1740 non-null   float64
 4   Adj Close  1740 non-null   float64
 5   Volume     1740 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 95.2 KB


In [56]:
# Count missing values per column.
eth_data.isna().sum()

Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

In [57]:
# Column labels of the raw download (the dates are still the index at this point).
eth_data.columns

Index(['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [58]:
# Move the 'Date' index into a regular column so it can be selected by name.
# The guard plus the non-mutating reset_index keeps this cell idempotent:
# re-running it no longer inserts a stray 'index' column into the frame.
if "Date" not in eth_data.columns:
    eth_data = eth_data.reset_index()
eth_data.columns

Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')

In [59]:
# Build the two-column frame passed to Prophet: the date as 'ds' and the
# opening price as 'y'.
new_names = {
    "Date": "ds",
    "Open": "y",
}
# Select and rename in a single expression so `df` is a new frame. Renaming a
# column slice of `eth_data` in place (the previous pattern) triggers pandas'
# SettingWithCopyWarning.
df = eth_data[["Date", "Open"]].rename(columns=new_names)

In [60]:
# Check the renamed frame: latest dates with their opening prices.
df.tail(n=5)

Unnamed: 0,ds,y
1735,2022-08-10,"$1,702.91"
1736,2022-08-11,"$1,851.83"
1737,2022-08-12,"$1,880.90"
1738,2022-08-13,"$1,957.33"
1739,2022-08-14,"$1,981.78"


In [61]:
# Interactive line chart of the open price over time.
x = df["ds"]
y = df["y"]

# Range-selector buttons: quick zoom presets for the date axis.
range_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(step="all"),
]

graph = go.Figure(data=[go.Scatter(x=x, y=y)])

# Title, range selector and range slider applied in one layout update.
graph.update_layout(
    title_text="Time series plot of Ethereum Open Price",
    xaxis=dict(
        rangeselector=dict(buttons=range_buttons),
        rangeslider=dict(visible=True),
        type="date",
    ),
)


In [62]:
# Build and fit the Prophet model on the (ds, y) frame.
# Multiplicative seasonality is chosen because the data changes with the
# trend from year to year.
prophet_options = dict(seasonality_mode="multiplicative")
m = Prophet(**prophet_options)

m.fit(df)

INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnq_5zovf/nnb1417d.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpnq_5zovf/2k0w7jo7.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.7/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=87827', 'data', 'file=/tmp/tmpnq_5zovf/nnb1417d.json', 'init=/tmp/tmpnq_5zovf/2k0w7jo7.json', 'output', 'file=/tmp/tmp0248a2gl/prophet_model-20220815092908.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
09:29:08 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
09:29:09 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


<prophet.forecaster.Prophet at 0x7fd785c3e050>

In [63]:
# Extend the model's date range by one year of additional rows to forecast over.
forecast_days = 365
future_dates = m.make_future_dataframe(periods=forecast_days)
future_dates.tail()

Unnamed: 0,ds
2100,2023-08-10
2101,2023-08-11
2102,2023-08-12
2103,2023-08-13
2104,2023-08-14


In [64]:
# Run the forecast over all dates in `future_dates`, then show the last few
# point estimates together with their lower and upper bounds.
predictions = m.predict(future_dates)
forecast_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
predictions[forecast_columns].tail()

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
2100,2023-08-10,$52.29,"$-1,123.22","$1,312.47"
2101,2023-08-11,$48.22,"$-1,164.33","$1,310.25"
2102,2023-08-12,$44.02,"$-1,232.16","$1,333.14"
2103,2023-08-13,$40.24,"$-1,186.99","$1,361.33"
2104,2023-08-14,$36.25,"$-1,196.03","$1,390.18"


In [65]:
# Predicted price for tomorrow (relative to the time this cell is run).
next_day = (datetime.today() + timedelta(days=1)).strftime('%Y-%m-%d')

# Single .loc lookup instead of chained indexing.
next_day_yhat = predictions.loc[predictions['ds'] == next_day, 'yhat']

# Fail with an explicit message when the forecast has no row for that date,
# rather than the opaque error `.item()` raises on an empty selection.
if next_day_yhat.empty:
    raise ValueError(
        f"No forecast row for {next_day}; forecast covers "
        f"{predictions['ds'].min()} to {predictions['ds'].max()}."
    )

next_day_yhat.item()

1674.1387639161885

In [66]:
# Interactive plot of the fitted model and its forecast.
plot_plotly(m, fcst=predictions)

In [67]:
# Interactive plot of the forecast's individual components.
plot_components_plotly(m, fcst=predictions)

Model Conclusion (August 15, 2022)



* Ethereum is on a downward trend
* Ethereum's price tends to be lowest around July and on Saturdays
* Ethereum's price tends to be highest around November and on Thursdays

