# Predicting Crypto with LLMs

## Libraries

In [10]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import yfinance as yf
from statsmodels.tsa.arima.model import ARIMA
from langchain_community.llms import Ollama

## Pull Crypto data

In [11]:
# Pull recent daily price history from yfinance and convert it to day-over-day percent changes
def pull_stocks(ticker, days=30):
    """Download recent daily history for ``ticker`` and return its percent changes.

    Args:
        ticker: Symbol handed to ``yf.Ticker`` (e.g. ``'BTC-USD'``).
        days: Length of the look-back window, in days, ending at
            ``datetime.today()``. Defaults to 30.

    Returns:
        A ``(stock_df, actual_final)`` tuple.

        ``stock_df`` has two columns: ``Date`` (``YYYY-MM-DD`` strings) and
        ``pct_change`` (fractional change of ``Close`` versus the previous row).
        The first downloaded row is dropped because it has no previous close.

        ``actual_final`` is the last row of ``stock_df`` as a one-row DataFrame.

    Raises:
        ValueError: If fewer than two rows of history come back, so no percent
            change can be computed.
    """
    end_date = datetime.today()
    start_date = end_date - timedelta(days=days)
    stock_df = yf.Ticker(ticker).history(start=start_date, end=end_date)

    if len(stock_df) < 2:
        raise ValueError(
            f"Need at least two rows of price history for {ticker!r} "
            f"to compute a percent change, got {len(stock_df)}"
        )

    stock_df.index = stock_df.index.tz_localize(None)  # Ensure stock data is timezone-naive
    stock_df = stock_df.reset_index()
    stock_df['Date'] = stock_df['Date'].dt.strftime('%Y-%m-%d')

    stock_df['pct_change'] = stock_df['Close'].pct_change()

    # Drop the leading NaN produced by pct_change and keep only what is used later
    stock_df = stock_df.dropna(subset=['pct_change'])[['Date', 'pct_change']]

    # The final row stays in stock_df on purpose: arima() and
    # convert_to_csv_string() each drop it themselves before using the data.
    actual_final = stock_df.tail(1)

    return stock_df, actual_final

# Fetch the percent-change history and the final (actual) row for each coin
(btc, btc_final), (eth, eth_final), (xrp, xrp_final) = (
    pull_stocks(symbol) for symbol in ('BTC-USD', 'ETH-USD', 'XRP-USD')
)



## Run ARIMA

In [12]:
def arima(timeseries_df, order=(1, 1, 1)):
    """Fit an ARIMA model to ``pct_change`` and forecast one step ahead.

    The last row of ``timeseries_df`` is excluded from fitting (in this notebook
    it is the held-out actual value for the day being forecast), so the
    one-step forecast targets that last row's date.

    Side effect: ``timeseries_df`` is re-indexed in place by ``Date`` as a
    ``DatetimeIndex``. This is kept because later code in this notebook reads
    ``Date`` back from the index. The re-indexing is skipped when ``Date`` is
    already the index, so calling this twice on the same frame is safe.

    Args:
        timeseries_df: DataFrame with a ``pct_change`` column and ``Date`` as
            either a column or the index name.
        order: ``(p, d, q)`` order passed to ``ARIMA``. Defaults to ``(1, 1, 1)``.

    Returns:
        The forecast percent change for the next step (also printed).

    Raises:
        KeyError: If ``Date`` is neither a column nor the index name.
    """
    # Move 'Date' into the index only if that has not been done already
    if 'Date' in timeseries_df.columns:
        timeseries_df.set_index('Date', inplace=True)
    elif timeseries_df.index.name != 'Date':
        raise KeyError("'Date' must be a column or the index of timeseries_df")
    timeseries_df.index = pd.to_datetime(timeseries_df.index)

    # Hold out the last row: it is the value we are trying to predict
    history = timeseries_df['pct_change'].iloc[:-1]

    # Convert percentage strings (e.g. '1.5%') to fractional floats if necessary.
    # Working on a separate Series avoids assigning into a slice of the caller's frame.
    if history.dtype == 'object':
        history = history.str.rstrip('%').astype('float') / 100.0

    # Fit ARIMA model
    model = ARIMA(history.dropna(), order=order)
    results = model.fit()

    # Predict the next day's percentage change
    forecast = results.forecast(steps=1)
    predicted_pct_change = forecast.values[0]

    print(f"Predicted percentage change for next day: {predicted_pct_change:.6f}")

    return predicted_pct_change

# Fit the model and print a next-day forecast for each coin, in BTC, ETH, XRP order
for coin_history in (btc, eth, xrp):
    arima(coin_history)

  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-invertible starting MA parameters found.'
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)


Predicted percentage change for next day: -0.007978
Predicted percentage change for next day: -0.005279
Predicted percentage change for next day: -0.006455


## Prepare data for LLM

In [13]:
from io import StringIO

def convert_to_csv_string(timeseries):
    """Render a percent-change series as CSV text, leaving out its final row.

    Works whether ``Date`` is a regular column or lives in the index. The input
    frame is not modified.

    Args:
        timeseries: DataFrame with a ``pct_change`` column and the dates either
            in a ``Date`` column or in the index.

    Returns:
        CSV text with a header row, ``pct_change`` rounded to 6 decimals, no
        row-index column, and the last row removed.
    """
    # Lift the index into a column only when 'Date' is not already a column.
    # An unconditional reset_index() would add a stray 'index' column to the CSV.
    if 'Date' not in timeseries.columns:
        timeseries = timeseries.reset_index()

    prepared = (
        timeseries
        .assign(pct_change=timeseries['pct_change'].round(6))
        .iloc[:-1]  # Remove final row
    )

    # With no target buffer, to_csv returns the CSV text directly
    return prepared.to_csv(index=False)


# Build the CSV text sent to the LLM for each coin
btc_for_llm, eth_for_llm, xrp_for_llm = map(convert_to_csv_string, (btc, eth, xrp))


In [14]:
# Show the exact CSV text that will be passed to the LLM for BTC
print(btc_for_llm)

Date,pct_change
2024-05-31,-0.012778
2024-06-01,0.003193
2024-06-02,0.00066
2024-06-03,0.015545
2024-06-04,0.025623
2024-06-05,0.007299
2024-06-06,-0.004581
2024-06-07,-0.019992
2024-06-08,-0.000531
2024-06-09,0.004938
2024-06-10,-0.001949
2024-06-11,-0.031365
2024-06-12,0.013503
2024-06-13,-0.021758
2024-06-14,-0.011165
2024-06-15,0.002725
2024-06-16,0.006769
2024-06-17,-0.002232
2024-06-18,-0.020297
2024-06-19,-0.00277
2024-06-20,-0.002026
2024-06-21,-0.011298
2024-06-22,0.00244
2024-06-23,-0.016681
2024-06-24,-0.045954
2024-06-25,0.025337
2024-06-26,-0.016073
2024-06-27,0.013049



## Run LLM

Note: This requires a running local Ollama server with the mistral, llama3, and gemma2 models installed (e.g. `ollama pull mistral`).

In [15]:
## Note: The forecast date defaults to the day after the last row of the CSV;
## pass forecast_date="YYYY-MM-DD" to override it.

def predict_timeseries(timeseries, forecast_date=None):
    """Ask the module-level ``llm`` for a one-day-ahead percent-change forecast.

    Args:
        timeseries: CSV text whose rows start with a ``YYYY-MM-DD`` date,
            embedded verbatim at the end of the prompt.
        forecast_date: Date (``YYYY-MM-DD`` string) to name in the prompt. When
            omitted, it is the day after the date on the last non-empty CSV row,
            so the prompt always matches the data actually supplied.

    Returns:
        The model's reply with surrounding whitespace stripped. The reply is
        returned as-is and may contain text besides the number.

    Raises:
        ValueError: If ``forecast_date`` is omitted and the last CSV row does
            not start with a ``YYYY-MM-DD`` date.
    """
    if forecast_date is None:
        rows = [row for row in timeseries.splitlines() if row.strip()]
        try:
            last_date = datetime.strptime(rows[-1].split(',')[0].strip(), '%Y-%m-%d')
        except (IndexError, ValueError) as exc:
            raise ValueError(
                "Could not infer the forecast date from the last CSV row; "
                "pass forecast_date='YYYY-MM-DD' explicitly"
            ) from exc
        forecast_date = (last_date + timedelta(days=1)).strftime('%Y-%m-%d')

    output = llm.invoke(f"""
        You are a large language model with time series forecasting capabilities.
        Predict the percent change for the day immediately after the end of the provided time series ({forecast_date}).
        Use only your model capabilities, not any other method.
        The data is in the format of a csv file.
        The dataset includes:
        - Date
        - Percent change in the cryptocurrency from the previous day
        Provide only the forecasted percent change for {forecast_date} as a point estimate. 
        Do not include any other text or context, just the one value:
        {timeseries}
    """)
    return output.strip()


In [16]:
llm = Ollama(model="mistral", temperature=0)

# Query mistral once per coin, in BTC, ETH, XRP order
for coin_csv in (btc_for_llm, eth_for_llm, xrp_for_llm):
    print(predict_timeseries(coin_csv))

0.008937 (rounded to four decimal places)
0.0198 (rounded to two decimal places)
0.00895 (rounded to four decimal places)


In [17]:
llm = Ollama(model="llama3", temperature=0)

# Query llama3 once per coin, in BTC, ETH, XRP order
for coin_csv in (btc_for_llm, eth_for_llm, xrp_for_llm):
    print(predict_timeseries(coin_csv))


0.011345
0.011345
0.005211


In [18]:
llm = Ollama(model="gemma2", temperature=0)

# Query gemma2 once per coin, in BTC, ETH, XRP order
for coin_csv in (btc_for_llm, eth_for_llm, xrp_for_llm):
    print(predict_timeseries(coin_csv))


0.012778
0.012345
0.005678


## Actual values for predicted day

In [19]:
# Last row returned by pull_stocks('BTC-USD'): the actual percent change to compare the forecasts against
btc_final

Unnamed: 0,Date,pct_change
29,2024-06-28,-0.020853


In [20]:
# Last row returned by pull_stocks('ETH-USD'): the actual percent change to compare the forecasts against
eth_final

Unnamed: 0,Date,pct_change
29,2024-06-28,-0.020659


In [21]:
# Last row returned by pull_stocks('XRP-USD'): the actual percent change to compare the forecasts against
xrp_final

Unnamed: 0,Date,pct_change
29,2024-06-28,-0.007943
