In [1]:
# Install the technical-analysis library into the kernel's own environment.
# Pinned to the release this notebook was last run against so re-runs are reproducible.
%pip install ta==0.11.0

Collecting ta
  Using cached ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: ta
  Building wheel for ta (setup.py): started
  Building wheel for ta (setup.py): finished with status 'done'
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29421 sha256=15054b2a50ea6aa54f4788d0635ef917eaf16b5b6014b43283f167d61f6dc33c
  Stored in directory: c:\users\solent\appdata\local\pip\cache\wheels\5c\a1\5f\c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [26]:
import pandas as pd
from ta.trend import SMAIndicator, MACD
from ta.momentum import RSIIndicator

# Single source of truth for the input file, so the printed label below can
# never drift away from the file that was actually read.
CSV_PATH = "crypto_4.csv"

# Short per-ticker column names, in the order they are written to disk.
OHLCV_FIELDS = ["Open", "High", "Low", "Close", "Volume"]


def build_features(ohlcv: pd.DataFrame) -> pd.DataFrame:
    """Clean one ticker's OHLCV frame and append the modelling features.

    The input is not modified; a new frame is returned.

    Parameters
    ----------
    ohlcv : pd.DataFrame
        Price frame containing at least a "Close" column. Rows are assumed to
        be in ascending time order, because every shift-based feature below
        depends on row order -- TODO confirm for new data sources.

    Returns
    -------
    pd.DataFrame
        The forward-filled input plus the columns Lag1, Lag7, SMA7, RSI14,
        MACD, Returns and Target (the next row's close). Every row that still
        contains a NaN is dropped, which removes the indicator warm-up rows at
        the start and the final row, whose next close is unknown.
    """
    # Forward-fill gaps, then drop rows that are still empty (leading NaNs).
    cleaned = ohlcv.ffill().dropna()
    close = cleaned["Close"]

    features = cleaned.assign(
        Lag1=close.shift(1),
        Lag7=close.shift(7),
        SMA7=SMAIndicator(close, window=7).sma_indicator(),
        RSI14=RSIIndicator(close, window=14).rsi(),
        MACD=MACD(close, window_slow=26, window_fast=12).macd(),
        Returns=close.pct_change(),
        Target=close.shift(-1),  # next row's close: the prediction target
    )
    return features.dropna()


# The file starts with two header rows (price field, then ticker symbol).
# Read them by hand; the first cell of each row is a row label, so skip it.
with open(CSV_PATH, "r") as file:
    price_row = file.readline().strip().split(",")[1:]
    ticker_row = file.readline().strip().split(",")[1:]

# zip() would silently truncate to the shorter row, so fail loudly instead.
if len(price_row) != len(ticker_row):
    raise ValueError(
        f"Header rows in {CSV_PATH} differ in length: "
        f"{len(price_row)} price fields vs {len(ticker_row)} tickers"
    )

# Flatten the two header rows into single names such as "Close BTC-USD".
column_names = [f"{price} {ticker}" for price, ticker in zip(price_row, ticker_row)]

# Load the values; the third line of the file (the "Date" row) acts as the
# header here and is then replaced by the flattened names.
data = pd.read_csv(CSV_PATH, skiprows=2, index_col=0, parse_dates=True)
data.columns = column_names
print(f"Column names in {CSV_PATH} after renaming:")
print(data.columns.tolist())

# Define tickers
tickers = ["BNB-USD", "BTC-USD", "ETH-USD", "XRP-USD"]

# Process each ticker separately
for ticker in tickers:
    # Pull this ticker's five columns and give them the short field names.
    source_columns = [f"{field} {ticker}" for field in OHLCV_FIELDS]
    df = data[source_columns].copy()
    df.columns = OHLCV_FIELDS

    # Clean the frame and add lag / indicator / target columns.
    df = build_features(df)

    # Save processed data
    df.to_csv(f"{ticker}_processed.csv")
    print(f"Processed {ticker}: {df.shape} rows, columns: {list(df.columns)}")


# Read one result back from disk to confirm the round trip.
print("\nBTC-USD Sample:")
print(pd.read_csv("BTC-USD_processed.csv").tail())

Column names in crypto_data.csv after renaming:
['Close BNB-USD', 'Close BTC-USD', 'Close ETH-USD', 'Close XRP-USD', 'High BNB-USD', 'High BTC-USD', 'High ETH-USD', 'High XRP-USD', 'Low BNB-USD', 'Low BTC-USD', 'Low ETH-USD', 'Low XRP-USD', 'Open BNB-USD', 'Open BTC-USD', 'Open ETH-USD', 'Open XRP-USD', 'Volume BNB-USD', 'Volume BTC-USD', 'Volume ETH-USD', 'Volume XRP-USD']
Processed BNB-USD: (705, 12) rows, columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Lag1', 'Lag7', 'SMA7', 'RSI14', 'MACD', 'Returns', 'Target']
Processed BTC-USD: (705, 12) rows, columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Lag1', 'Lag7', 'SMA7', 'RSI14', 'MACD', 'Returns', 'Target']
Processed ETH-USD: (705, 12) rows, columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Lag1', 'Lag7', 'SMA7', 'RSI14', 'MACD', 'Returns', 'Target']
Processed XRP-USD: (705, 12) rows, columns: ['Open', 'High', 'Low', 'Close', 'Volume', 'Lag1', 'Lag7', 'SMA7', 'RSI14', 'MACD', 'Returns', 'Target']

BTC-USD Sample:
          