In [None]:
import pandas as pd
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import ccxt

# Function to fetch historical OHLCV data
def fetch_ohlcv(symbol, timeframe, limit):
    exchange = ccxt.binance()  # Change this based on your exchange
    ohlcv = exchange.fetch_ohlcv(symbol, timeframe, limit=limit)
    df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    df.set_index('timestamp', inplace=True)
    return df

# Function to add technical indicators
def add_technical_indicators(df):
    # Calculate RSI

    close_diff = df['close'].diff(1)
    # Calculate gains and losses
    gains = close_diff.where(close_diff > 0, 0)
    losses = -close_diff.where(close_diff < 0, 0)
    # Smoothed average gains and losses
    avg_gain = gains.rolling(window=14, min_periods=1).mean()
    avg_loss = losses.rolling(window=14, min_periods=1).mean()
    # Calculate relative strength (RS)
    rs = avg_gain / avg_loss
    df['rsi'] = 100 - (100 / (1 + rs))

    # Calculate MACD
    df['ema12'] = df['close'].ewm(span=12, adjust=False).mean()
    df['ema26'] = df['close'].ewm(span=26, adjust=False).mean()
    df['macd'] = df['ema12'] - df['ema26']

    # Calculate Bollinger Bands
    df['sma20'] = df['close'].rolling(window=20).mean()
    df['upper_band'] = df['sma20'] + 2 * df['close'].rolling(window=20).std()
    df['lower_band'] = df['sma20'] - 2 * df['close'].rolling(window=20).std()
    # Calculate Bollinger Bands Width
    df['bollinger_width'] = (df['upper_band'] - df['lower_band']) / df['sma20'] * 100

    # Calculate Force Index
    df['force_index'] = df['close'].diff(1) * df['volume']

# Fetch data
symbol = 'BTC/USDT'
timeframe = '1s'
limit = 1000
data = fetch_ohlcv(symbol, timeframe, limit)

# Add technical indicators
add_technical_indicators(data)

# Save to CSV
data.to_csv('btc_usdt.csv')

In [None]:
data.dropna()

Unnamed: 0_level_0,open,high,low,close,volume,rsi,ema12,ema26,macd,sma20,upper_band,lower_band,bollinger_width,force_index
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2023-10-15 21:33:40,27200.84,27200.85,27200.84,27200.85,0.02268,50.000000,27200.777312,27200.500817,0.276494,27200.7070,27201.556572,27199.857428,0.006247,0.000000
2023-10-15 21:33:41,27200.84,27200.85,27200.84,27200.85,0.02434,57.142857,27200.788495,27200.526683,0.261812,27200.7760,27201.395589,27200.156411,0.004556,0.000000
2023-10-15 21:33:42,27200.84,27200.84,27200.84,27200.84,0.00859,42.857143,27200.796419,27200.549891,0.246527,27200.8450,27200.855260,27200.834740,0.000075,-0.000086
2023-10-15 21:33:43,27200.84,27200.84,27200.84,27200.84,0.01191,42.857143,27200.803123,27200.571381,0.231742,27200.8450,27200.855260,27200.834740,0.000075,0.000000
2023-10-15 21:33:44,27200.84,27200.85,27200.84,27200.85,0.00933,50.000000,27200.810335,27200.592019,0.218316,27200.8455,27200.855708,27200.835292,0.000075,0.000093
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-10-15 21:49:56,27173.28,27173.28,27173.27,27173.27,0.76721,0.000000,27173.270368,27173.270992,-0.000624,27173.2715,27173.278828,27173.264172,0.000054,0.000000
2023-10-15 21:49:57,27173.27,27173.28,27173.27,27173.28,0.02052,100.000000,27173.271850,27173.271659,0.000191,27173.2715,27173.278828,27173.264172,0.000054,0.000205
2023-10-15 21:49:58,27173.28,27173.28,27173.28,27173.28,0.00073,100.000000,27173.273104,27173.272277,0.000827,27173.2720,27173.280208,27173.263792,0.000060,0.000000
2023-10-15 21:49:59,27173.27,27173.28,27173.27,27173.27,0.07406,50.000000,27173.272626,27173.272108,0.000518,27173.2715,27173.278828,27173.264172,0.000054,-0.000741


In [None]:
# Load the data
data = pd.read_csv('btc_usdt.csv', index_col='timestamp').dropna()

# Create binary labels indicating price movement (1 for price increase, 0 for price decrease or no change)
data['price_movement'] = (data['close'].shift(-1) > data['close']).astype(int)

# Select features for logistic regression
features = ['rsi', 'macd', 'ema12','ema26','sma20','force_index', 'bollinger_width']

# Prepare data
X = data[features].values
y = data['price_movement'].values

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train logistic regression model
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

# Predict on the test set
y_pred = model.predict(X_test_scaled)

# Evaluate the model accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Get feature importance
feature_importance = model.coef_[0]
feature_importance_dict = {feature: importance for feature, importance in zip(features, feature_importance)}

top_features = sorted(feature_importance_dict.items(), key=lambda x: abs(x[1]), reverse=True)[:]
print('\nTop Features:')
for feature, importance in top_features:
    print(f'{feature}: {importance:.4f}')

Model Accuracy: 73.60%

Top Features:
macd: 0.2488
ema26: -0.2458
ema12: -0.2242
force_index: 0.1326
bollinger_width: -0.0937
sma20: 0.0659
rsi: -0.0282
