# Notebook cell In [2]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Load and optimize data
def load_and_optimize_data(path='btc_15m_data_2018_to_2025.csv', step=4):
    """Load OHLCV candles from a CSV file into a compact, time-indexed frame.

    The file is thinned by keeping every ``step``-th row. This is plain
    sampling, not aggregation: the ``high``/``low``/``volume`` of a kept row
    are the values of that single source row, not of the whole interval
    between two kept rows.

    Args:
        path: CSV file to read. It must contain the columns ``timestamp``,
            ``open``, ``high``, ``low``, ``close`` and ``volume``; any other
            column is discarded.
        step: Keep one row out of every ``step`` rows. Must be >= 1.

    Returns:
        A DataFrame indexed by the parsed ``timestamp`` column, holding the
        five remaining columns with every float64 column downcast to float32.

    Raises:
        ValueError: If ``step`` is smaller than 1.
        KeyError: If one of the required columns is missing from the file.
    """
    if step < 1:
        raise ValueError(f'step must be >= 1, got {step}')

    essential_cols = ['timestamp', 'open', 'high', 'low', 'close', 'volume']

    raw = pd.read_csv(path)

    # Thin the rows first, then keep only the columns the analysis needs.
    df = raw.iloc[::step][essential_cols].reset_index(drop=True)

    # Halve the memory footprint of the numeric columns.
    float_cols = df.select_dtypes(include=['float64']).columns
    df = df.astype({col: 'float32' for col in float_cols})

    # Use the parsed timestamp as the index.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df.set_index('timestamp')

# Feature Engineering
def add_technical_indicators(df):
    """Append moving-average, return, volatility and RSI columns to ``df``.

    The columns are written into ``df`` itself (the frame is modified in
    place) and the same frame is returned for convenience. Added columns, in
    order: ``MA7``, ``MA21``, ``MA50`` (rolling means of ``close``),
    ``Daily_Return`` (period-over-period percentage change of ``close``),
    ``Volatility`` (21-row rolling standard deviation of ``Daily_Return``)
    and ``RSI`` (from ``calculate_rsi`` with its default window).

    Args:
        df: DataFrame with a ``close`` column.

    Returns:
        The same DataFrame, with the indicator columns added.
    """
    close = df['close']

    # Rolling means; rows before a full window is available stay NaN.
    for span in (7, 21, 50):
        df[f'MA{span}'] = close.rolling(window=span).mean()

    # Return of each row relative to the previous row, and its rolling spread.
    returns = close.pct_change()
    df['Daily_Return'] = returns
    df['Volatility'] = returns.rolling(window=21).std()

    df['RSI'] = calculate_rsi(close)
    return df

def calculate_rsi(data, periods=14):
    """Compute a simple-moving-average Relative Strength Index.

    The average gain and average loss are plain rolling means of the positive
    and negative price changes over ``periods`` rows, and
    ``RSI = 100 * avg_gain / (avg_gain + avg_loss)``, which is algebraically
    the same as ``100 - 100 / (1 + avg_gain / avg_loss)`` but needs no
    infinite intermediate value when there are no losses.

    Args:
        data: pandas Series of prices.
        periods: Number of consecutive price changes in each window.

    Returns:
        A Series aligned with ``data``. The first ``periods`` entries are NaN
        because a full window of price changes is not yet available. A window
        with no price movement at all (zero gain and zero loss) also yields
        NaN.
    """
    delta = data.diff()

    # clip() keeps the undefined first difference as NaN, so it is not
    # counted as a "zero move" and the first RSI value is only produced once
    # `periods` real price changes exist.
    avg_gain = delta.clip(lower=0).rolling(window=periods).mean()
    avg_loss = (-delta).clip(lower=0).rolling(window=periods).mean()

    return 100 * avg_gain / (avg_gain + avg_loss)

# Main execution
# Pipeline: load candles -> add indicators -> plot -> train a random forest
# to predict the NEXT period's close -> evaluate on the most recent 20% of
# the data -> report feature importances and one-step-ahead forecasts ->
# save the feature frame.
df = load_and_optimize_data()
df = add_technical_indicators(df)

# Visualize price trends
plt.figure(figsize=(15, 8))
plt.plot(df.index, df['close'], label='Bitcoin Price')
plt.plot(df.index, df['MA21'], label='21-hour MA')
plt.title('Bitcoin Price with Moving Average')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.show()

# Prepare modeling data
# Drop the warm-up rows whose rolling indicators are still NaN.
df = df.dropna()
features = ['MA7', 'MA21', 'MA50', 'Daily_Return', 'Volatility', 'RSI']

# Every feature is computed from the close of its own row, so using that same
# close as the target would hand the model the answer. The target is therefore
# the close of the FOLLOWING row; the final row has no known target yet and is
# kept out of training/evaluation.
next_close = df['close'].shift(-1)
X = df[features].iloc[:-1]
y = next_close.iloc[:-1]

# Split and train
# shuffle=False keeps the split chronological: the model is trained on the
# oldest 80% and evaluated on the newest 20%, so no test row has neighbours
# in the training set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R2 Score: {r2:.2f}')

# Feature importance analysis
feature_importance = pd.DataFrame({
    'feature': features,
    'importance': model.feature_importances_
})
feature_importance = feature_importance.sort_values('importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x='importance', y='feature', data=feature_importance)
plt.title('Feature Importance')
plt.show()

# Generate one-step-ahead predictions
# Each of the last 30 rows yields a forecast of the close one period later;
# only the final entry refers to a period that has not been observed yet.
last_30_periods = df[features].tail(30)
future_pred = model.predict(last_30_periods)
print("\nPredicted next-period close for each of the last 30 periods:")
print(future_pred)

# Save optimized dataset
df.to_csv('bitcoin_data_optimized.csv.gz', compression='gzip')


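# Recorded cell output: ModuleNotFoundError: No module named 'pandas'
# (pandas must be installed in the kernel's environment before this cell can run).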