In [1]:
import requests
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
import joblib

In [None]:
def fetch_crypto_data(crypto_pair, api_key):
    base_url = "https://api.coingecko.com/api/v3/coins/{}/market_chart"
    crypto_id = crypto_pair.split('/')[0].lower()
    url = base_url.format(crypto_id)
    params = {
        'vs_currency': crypto_pair.split('/')[1].lower(),
        'days': '365',  
        'interval': 'daily'
    }
    headers = {
        'x_cg_demo_api_key': api_key  
    }

    response = requests.get(url, params=params, headers=headers)
    data = response.json()

    if 'prices' not in data:
        print("Error: 'prices' key not found in the response.")
        print("Full response:", data)
        return pd.DataFrame()  

    prices = data['prices']
    df = pd.DataFrame(prices, columns=['Date', 'Close'])
    df['Date'] = pd.to_datetime(df['Date'], unit='ms')
    df['Open'] = df['Close'].shift(1)
    df['High'] = df['Close'].rolling(window=1).max()
    df['Low'] = df['Close'].rolling(window=1).min()
    df = df.dropna()

    return df

In [3]:
def calculate_metrics(data, variable1, variable2):
    data = data.sort_values(by='Date')

    data[f'High_Last_{variable1}_Days'] = data['High'].rolling(window=variable1).max()
    data[f'Days_Since_High_Last_{variable1}_Days'] = (data['Date'] - data['Date'].shift(variable1)).dt.days
    data[f'%_Diff_From_High_Last_{variable1}_Days'] = (data['Close'] - data[f'High_Last_{variable1}_Days']) / data[f'High_Last_{variable1}_Days'] * 100

    data[f'Low_Last_{variable1}_Days'] = data['Low'].rolling(window=variable1).min()
    data[f'Days_Since_Low_Last_{variable1}_Days'] = (data['Date'] - data['Date'].shift(variable1)).dt.days
    data[f'%_Diff_From_Low_Last_{variable1}_Days'] = (data['Close'] - data[f'Low_Last_{variable1}_Days']) / data[f'Low_Last_{variable1}_Days'] * 100

    data[f'High_Next_{variable2}_Days'] = data['High'].shift(-variable2).rolling(window=variable2).max()
    data[f'%_Diff_From_High_Next_{variable2}_Days'] = (data['Close'] - data[f'High_Next_{variable2}_Days']) / data[f'High_Next_{variable2}_Days'] * 100

    data[f'Low_Next_{variable2}_Days'] = data['Low'].shift(-variable2).rolling(window=variable2).min()
    data[f'%_Diff_From_Low_Next_{variable2}_Days'] = (data['Close'] - data[f'Low_Next_{variable2}_Days']) / data[f'Low_Next_{variable2}_Days'] * 100

    data = data.dropna()

    return data

In [4]:
def train_model(X, y_high, y_low):
    X_train, X_test, y_high_train, y_high_test, y_low_train, y_low_test = train_test_split(
        X, y_high, y_low, test_size=0.2, random_state=42
    )

    model_high = RandomForestRegressor(n_estimators=100, random_state=42)
    model_low = RandomForestRegressor(n_estimators=100, random_state=42)

    model_high.fit(X_train, y_high_train)
    model_low.fit(X_train, y_low_train)

    y_high_pred = model_high.predict(X_test)
    y_low_pred = model_low.predict(X_test)

    high_mae = mean_absolute_error(y_high_test, y_high_pred)
    low_mae = mean_absolute_error(y_low_test, y_low_pred)

    print(f'Mean Absolute Error for High: {high_mae}')
    print(f'Mean Absolute Error for Low: {low_mae}')

    joblib.dump(model_high, 'model_high.pkl')
    joblib.dump(model_low, 'model_low.pkl')

    return model_high, model_low

In [None]:
def predict_outcomes(model_high, model_low, input_features):
    model_high = joblib.load('model_high.pkl')
    model_low = joblib.load('model_low.pkl')

    high_pred = model_high.predict([input_features])
    low_pred = model_low.predict([input_features])

    return high_pred[0], low_pred[0]


In [None]:
api_key = 'CG-jwaGUjTXLRyZRYwvERN9hNJT'  
crypto_pair = "bitcoin/usd"

df = fetch_crypto_data(crypto_pair, api_key)

if df.empty:
    print("Failed to fetch data.")
else:
    print("Fetched Data:")
    print(df.head())  

    variable1 = 7
    variable2 = 5

    metrics_df = calculate_metrics(df, variable1, variable2)

    print("Metrics DataFrame Columns:")
    print(metrics_df.columns)


Fetched Data:
        Date         Close          Open          High           Low
1 2023-11-04  34731.381369  34924.055450  34731.381369  34731.381369
2 2023-11-05  35048.407835  34731.381369  35048.407835  35048.407835
3 2023-11-06  35061.928749  35048.407835  35061.928749  35061.928749
4 2023-11-07  35031.268882  35061.928749  35031.268882  35031.268882
5 2023-11-08  35436.537630  35031.268882  35436.537630  35436.537630
Metrics DataFrame Columns:
Index(['Date', 'Close', 'Open', 'High', 'Low', 'High_Last_7_Days',
       'Days_Since_High_Last_7_Days', '%_Diff_From_High_Last_7_Days',
       'Low_Last_7_Days', 'Days_Since_Low_Last_7_Days',
       '%_Diff_From_Low_Last_7_Days', 'High_Next_5_Days',
       '%_Diff_From_High_Next_5_Days', 'Low_Next_5_Days',
       '%_Diff_From_Low_Next_5_Days'],
      dtype='object')


In [None]:
with pd.ExcelWriter("C:/Users/DELL/Desktop/crypto_data.xlsx") as writer:
      df.to_excel(writer, sheet_name='Raw Data', index=False)
      metrics_df.to_excel(writer, sheet_name='Metrics Data', index=False)

print("Data and metrics saved to crypto_data.xlsx.")

Data and metrics saved to crypto_data.xlsx.


In [None]:
features = [
    f'Days_Since_High_Last_{variable1}_Days',
    f'%_Diff_From_High_Last_{variable1}_Days',
    f'Days_Since_Low_Last_{variable1}_Days',
    f'%_Diff_From_Low_Last_{variable1}_Days'
]

target_high = f'%_Diff_From_High_Next_{variable2}_Days'
target_low = f'%_Diff_From_Low_Next_{variable2}_Days'

if not metrics_df.empty and target_high in metrics_df.columns and target_low in metrics_df.columns:
    X = metrics_df[features]
    y_high = metrics_df[target_high]
    y_low = metrics_df[target_low]

    if not (y_high.isna().any() or y_low.isna().any()):
        model_high, model_low = train_model(X, y_high, y_low)

        input_features = [1, -0.90, 7, 4.76]
        high_pred, low_pred = predict_outcomes(model_high, model_low, input_features)
        print(f'Predicted % Diff From High Next {variable2} Days: {high_pred}')
        print(f'Predicted % Diff From Low Next {variable2} Days: {low_pred}')


Mean Absolute Error for High: 3.4168248718842222
Mean Absolute Error for Low: 3.714318605770346
Predicted % Diff From High Next 5 Days: -2.543785494598555
Predicted % Diff From Low Next 5 Days: 3.136294445808237


