<a href="https://colab.research.google.com/github/mahesh-tippanu/crypto-predictor/blob/main/crypto_data_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!pip install requests pandas scikit-learn joblib


In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

def fetch_crypto_data(network_id, token_id, start_date, timeout=30):
    """
    Download price candles for a token and keep those on or after ``start_date``.

    Args:
        network_id (str): Kept for call compatibility; the request built here
            does not use it.
        token_id (str): Identifier placed in the CoinGecko
            ``/coins/{id}/market_chart`` URL.
        start_date (str): Inclusive lower bound, formatted ``YYYY-MM-DD``.
        timeout (float): Seconds to wait for the HTTP response.

    Returns:
        pd.DataFrame: One row per kept entry with the columns ``Date``
        (``datetime.date``), ``Open``, ``High``, ``Low`` and ``Close``.

    Raises:
        ValueError: If ``start_date`` or an entry's ``time`` field does not
            match the expected format.
        Exception: If the API responds with a status code other than 200.
    """
    # Parse the cut-off once, up front, instead of once per row.
    cutoff = datetime.strptime(start_date, "%Y-%m-%d").date()

    # The identifier in the URL must come from a parameter of this function;
    # `token_id` is the only one that names a coin.
    url = f'https://api.coingecko.com/api/v3/coins/{token_id}/market_chart'

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")

    # NOTE(review): this assumes a payload shaped like
    # {'data': [{'time': ..., 'price': {'open', 'high', 'low', 'close'}}]}.
    # The other cells in this notebook read a 'prices' list from the same
    # endpoint and send vs_currency/days parameters - confirm which is right.
    data = response.json()['data']

    rows = []
    for entry in data:
        day = datetime.strptime(entry['time'], "%Y-%m-%dT%H:%M:%S.%fZ").date()
        if day < cutoff:
            continue
        price = entry['price']
        rows.append({
            'Date': day,
            'Open': price['open'],
            'High': price['high'],
            'Low': price['low'],
            'Close': price['close'],
        })

    # Passing `columns` keeps the five headers even when no row survives.
    return pd.DataFrame(rows, columns=['Date', 'Open', 'High', 'Low', 'Close'])


In [None]:
def calculate_metrics(data, variable1, variable2):
    """
    Add look-back and look-ahead high/low metrics to ``data`` and return it.

    The frame is modified in place (ten columns are added) and the same
    object is returned.

    Args:
        data (pd.DataFrame): Needs ``Date``, ``High``, ``Low`` and ``Close``
            columns. ``Date`` may hold anything ``pd.to_datetime`` accepts.
        variable1 (int): Look-back window length, counted in rows.
        variable2 (int): Look-ahead window length, counted in rows.

    Returns:
        pd.DataFrame: ``data`` with the added metric columns. Look-back
        columns are NaN until a full window of ``variable1`` rows exists;
        look-ahead columns are NaN for the first ``variable2 - 1`` rows and
        the last ``variable2`` rows.
    """
    # Normalised timestamps so date arithmetic works for date objects,
    # Timestamps and ISO strings alike. The Date column itself is untouched.
    dates = pd.to_datetime(data['Date'])

    def days_since_extreme(price_column, extreme_column):
        """Days between each row and the latest in-window row that set its extreme."""
        prices = data[price_column]
        extremes = data[extreme_column]
        elapsed = []
        for pos in range(len(data)):
            extreme = extremes.iloc[pos]
            if pd.isna(extreme):
                # Warm-up rows have no full window yet, so there is no extreme to locate.
                elapsed.append(float('nan'))
                continue
            # Search only the rows that produced this rolling value. Searching
            # the whole frame could match a later row with the same price and
            # yield a negative number of days.
            first = max(pos - variable1 + 1, 0)
            hits = (prices.iloc[first:pos + 1] == extreme).to_numpy()
            latest_hit = dates.iloc[first:pos + 1][hits].max()
            elapsed.append((dates.iloc[pos] - latest_hit).days)
        return elapsed

    high_last = f'High_Last_{variable1}_Days'
    data[high_last] = data['High'].rolling(window=variable1).max()
    data[f'Days_Since_High_Last_{variable1}_Days'] = days_since_extreme('High', high_last)
    data[f'%_Diff_From_High_Last_{variable1}_Days'] = (
        (data['Close'] - data[high_last]) / data[high_last]) * 100

    low_last = f'Low_Last_{variable1}_Days'
    data[low_last] = data['Low'].rolling(window=variable1).min()
    data[f'Days_Since_Low_Last_{variable1}_Days'] = days_since_extreme('Low', low_last)
    data[f'%_Diff_From_Low_Last_{variable1}_Days'] = (
        (data['Close'] - data[low_last]) / data[low_last]) * 100

    # shift(-n) followed by rolling(n) makes row i aggregate rows i+1 .. i+n.
    high_next = f'High_Next_{variable2}_Days'
    data[high_next] = data['High'].shift(-variable2).rolling(window=variable2).max()
    data[f'%_Diff_From_High_Next_{variable2}_Days'] = (
        (data['Close'] - data[high_next]) / data[high_next]) * 100

    low_next = f'Low_Next_{variable2}_Days'
    data[low_next] = data['Low'].shift(-variable2).rolling(window=variable2).min()
    data[f'%_Diff_From_Low_Next_{variable2}_Days'] = (
        (data['Close'] - data[low_next]) / data[low_next]) * 100

    return data


In [None]:
import requests
import pandas as pd
from datetime import datetime, timedelta

def fetch_crypto_data(crypto_id, vs_currency, days, timeout=30):
    """
    Fetches cryptocurrency price data from the CoinGecko market_chart API.

    Args:
        crypto_id (str): The CoinGecko ID for the cryptocurrency (e.g., 'bitcoin').
        vs_currency (str): The currency in which price data is desired (e.g., 'usd').
        days (str): Number of days to fetch data for, e.g., '30' for the last 30 days
            or 'max' for all available data. Sent to the API as given.
        timeout (float): Seconds to wait for the HTTP response.

    Returns:
        pd.DataFrame: Columns Date (UTC calendar date), Open, High, Low and Close.
        The response supplies a single price per sample, so Open, High and Low
        repeat the Close value.

    Raises:
        Exception: If the API responds with a status code other than 200.
        KeyError: If the response body has no 'prices' entry.
    """
    # Imported here because the surrounding cell only imports datetime/timedelta.
    from datetime import timezone

    url = f'https://api.coingecko.com/api/v3/coins/{crypto_id}/market_chart'
    params = {'vs_currency': vs_currency, 'days': days}

    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")

    # Each sample is indexed as [timestamp in milliseconds, price].
    samples = response.json()['prices']

    # Convert in UTC so the calendar date does not depend on the time zone of
    # the machine running the notebook.
    dates = [
        datetime.fromtimestamp(sample[0] / 1000, tz=timezone.utc).date()
        for sample in samples
    ]
    close_prices = [sample[1] for sample in samples]

    # Assuming open, high, and low are the same as close here.
    df = pd.DataFrame({
        'Date': dates,
        'Open': close_prices,
        'High': close_prices,
        'Low': close_prices,
        'Close': close_prices
    })

    return df

# Fetch data: Bitcoin prices quoted in USD, requesting '30' days from the API.
crypto_data = fetch_crypto_data("bitcoin", "usd", "30")

# Save to Excel file
# NOTE(review): the path is hard-coded to Colab's /content/sample_data
# directory; presumably it has to be changed when running anywhere else.
file_path = '/content/sample_data/crypto_data.xlsx'  # Update with the actual file path
# index=False: the DataFrame's row index is not written as a column.
crypto_data.to_excel(file_path, index=False)

print(f"Crypto data saved to {file_path}")


Crypto data saved to /content/sample_data/crypto_data.xlsx


In [None]:
#pip install requests pandas scikit-learn joblib


In [None]:
import requests
import pandas as pd
from datetime import datetime

# Function to retrieve cryptocurrency data
def fetch_crypto_data(crypto_id, start_date, timeout=30):
    """
    Retrieve USD prices for a coin and keep the samples on or after ``start_date``.

    Args:
        crypto_id (str): CoinGecko coin identifier placed in the request URL.
        start_date (str): Inclusive lower bound as ``YYYY-MM-DD``. It is
            compared as text against the formatted sample date, so it must
            use exactly that zero-padded layout.
        timeout (float): Seconds to wait for the HTTP response.

    Returns:
        pd.DataFrame: Columns ``Date`` (``YYYY-MM-DD`` string, UTC), ``Open``,
        ``High``, ``Low`` and ``Close``. Only one price per sample is read, so
        the four price columns hold the same value.

    Raises:
        Exception: If the API responds with a status code other than 200.
    """
    # Imported here because the surrounding cell only imports `datetime`.
    from datetime import timezone

    url = f'https://api.coingecko.com/api/v3/coins/{crypto_id}/market_chart'
    params = {
        'vs_currency': 'usd',
        'days': 'max',
        'interval': 'daily'
    }
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=timeout)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")

    # A body without 'prices' is treated as "no samples" rather than an error.
    data = response.json().get('prices', [])

    dates, prices = [], []
    for entry in data:
        # entry[0] is a millisecond timestamp. Timezone-aware conversion
        # replaces the deprecated utcfromtimestamp and gives the same UTC date.
        date = datetime.fromtimestamp(entry[0] / 1000, tz=timezone.utc).strftime('%Y-%m-%d')
        if date >= start_date:
            dates.append(date)
            prices.append(entry[1])

    # Create DataFrame; every price column is filled from the same samples.
    df = pd.DataFrame({
        'Date': dates,
        'Open': prices,
        'High': prices,
        'Low': prices,
        'Close': prices
    })

    return df

# Function to calculate metrics
def calculate_metrics(data, variable1, variable2):
    """
    Add look-back and look-ahead high/low metrics to ``data`` and return it.

    The frame is modified in place (ten columns are added) and the same
    object is returned.

    Args:
        data (pd.DataFrame): Needs ``Date``, ``High``, ``Low`` and ``Close``
            columns. ``Date`` may hold ``YYYY-MM-DD`` strings (as produced by
            the fetcher defined in this cell), date objects or timestamps.
        variable1 (int): Look-back window length, counted in rows.
        variable2 (int): Look-ahead window length, counted in rows.

    Returns:
        pd.DataFrame: ``data`` with the added metric columns. Look-back
        columns are NaN until a full window of ``variable1`` rows exists;
        look-ahead columns are NaN for the first ``variable2 - 1`` rows and
        the last ``variable2`` rows.
    """
    # String dates cannot be subtracted, so the arithmetic below works on a
    # parsed copy. The Date column itself is left as the caller supplied it.
    timestamps = pd.to_datetime(data['Date'])
    row_count = len(data)

    def elapsed_days(source, rolled):
        """Days from each row back to the latest in-window row equal to its rolled extreme."""
        source_values = data[source]
        rolled_values = data[rolled]
        result = []
        for position in range(row_count):
            target = rolled_values.iloc[position]
            if pd.isna(target):
                # No full window yet: nothing to measure from.
                result.append(float('nan'))
                continue
            # Look only inside the window that produced `target`. Matching
            # against the whole frame could pick a later row with the same
            # price and report a negative number of days.
            window_start = max(position - variable1 + 1, 0)
            window = slice(window_start, position + 1)
            matches = (source_values.iloc[window] == target).to_numpy()
            latest_match = timestamps.iloc[window][matches].max()
            result.append((timestamps.iloc[position] - latest_match).days)
        return result

    data[f'High_Last_{variable1}_Days'] = data['High'].rolling(window=variable1).max()
    data[f'Days_Since_High_Last_{variable1}_Days'] = elapsed_days(
        'High', f'High_Last_{variable1}_Days'
    )
    data[f'%_Diff_From_High_Last_{variable1}_Days'] = (
        (data['Close'] - data[f'High_Last_{variable1}_Days']) / data[f'High_Last_{variable1}_Days']
    ) * 100

    data[f'Low_Last_{variable1}_Days'] = data['Low'].rolling(window=variable1).min()
    data[f'Days_Since_Low_Last_{variable1}_Days'] = elapsed_days(
        'Low', f'Low_Last_{variable1}_Days'
    )
    data[f'%_Diff_From_Low_Last_{variable1}_Days'] = (
        (data['Close'] - data[f'Low_Last_{variable1}_Days']) / data[f'Low_Last_{variable1}_Days']
    ) * 100

    # shift(-n) followed by rolling(n) makes row i aggregate rows i+1 .. i+n.
    data[f'High_Next_{variable2}_Days'] = data['High'].shift(-variable2).rolling(window=variable2).max()
    data[f'%_Diff_From_High_Next_{variable2}_Days'] = (
        (data['Close'] - data[f'High_Next_{variable2}_Days']) / data[f'High_Next_{variable2}_Days']
    ) * 100

    data[f'Low_Next_{variable2}_Days'] = data['Low'].shift(-variable2).rolling(window=variable2).min()
    data[f'%_Diff_From_Low_Next_{variable2}_Days'] = (
        (data['Close'] - data[f'Low_Next_{variable2}_Days']) / data[f'Low_Next_{variable2}_Days']
    ) * 100

    return data


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import joblib

def train_model(data, variable1, variable2):
    """
    Train two random-forest regressors on the look-back metrics.

    One model predicts ``%_Diff_From_High_Next_{variable2}_Days`` and the
    other ``%_Diff_From_Low_Next_{variable2}_Days``. Both are fitted on the
    same training rows, evaluated with mean absolute error on a 20% hold-out,
    and written to ``model_high.pkl`` / ``model_low.pkl``.

    Args:
        data (pd.DataFrame): Frame holding the four look-back feature columns
            for ``variable1`` and the two target columns for ``variable2``.
        variable1 (int): Look-back length used in the feature column names.
        variable2 (int): Look-ahead length used in the target column names.

    Returns:
        tuple: ``(model_high, model_low)``, the two fitted regressors.

    Raises:
        KeyError: If a feature or target column is missing from ``data``.
        ValueError: If fewer than two rows have every feature and both targets.
    """
    features = [
        f'Days_Since_High_Last_{variable1}_Days',
        f'%_Diff_From_High_Last_{variable1}_Days',
        f'Days_Since_Low_Last_{variable1}_Days',
        f'%_Diff_From_Low_Last_{variable1}_Days'
    ]
    target_high = f'%_Diff_From_High_Next_{variable2}_Days'
    target_low = f'%_Diff_From_Low_Next_{variable2}_Days'

    # Drop incomplete rows ONCE, across features and both targets. Dropping
    # NaNs from X and each y separately removes different rows from each
    # (features lose the head of the frame, targets also lose the tail), and
    # the split pairs them by position. They would then either differ in
    # length or be silently shifted against each other.
    complete = data.dropna(subset=features + [target_high, target_low])
    if len(complete) < 2:
        raise ValueError(
            "Not enough complete rows to train: need at least 2 rows with all "
            f"features and targets present, got {len(complete)}"
        )

    X = complete[features]
    y_high = complete[target_high]
    y_low = complete[target_low]

    # A single call splits all three objects with the same row assignment.
    X_train, X_test, y_train_high, y_test_high, y_train_low, y_test_low = train_test_split(
        X, y_high, y_low, test_size=0.2, random_state=42
    )

    model_high = RandomForestRegressor(n_estimators=100, random_state=42)
    model_low = RandomForestRegressor(n_estimators=100, random_state=42)

    model_high.fit(X_train, y_train_high)
    model_low.fit(X_train, y_train_low)

    predictions_high = model_high.predict(X_test)
    predictions_low = model_low.predict(X_test)
    mae_high = mean_absolute_error(y_test_high, predictions_high)
    mae_low = mean_absolute_error(y_test_low, predictions_low)

    print(f'MAE for High Predictions: {mae_high}')
    print(f'MAE for Low Predictions: {mae_low}')

    # Persisted relative to the current working directory.
    joblib.dump(model_high, 'model_high.pkl')
    joblib.dump(model_low, 'model_low.pkl')

    return model_high, model_low

def predict_outcomes(model_high, model_low, input_data):
    """
    Run one feature vector through both models.

    Args:
        model_high: Object with a ``predict`` method, used for the high target.
        model_low: Object with a ``predict`` method, used for the low target.
        input_data: A single sample; it is wrapped in a list so each model
            receives a one-row batch.

    Returns:
        tuple: ``(high, low)``, the first element of each model's prediction.
    """
    # Query the high model first, then the low model, each with its own one-row batch.
    batch_outputs = [model.predict([input_data]) for model in (model_high, model_low)]
    high_output, low_output = batch_outputs
    return high_output[0], low_output[0]


In [None]:
from IPython import get_ipython
from IPython.display import display
import requests
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
import joblib

# Fetch cryptocurrency data
def fetch_crypto_data(crypto_id, vs_currency, days, timeout=30):
    """
    Fetch price samples for a coin from the CoinGecko market_chart endpoint.

    Args:
        crypto_id (str): CoinGecko coin identifier placed in the request URL.
        vs_currency (str): Quote currency sent as the ``vs_currency`` parameter.
        days: Value sent as the ``days`` parameter, passed through unchanged.
        timeout (float): Seconds to wait for the HTTP response.

    Returns:
        pd.DataFrame: Columns ``Date`` (UTC calendar date), ``Open``, ``High``,
        ``Low`` and ``Close``. Only one price per sample is read, so the four
        price columns hold the same value.

    Raises:
        Exception: If the API responds with a status code other than 200.
        KeyError: If the response body has no 'prices' entry.
    """
    # Imported here because the surrounding cell only imports `datetime`.
    from datetime import timezone

    url = f'https://api.coingecko.com/api/v3/coins/{crypto_id}/market_chart'
    params = {
        'vs_currency': vs_currency,
        'days': days
    }
    # A timeout keeps the notebook from hanging forever on a stalled connection.
    response = requests.get(url, params=params, timeout=timeout)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch data: {response.status_code}")

    data = response.json()['prices']
    dates, close_prices = [], []
    for entry in data:
        # entry[0] is a millisecond timestamp. Converting in UTC keeps the
        # calendar date independent of the machine's local time zone.
        dates.append(datetime.fromtimestamp(entry[0] / 1000, tz=timezone.utc).date())
        close_prices.append(entry[1])

    df = pd.DataFrame({
        'Date': dates,
        'Open': close_prices,
        'High': close_prices,
        'Low': close_prices,
        'Close': close_prices
    })
    return df

# Calculate metrics
def calculate_metrics(data, variable1, variable2):
    """
    Add look-back and look-ahead high/low metrics to ``data`` and return it.

    The frame is modified in place (ten columns are added) and the same
    object is returned.

    Args:
        data (pd.DataFrame): Needs ``Date``, ``High``, ``Low`` and ``Close``
            columns. ``Date`` may hold anything ``pd.to_datetime`` accepts.
        variable1 (int): Look-back window length, counted in rows.
        variable2 (int): Look-ahead window length, counted in rows.

    Returns:
        pd.DataFrame: ``data`` with the added metric columns. Look-back
        columns are NaN until a full window of ``variable1`` rows exists;
        look-ahead columns are NaN for the first ``variable2 - 1`` rows and
        the last ``variable2`` rows.
    """
    high_last = 'High_Last_{}_Days'.format(variable1)
    low_last = 'Low_Last_{}_Days'.format(variable1)
    high_next = 'High_Next_{}_Days'.format(variable2)
    low_next = 'Low_Next_{}_Days'.format(variable2)

    # Parsed copy of the dates, used only for the day arithmetic below.
    when = pd.to_datetime(data['Date'])

    def days_since(price_column, extreme_column):
        """Days from each row back to the latest in-window row matching its rolling extreme."""
        days = []
        for idx in range(len(data)):
            extreme = data[extreme_column].iloc[idx]
            if pd.isna(extreme):
                # Warm-up rows: the rolling window is not full yet.
                days.append(float('nan'))
                continue
            # Only the rows inside this row's own window are candidates. A
            # whole-frame lookup could match a later row with the same price
            # and produce a negative number of days.
            start = max(idx - variable1 + 1, 0)
            is_extreme = (data[price_column].iloc[start:idx + 1] == extreme).to_numpy()
            most_recent = when.iloc[start:idx + 1][is_extreme].max()
            days.append((when.iloc[idx] - most_recent).days)
        return days

    data[high_last] = data['High'].rolling(window=variable1).max()
    data['Days_Since_High_Last_{}_Days'.format(variable1)] = days_since('High', high_last)
    data['%_Diff_From_High_Last_{}_Days'.format(variable1)] = (
        (data['Close'] - data[high_last]) / data[high_last]) * 100

    data[low_last] = data['Low'].rolling(window=variable1).min()
    data['Days_Since_Low_Last_{}_Days'.format(variable1)] = days_since('Low', low_last)
    data['%_Diff_From_Low_Last_{}_Days'.format(variable1)] = (
        (data['Close'] - data[low_last]) / data[low_last]) * 100

    # shift(-n) followed by rolling(n) makes row i aggregate rows i+1 .. i+n.
    data[high_next] = data['High'].shift(-variable2).rolling(window=variable2).max()
    data['%_Diff_From_High_Next_{}_Days'.format(variable2)] = (
        (data['Close'] - data[high_next]) / data[high_next]) * 100

    data[low_next] = data['Low'].shift(-variable2).rolling(window=variable2).min()
    data['%_Diff_From_Low_Next_{}_Days'.format(variable2)] = (
        (data['Close'] - data[low_next]) / data[low_next]) * 100

    return data

# Train model
def train_model(data, variable1, variable2):
    """
    Fit a random-forest regressor that predicts ``Close`` from the metric columns.

    Rows with a NaN in any feature column are discarded, the remainder is
    split 80/20, the hold-out mean absolute error is printed, and the fitted
    model is written to ``crypto_price_predictor.joblib``.

    Args:
        data (pd.DataFrame): Frame with ``Close`` and the eight metric columns
            named for ``variable1`` / ``variable2``.
        variable1 (int): Look-back length used in the feature column names.
        variable2 (int): Look-ahead length used in the feature column names.

    Returns:
        The fitted ``RandomForestRegressor``.
    """
    lookback_prefixes = ('Days_Since_High', '%_Diff_From_High', 'Days_Since_Low', '%_Diff_From_Low')
    lookahead_prefixes = ('High', 'Low', '%_Diff_From_High', '%_Diff_From_Low')

    # NOTE(review): the look-ahead columns are computed from future rows, and
    # the %_Diff ones also from Close itself - confirm that using them as
    # inputs for a Close prediction is intended.
    features = (
        [f'{prefix}_Last_{variable1}_Days' for prefix in lookback_prefixes]
        + [f'{prefix}_Next_{variable2}_Days' for prefix in lookahead_prefixes]
    )

    # The look-ahead high/low columns are already part of `features`, so
    # listing the features alone removes the same rows.
    usable_rows = data.dropna(subset=features)
    inputs = usable_rows[features]
    closing_price = usable_rows['Close']

    # Fixed seed so the 80/20 partition is reproducible.
    train_inputs, test_inputs, train_close, test_close = train_test_split(
        inputs, closing_price, test_size=0.2, random_state=42
    )

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(train_inputs, train_close)

    # Score on the held-out rows.
    mae = mean_absolute_error(test_close, model.predict(test_inputs))
    print(f'Mean Absolute Error: {mae}')

    # Persisted relative to the current working directory.
    joblib.dump(model, 'crypto_price_predictor.joblib')

    return model

# Example usage
# Runs whenever this cell is executed as the main module. `data` and `model`
# are left at module level, and a later cell passes `data` to the
# train_model imported from ml_model.
if __name__ == "__main__":
    data = fetch_crypto_data('bitcoin', 'usd', 30)  # Fetch last 30 days of Bitcoin data
    data = calculate_metrics(data, 14, 7)  # Calculate metrics with look-back of 14 days and forecast of 7 days
    model = train_model(data, 14, 7)  # Train the model


Mean Absolute Error: 75.38643143888191


In [None]:
# Colab-specific upload step (the import comes from the google.colab package).
from google.colab import files
# NOTE(review): the recorded output shows the upload being saved as
# 'ml_model (1).py', so presumably an earlier ml_model.py already existed
# and is the file the next cell imports - confirm.
uploaded = files.upload()

Saving ml_model.py to ml_model (1).py


In [17]:
from ml_model import train_model, predict_outcomes

# Train the model for high and low prices
# `data` is not defined in this cell; it is the frame left at module level by
# the earlier cell that ran calculate_metrics(data, 14, 7).
# NOTE(review): train_model here comes from the uploaded ml_model module, not
# from the notebook cells - confirm the two are in sync.
model_high, model_low = train_model(data, 14, 7)

# One hand-written sample of four values.
# NOTE(review): presumably ordered as days-since-high, % diff from high,
# days-since-low, % diff from low - verify against ml_model.py.
input_features = [10, 5.0, -5, 9.0]

# Make predictions
predicted_high, predicted_low = predict_outcomes(model_high, model_low, input_features)


print(f"Predicted High: {predicted_high}")
print(f"Predicted Low: {predicted_low}")


Mean Absolute Error for High: 0.5629203983271615
Mean Absolute Error for Low: 0.5197922178737594
Predicted High: -0.24962587033027564
Predicted Low: 0.23746344667725736
