In [None]:

# ========== SETTINGS ==========
# Central configuration cell for defining imports, inputs, outputs, and settings.

# ========== 1. Imports ==========
import os
import pandas as pd
import json
import yfinance as yf
from datetime import datetime
from tqdm import tqdm

# ========== 2. Configurable Settings ==========
# Runtime switches shared by the notebook cells.
SETTINGS = dict(
    DEBUG=True,               # True: print debug statements; False: suppress debug output
    DATA_SOURCE="yfinance",   # Provider of historical data (yfinance or another API)
    SAVE_FORMAT="json",       # Format used when saving collected data
)

# ========== 3. Input Paths ==========
# Locations the notebook reads from
DATA_SOURCE_PATH = '1_data/collected/'  # Data collection source files
MODEL_INPUT_PATH = '3_models/'          # Existing models, loaded when needed

# ========== 4. Output Paths ==========
# Locations the notebook writes to
DATA_OUTPUT_DIR = '1_data/collected/'   # Collected Bitcoin data
MODEL_OUTPUT_DIR = '3_models/'          # Trained models
LOGS_OUTPUT_DIR = '6_logs/'             # Log files
REPORTS_OUTPUT_DIR = '7_reports/'       # Performance and analytics reports
TROUBLESHOOT_OUTPUT_FILE = 'BitBot_Notebook_Updated.ipynb'  # Updated notebook written by the troubleshooter

# Create every output directory up front (no error if it already exists)
for path in (DATA_OUTPUT_DIR, MODEL_OUTPUT_DIR, LOGS_OUTPUT_DIR, REPORTS_OUTPUT_DIR):
    os.makedirs(path, exist_ok=True)

# ========== 5. Available Documents for Selection ==========
# Documents offered to the user in troubleshooting sessions
AVAILABLE_DOCUMENTS = [
    "README.md",
    "BitBot_Notebook.ipynb",
    "BitBot_Notebook_Updated.ipynb",
]

# Summarize the configured locations and file-name patterns for reference
print(
    "Settings initialized. Key directories and files:",
    f"Data collection output directory: {DATA_OUTPUT_DIR}bitcoin_data_*.json",  # '*' indicates iterative naming
    f"Model output directory: {MODEL_OUTPUT_DIR}model_name.joblib",
    f"Logs directory: {LOGS_OUTPUT_DIR}log_file.log",
    f"Reports directory: {REPORTS_OUTPUT_DIR}report_*.csv",  # For performance reports
    f"Troubleshooter output file: {TROUBLESHOOT_OUTPUT_FILE}",
    sep="\n",
)

print("\nSetup complete. Ready to collect data and troubleshoot BitBot!")


Setup complete. Ready to collect data for BitBot!


In [None]:
# ========== DATA COLLECTION SECTION ==========
# Purpose: This section collects Bitcoin data from Yahoo Finance for multiple timeframes, starting from the earliest available date.
# Each month’s data for each timeframe is saved in JSON format, allowing review before being added to `bitcoin_data_updated`.
# At the end of each year, all the monthly data is reviewed before saving to the main ledger, `bitcoin_data69`.
# If the data is satisfactory, `bitcoin_data69` is updated and verified with an algebra challenge prompt before finalizing.

def collect_bitcoin_data(output_dir):
    """
    Collects Bitcoin (BTC-USD) data from Yahoo Finance for the default timeframes
    (1m, 15m, 1h, 1d, 1mo), one calendar month at a time.

    After each month the year-to-date data is written to `bitcoin_data_updated.json`
    and the user is asked to review it. At the end of each year the user is asked to
    confirm, and must answer a short arithmetic question correctly, before that year's
    data is written to `bitcoin_data69.json`.

    Parameters:
    - output_dir (str): The directory to save collected data files.

    Returns:
    - collected_data (dict): Maps each processed year (int) to a dictionary of
      {timeframe: [one JSON string per month that returned rows]}.
      Returns None when collection stops early (no history available, a download
      error, or the user typing anything at a monthly review prompt).
    """

    # Initialize ticker symbol and timeframes for data collection
    ticker = "BTC-USD"
    timeframes = ["1m", "15m", "1h", "1d", "1mo"]  # Timeframes to collect data for
    collected_data = {}  # Dictionary to store data by year and timeframe

    # Step 1: Determine the start year of the available Bitcoin data (Yahoo Finance)
    ticker_info = yf.Ticker(ticker)
    hist_data = ticker_info.history(period="max", interval="1mo")
    if hist_data.empty:
        # Without this guard, index[0] below would raise IndexError
        print(f"Error: no historical data returned for {ticker}; nothing to collect.")
        return None
    start_year = hist_data.index[0].year  # Year of the first available row
    today = datetime.now()  # Reference point for "present" (year and month)

    # Step 2: Loop through each year from start_year to the present year
    for year in range(start_year, today.year + 1):
        yearly_data = {tf: [] for tf in timeframes}  # Monthly JSON strings per timeframe for this year
        print(f"\n========== Collecting data for year {year} ==========")

        # Step 3: Loop through each month to collect data
        for month in range(1, 13):
            if (year, month) > (today.year, today.month):
                break  # Months that have not started yet cannot have data

            month_str = f"{year}-{month:02d}-01"  # First day of the month (inclusive start)
            # Exclusive end date: first day of the following month. December must roll
            # over to January of the next year; "YYYY-13-01" is not a valid date.
            if month == 12:
                month_end = f"{year + 1}-01-01"
            else:
                month_end = f"{year}-{month + 1:02d}-01"

            # Loop over each timeframe and collect data month-by-month
            # NOTE(review): Yahoo is understood to limit how far back intraday intervals
            # reach, so older months may come back empty for 1m/15m/1h - confirm.
            for tf in timeframes:
                try:
                    # Collect data for one month with the specified timeframe
                    data = yf.download(ticker, start=month_str, end=month_end, interval=tf)
                    data.reset_index(inplace=True)  # Reset index for easier JSON conversion

                    # Only months that actually returned rows are stored
                    if not data.empty:
                        monthly_json = data.to_json(orient="records", date_format="iso")
                        yearly_data[tf].append(monthly_json)  # Add monthly JSON data to yearly storage

                        # Log collection status
                        print(f"[DEBUG] Collected {tf} data for {year}-{month:02d} with {len(data)} rows")
                except Exception as e:
                    print(f"Error collecting {tf} data for {year}-{month:02d}: {e}")
                    return None  # Exit the function if data collection fails for any reason

            # Step 4: Save the year-to-date data to `bitcoin_data_updated.json` after each month
            updated_filename = os.path.join(output_dir, 'bitcoin_data_updated.json')
            with open(updated_filename, 'w') as f:
                json.dump(yearly_data, f)
            print(f"[INFO] Monthly data for {year}-{month:02d} saved to bitcoin_data_updated.json")

            # Prompt user for review after each month; any typed text stops the collection
            review_prompt = input("Review this month's data before proceeding. If everything looks good, press Enter. To exit, press Ctrl+C.")
            if review_prompt != "":
                print("Exiting data collection.")
                return None

        # Record the finished year so the returned dictionary is actually populated
        collected_data[year] = yearly_data

        # Step 5: At the end of each year, prompt user to review the full year's data
        print(f"\n========== End of {year} Data Review ==========")
        year_review = input("Review the entire year’s data before finalizing. If ready to save, press Enter. To exit, press Ctrl+C.")

        if year_review == "":
            # Fun math prompt to confirm overwriting the main ledger. It is asked BEFORE
            # the file is written so that a wrong answer really leaves the ledger untouched.
            print("Answer a simple math question to confirm this data save!")
            math_question = "What is 5 + 7? "
            if input(math_question) == "12":
                # NOTE: the ledger is opened in 'w' mode, so it holds only this year's data
                final_filename = os.path.join(output_dir, 'bitcoin_data69.json')
                with open(final_filename, 'w') as f:
                    json.dump(yearly_data, f)
                print(f"[INFO] Yearly data for {year} saved to bitcoin_data69.json")
                print("[SUCCESS] bitcoin_data69.json has been updated.")
            else:
                print("[WARNING] Incorrect answer! bitcoin_data69.json was not updated. Try again if needed.")

    return collected_data  # Return collected data dictionary for reference


In [None]:
# ========== ENHANCED TECHNICAL ANALYSIS FEATURE ENGINEERING ==========
# Generates technical indicators for the input DataFrame and provides feedback on each step.

import pandas as pd
import numpy as np
import os

# Folder that receives the indicator-enriched data written at the end of this cell;
# created here (no error if it already exists) so the save has a target directory.
processed_data_dir = '1_data/2_processed/'
os.makedirs(processed_data_dir, exist_ok=True)

def load_collected_data(data_path):
    """
    Read the raw collected JSON file at `data_path` and return it as parsed by
    `pandas.read_json`.
    """
    with open(data_path, 'r') as handle:
        return pd.read_json(handle)

def generate_technical_indicators(df):
    """
    Generates a set of technical indicators from the 'Close' column, printing feedback
    after each group is added.

    Columns added (in this order): SMA_20, SMA_50, RSI, MACD, MACD_signal, BB_Middle,
    BB_Upper, BB_Lower, Volatility, EMA_12, EMA_26. All windows are counted in rows
    (periods) of the input, whatever its sampling interval is.

    Parameters:
    - df (pandas.DataFrame): Input data; must contain a 'Close' column. It is not
      modified - the indicators are added to a copy.

    Returns:
    - pandas.DataFrame: Copy of the input with the indicator columns added and every
      row containing a NaN removed (the leading rows where rolling windows are not
      yet full, plus any row that was already incomplete).

    Raises:
    - KeyError: If the input has no 'Close' column.
    """
    print("[INFO] Generating technical indicators...")

    df = df.copy()  # Work on a copy so the caller's frame is left untouched
    close = df['Close']

    # Series shared by several indicators, computed once
    window_20 = close.rolling(window=20)
    sma_20 = window_20.mean()
    std_20 = window_20.std()
    ema_12 = close.ewm(span=12, adjust=False).mean()
    ema_26 = close.ewm(span=26, adjust=False).mean()

    # Moving Averages
    df['SMA_20'] = sma_20                             # 20-period Simple Moving Average
    df['SMA_50'] = close.rolling(window=50).mean()    # 50-period Simple Moving Average
    print("[DEBUG] Added SMA indicators.")

    # Relative Strength Index (RSI): 100 - 100 / (1 + RS), where RS is the average
    # gain divided by the average loss over the last 14 periods (simple averages).
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(window=14).mean()
    # Negate before clipping so "no loss" is +0.0 rather than -0.0 (keeps gain/0 at +inf)
    avg_loss = (-delta).clip(lower=0).rolling(window=14).mean()
    rs = avg_gain / avg_loss  # +inf when there were no losses -> RSI of 100
    # A completely flat window gives 0/0 = NaN; such rows are removed by dropna() below
    df['RSI'] = 100 - (100 / (1 + rs))
    print("[DEBUG] Added RSI indicator.")

    # MACD (Moving Average Convergence Divergence)
    df['MACD'] = ema_12 - ema_26
    df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()  # MACD Signal line
    print("[DEBUG] Added MACD and Signal Line indicators.")

    # Bollinger Bands: 20-period mean +/- 2 standard deviations
    df['BB_Middle'] = sma_20
    df['BB_Upper'] = sma_20 + 2 * std_20
    df['BB_Lower'] = sma_20 - 2 * std_20
    print("[DEBUG] Added Bollinger Bands (Upper, Middle, Lower).")

    # Volatility: 20-period rolling standard deviation of the close
    df['Volatility'] = std_20
    print("[DEBUG] Added Volatility indicator.")

    # Exponential Moving Averages (EMA)
    df['EMA_12'] = ema_12
    df['EMA_26'] = ema_26
    print("[DEBUG] Added EMA indicators (12, 26).")

    print("[INFO] Technical indicators generated successfully.")
    return df.dropna()  # Drop NaNs created by rolling calculations

def process_and_save_data(raw_data_path, output_path):
    """
    Run the processing pipeline end to end: load the raw file at `raw_data_path`,
    add technical indicators, and write the result to `output_path` as JSON records
    with ISO-formatted dates. A status line is printed after each stage.
    """
    # Stage 1: load
    frame = load_collected_data(raw_data_path)
    row_count = len(frame)
    print(f"[INFO] Loaded raw data with {row_count} rows.")

    # Stage 2: enrich with indicators
    enriched = generate_technical_indicators(frame)
    print("[INFO] Technical indicators added to data.")

    # Stage 3: persist
    enriched.to_json(output_path, orient="records", date_format="iso")
    print(f"[INFO] Processed data saved to {output_path}.")

# Example usage
# Reads the raw file 'bitcoin_data.json' from the data collection output directory
# (DATA_OUTPUT_DIR, defined in the settings cell); replace the file name with your
# actual raw data file name if it differs.
raw_data_path = os.path.join(DATA_OUTPUT_DIR, 'bitcoin_data.json')
processed_data_path = os.path.join(processed_data_dir, 'processed_bitcoin_data.json')
process_and_save_data(raw_data_path, processed_data_path)


In [None]:
# ========== FUNDAMENTAL ANALYSIS DATA COLLECTION ==========
# Collects fundamental metrics relevant to Bitcoin's network and market health.

import requests
import json
from datetime import datetime

# Folder that receives the fundamental-metrics file written at the end of this cell;
# created here (no error if it already exists) so the save has a target directory.
fundamental_data_dir = '1_data/2_fundamental/'
os.makedirs(fundamental_data_dir, exist_ok=True)

def fetch_on_chain_data():
    """
    Fetches on-chain data from a crypto API (e.g., Glassnode or similar if available).

    Returns:
    - The decoded JSON body when the server answers with HTTP 200.
    - An empty dict when the request fails, times out, returns another status code,
      or the body is not valid JSON. An error line is printed in each of those cases.
    """
    # Placeholder URL - Replace with a valid endpoint if available.
    url = "https://api.cryptofundamentals.com/v1/metrics/BTC"
    try:
        # Without a timeout an unresponsive server could block this call indefinitely
        response = requests.get(url, timeout=10)
        if response.status_code != 200:
            print(f"[ERROR] Failed to fetch data: {response.status_code}")
            return {}
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # RequestException: connection problems and timeouts; ValueError: undecodable JSON body
        print(f"[ERROR] Exception occurred: {e}")
        return {}

    print("[INFO] On-chain data fetched successfully.")
    return data

def parse_and_save_fundamental_data(data, output_path):
    """
    Pick the known fundamental metrics out of the raw API payload `data`, stamp them
    with the current local time, and write the record to `output_path` as indented JSON.

    Metrics missing from `data` are stored as null. Any error while building or
    writing the record is reported with an [ERROR] line instead of being raised.
    """
    metric_keys = (
        "transaction_volume",
        "active_addresses",
        "hash_rate",
        "difficulty",
        "market_cap",
        "liquidity_index",
    )
    try:
        # The timestamp goes first so it leads the saved record
        record = {"timestamp": datetime.now().isoformat()}
        for key in metric_keys:
            record[key] = data.get(key)  # None when the payload lacks the metric

        with open(output_path, 'w') as out_file:
            json.dump(record, out_file, indent=4)
        print(f"[INFO] Fundamental data saved to {output_path}.")
    except Exception as err:
        print(f"[ERROR] Error processing and saving data: {err}")

# Run data collection and saving
# fetch_on_chain_data() returns {} when the fetch fails; the file is still written in
# that case, containing the timestamp and null for every metric.
on_chain_data = fetch_on_chain_data()
fundamental_output_path = os.path.join(fundamental_data_dir, 'bitcoin_fundamental_data.json')
parse_and_save_fundamental_data(on_chain_data, fundamental_output_path)


In [None]:
# ========== ITERATIVE TROUBLESHOOTING CELL ==========
# Uses the troubleshooter module for interactive cell improvement suggestions.
# This cell integrates with the settings and data collection structures to enhance compatibility.

# Import the troubleshooter module
from troubleshooter import troubleshoot_cell

def start_troubleshooting():
    """
    Interactive troubleshooting interface: walks the user through choosing documents,
    cells and an action, then calls `troubleshoot_cell` for every selected cell.

    Steps:
    1. Pick documents by number from AVAILABLE_DOCUMENTS (defined in the settings cell).
       Entries that are not numbers, or numbers outside the list, are ignored.
    2. Pick cell indices, with an optional preview of the first 150 characters of each
       cell. The preview needs a global `notebook` object exposing `.cells[i].source`;
       it is skipped with a warning when that object is not available.
    3. Pick the primary action (or enter a custom prompt).
    4. Confirm; on "yes" `troubleshoot_cell(cell_index, action=...)` runs per cell.

    Returns:
    - None. The function returns early if the action choice is invalid.
    """
    print("========== TROUBLESHOOTING INTERFACE ==========")

    # Step 1: Document Selection
    print("\nStep 1: Select the document(s) to include in this troubleshooting session:")
    available_docs = AVAILABLE_DOCUMENTS  # Uses AVAILABLE_DOCUMENTS defined in the settings cell
    for i, doc in enumerate(available_docs, start=1):
        print(f"{i}. {doc}")

    selected_docs_input = input("Enter document numbers separated by commas (e.g., 1,2): ").strip()
    selected_docs = []
    for choice in selected_docs_input.split(","):
        choice = choice.strip()
        if not choice.isdecimal():  # isdecimal() only accepts text that int() can parse
            continue
        doc_number = int(choice)
        # Menu numbers are 1-based. 0 would otherwise wrap around to the last document
        # (index -1), and numbers past the end would raise IndexError.
        if 1 <= doc_number <= len(available_docs):
            selected_docs.append(available_docs[doc_number - 1])
        else:
            print(f"[WARNING] Ignoring document number {doc_number}: not in the list above.")
    print(f"\n[INFO] Selected documents for this session: {selected_docs}\n")

    # Step 2: Cell Selection with Optional Preview
    print("Step 2: Select the cells to troubleshoot (enter indices separated by commas).")
    cell_indices = input("Enter the cell indices to troubleshoot (e.g., 1,3,5): ").strip()
    cell_indices = [int(idx.strip()) for idx in cell_indices.split(",") if idx.strip().isdecimal()]
    print(f"[INFO] Selected cell indices: {cell_indices}")

    # Optional: Preview each cell’s contents
    preview_choice = input("\nWould you like to preview the selected cells' content? (yes/no): ").strip().lower()
    if preview_choice == "yes":
        # `notebook` is not created anywhere in this cell, so look it up defensively
        # instead of failing with NameError when it has not been loaded.
        loaded_notebook = globals().get("notebook")
        if loaded_notebook is None:
            print("[WARNING] No loaded `notebook` object found; skipping cell previews.")
        else:
            print("\nDisplaying a preview of the selected cells:")
            cells = loaded_notebook.cells
            for idx in cell_indices:
                if idx >= len(cells):
                    print(f"[WARNING] Cell {idx} does not exist in the notebook; skipping preview.")
                    continue
                cell_content = cells[idx].source[:150]  # Displaying the first 150 characters as preview
                print(f"\n[Cell {idx} Preview]: {cell_content}...")
            print("\n[INFO] End of cell previews.\n")

    # Step 3: Action Selection
    print("Step 3: Choose the primary action for the selected cells:")
    primary_actions = ["Custom", "Create New Cell", "Modify Cell(s)", "Create Documentation"]
    for i, action in enumerate(primary_actions, start=1):
        print(f"{i}. {action}")

    action_choice = input("Enter the number of your action choice: ").strip()
    try:
        action_choice = int(action_choice) - 1
        if action_choice < 0 or action_choice >= len(primary_actions):
            raise ValueError("Invalid action choice.")
    except ValueError:
        print("[ERROR] Invalid choice. Please enter a valid number from the list.")
        return

    # Step 4: Custom Prompt Handling
    if primary_actions[action_choice] == "Custom":
        custom_prompt = input("Enter your custom prompt for Gemini: ")
        use_case = custom_prompt
    else:
        use_case = primary_actions[action_choice]

    # Confirm and Execute Troubleshooting
    print("\n========== Confirm and Execute ==========")
    print(f"Action: {use_case}\nSelected Cells: {cell_indices}\nSelected Documents: {selected_docs}")
    confirm = input("Confirm these choices and start troubleshooting? (yes/no): ").strip().lower()

    if confirm == "yes":
        # Execute troubleshooting for each selected cell with the chosen action
        for cell_index in cell_indices:
            print(f"\nStarting troubleshooting for cell {cell_index} with action '{use_case}'...")
            troubleshoot_cell(cell_index, action=use_case)  # Call the troubleshooting function with the action
        print("\n[INFO] Troubleshooting completed successfully for selected cells.")
    else:
        print("[INFO] Troubleshooting session canceled by user.")

# Start the troubleshooting interface
# (interactive: the function waits on input() prompts as soon as this cell runs)
start_troubleshooting()


In [None]:

# ========== EXECUTE DATA COLLECTION ==========
# Run the data collection and save results into DATA_OUTPUT_DIR, the collected-data
# directory defined in the settings cell.
collected_data = collect_bitcoin_data(DATA_OUTPUT_DIR)
print("Data collection complete.")
