In [None]:
import os

# ==========================================
# FULL README CONTENT
# ==========================================
# Markdown source for the project README. Every code fence opened inside this
# string must also be closed; otherwise the rest of the document renders as a
# single code block on GitHub.
readme_text = """
# 📘 Crypto Buy/Sell Classification Capstone
### End-to-End Machine Learning Pipeline Using Real Binance Data

**Status:** Completed
**UI:** Streamlit Dashboard Included

---

## 1. Project Overview

This project implements a fully automated Machine Learning pipeline that predicts cryptocurrency market movements (Buy, Sell, or Hold). It fetches historical data from the **Binance API**, processes technical indicators, trains multiple ML models (XGBoost, Random Forest, CatBoost, etc.), and evaluates them to select the best performer.

### Key Features
*   **Real-time Data:** Fetches live/historical OHLCV data directly from Binance.
*   **Feature Engineering:** Calculates RSI, MACD, Bollinger Bands, and Moving Averages.
*   **Dynamic Labeling:** Solves class imbalance using volatility-based thresholds.
*   **Model Arena:** Trains 5 different algorithms and automatically promotes the winner.
*   **Interactive UI:** A web-based dashboard to visualize charts and AI signals.

---

## 2. Project Structure

```text
crypto-classifier/
│── data/
│   ├── raw/                  # raw_data.csv
│   ├── processed/            # processed_data.csv
│   ├── feature_engineered/   # feature_engineered_data.csv
│   ├── labeled/              # labeled_data.csv
│
│── notebooks/
│   ├── 01_fetch_data.ipynb
│   ├── 02_feature_engineering.ipynb
│   ├── 03_model_training.ipynb
│   ├── 04_evaluation.ipynb
│   ├── 05_binance_prediction.ipynb
│
│── src/
│   ├── data_fetcher.py       # Fetches from Binance
│   ├── data_processor.py     # Cleans data types
│   ├── feature_generator.py  # Calculates Indicators (RSI, MACD)
│   ├── labeler.py            # Generates Targets (Buy/Sell)
│   ├── train.py              # Trains all models
│   ├── evaluate.py           # Evaluates and picks winner
│   ├── predict.py            # Prediction Logic
│   ├── app.py                # Streamlit UI
│
│── models/                   # Saved .pkl models
│── README.md
│── requirements.txt
```

---

## 3. How to Run the Pipeline

You can run the entire workflow from the terminal using these commands in order:

### Phase 1: Data Engineering

```bash
# 1. Fetch raw data
python src/data_fetcher.py

# 2. Clean data types
python src/data_processor.py

# 3. Generate Technical Indicators
python src/feature_generator.py

# 4. Generate Targets (Dynamic Imbalance Fix)
python src/labeler.py
```

### Phase 2: Machine Learning

```bash
# 5. Train Model Zoo (XGBoost, CatBoost, RF, etc.)
python src/train.py

# 6. Evaluate & Select Best Model
python src/evaluate.py
```

### Phase 3: User Interface

To launch the interactive dashboard:

```bash
streamlit run src/app.py
```

---

## 4. Notebooks Guide

*   `01_fetch_data.ipynb`: ETL process visualization.
*   `02_feature_engineering.ipynb`: Visualization of RSI, MACD, and Price.
*   `03_model_training.ipynb`: Training logs and validation scores.
*   `04_evaluation.ipynb`: Confusion matrix and performance report of the winner.
*   `05_binance_prediction.ipynb`: Testing the model on specific scenarios.
"""
# ==========================================
# WRITE FILE TO DISK
# ==========================================
# Locate the project root relative to the current working directory:
# a sibling "../src" directory means the kernel was started inside
# "notebooks/", so the README belongs one level up. Otherwise the current
# directory is taken to be the project root.
if os.path.isdir("../src"):
    readme_path = "../README.md"
else:
    readme_path = "README.md"

# Encode explicitly as UTF-8 so any non-ASCII characters in the README text
# are written the same way regardless of the platform's default encoding.
with open(readme_path, "w", encoding="utf-8") as f:
    f.write(readme_text)

print(f"✅ README.md successfully created at: {os.path.abspath(readme_path)}")

✅ README.md successfully created at: c:\Users\atien\Downloads\Data Science Class\Capstone_Project\Crypto_Classifier\README.md


In [None]:
import os

# Ignore rules for the repository. The data patterns contain a slash, so git
# resolves them relative to the directory that holds the .gitignore file; the
# file therefore has to live in the project root to have any effect.
gitignore_content = """
# Ignore Virtual Environment
myVENV/
venv/
env/
__pycache__/
*.pyc

# Ignore Dataset (Too large for GitHub)
data/raw/*.csv
data/processed/*.csv
data/feature_engineered/*.csv
data/labeled/*.csv

# Ignore System Files
.DS_Store
.ipynb_checkpoints/
"""

# Locate the project root the same way the README cell does: a sibling
# "../src" directory means the kernel is running inside "notebooks/", so the
# file goes one level up; otherwise the current directory is the root.
if os.path.isdir("../src"):
    gitignore_path = "../.gitignore"
else:
    gitignore_path = ".gitignore"

# Python can create dotfiles directly on every platform, so write the final
# name in one step instead of creating "gitignore.txt" and renaming it. Mode
# "w" overwrites any previous .gitignore, and no window exists in which the
# old file has been removed but the new one is not yet in place.
with open(gitignore_path, "w", encoding="utf-8") as f:
    f.write(gitignore_content)

print("✅ .gitignore file created successfully!")

✅ .gitignore file created successfully!
