<a href="https://colab.research.google.com/github/lionatzion/PursuitofAlpha/blob/main/PursuitMaprun.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
import os, textwrap, zipfile, shutil

# 1. Mount Google Drive at /content/drive
drive.mount('/content/drive')

# 2. Locations of the project tree and of the zip archive built from it
project_root = '/content/drive/MyDrive/quant_backtest_ml'
zip_path     = '/content/drive/MyDrive/quant_backtest_ml_scaffold.zip'

# 3. Start from a clean slate.
# WARNING: an existing project folder is deleted in full (with everything
# stored under it) before the directory tree is recreated below.
if os.path.isdir(project_root):
    shutil.rmtree(project_root)

# Directory layout, relative to project_root ('' stands for project_root itself)
subdirs = [
    '',
    'modules', 'pipelines',
    'data/raw', 'data/processed', 'data/altdata',
    'models/hf/finbert', 'models/hf/finbert-tone', 'models/trained',
    'notebooks', 'logs',
]
for relative_dir in subdirs:
    target_dir = os.path.join(project_root, relative_dir)
    # makedirs also creates intermediate folders such as data/ and models/hf/
    os.makedirs(target_dir, exist_ok=True)

# Helper to write text files
def write(path, content):
    """Write ``content`` to ``path`` as UTF-8 text after dedenting it.

    Args:
        path: Destination file path. An existing file is overwritten.
        content: Text to write. Common leading indentation is removed with
            ``textwrap.dedent`` and leading whitespace (including the newline
            that follows an opening triple quote) is stripped, so templates
            can be passed as indented triple-quoted strings.
    """
    # Pin the encoding so the generated files do not depend on the locale's
    # default encoding of the machine running the scaffold.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(textwrap.dedent(content).lstrip())

# 4. Top-level files
# Maps each file name (relative to project_root) to its template text. The
# templates are passed through write(), which dedents them and strips the
# leading newline.
top_level_files = {
    # Project overview
    'README.md': """
    # Quant Backtest Machine Learning Project

    This scaffold sets up a modular project for quantitative trading strategies with ML and sentiment analysis.
""",
    # Python package names, one per line (no versions pinned)
    'requirements.txt': """
    pandas
    numpy
    yfinance
    scikit-learn
    backtrader
    ta
    transformers
    torch
    joblib
    optuna
    google-cloud-storage
    google-cloud-logging
""",
    # Environment variable names with empty values
    '.env_template': """
    HUGGINGFACE_TOKEN=
    TIINGO_API_KEY=
    POLYGON_API_KEY=
    NEWSAPI_KEY=
    GOOGLE_APPLICATION_CREDENTIALS=
""",
    # Project paths, tickers and backtest window
    'config.yaml': """
    project_root: "/content/drive/MyDrive/quant_backtest_ml"
    data:
      raw: "data/raw"
      processed: "data/processed"
      altdata: "data/altdata"
    models:
      hf:
        finbert: "models/hf/finbert"
        tone:   "models/hf/finbert-tone"
      trained: "models/trained"
    tickers: ["AAPL", "SPY", "QQQ"]
    backtest:
      start_date: "2019-01-01"
      end_date:   "2024-12-31"
      interval:   "1h"
""",
    # Container image whose default command is pipelines/full_pipeline.py
    'Dockerfile': """
    FROM python:3.10-slim
    COPY requirements.txt .
    RUN pip install --no-cache-dir -r requirements.txt
    COPY . /app
    WORKDIR /app
    CMD ["python", "pipelines/full_pipeline.py"]
""",
    # Compose service that builds the image and mounts the project at /app
    'docker-compose.yml': """
    version: '3.8'
    services:
      app:
        build: .
        volumes:
          - .:/app
        command: python pipelines/full_pipeline.py
""",
}
for filename, template in top_level_files.items():
    write(f'{project_root}/{filename}', template)

# 5. modules/
# Maps each file name under modules/ to its placeholder source: every function
# is a stub whose body is just `pass`.
modules = {
    'data_ingestion.py': (
        'def download_data():\n'
        '    pass\n'
    ),
    'feature_engineering.py': (
        'def create_volume_bars():\n'
        '    pass\n'
        '\n'
        'def add_indicators():\n'
        '    pass\n'
    ),
    'model_training.py': (
        'def prepare_features():\n'
        '    pass\n'
        '\n'
        'def train_classifier():\n'
        '    pass\n'
    ),
    'model_evaluation.py': (
        'def evaluate_model():\n'
        '    pass\n'
    ),
    'sentiment_analysis.py': (
        'def score_sentiment(text):\n'
        '    pass\n'
    ),
    'backtesting_workflow.py': (
        'def run_backtest():\n'
        '    pass\n'
    ),
}
for module_file, stub_source in modules.items():
    module_path = f'{project_root}/modules/{module_file}'
    write(module_path, stub_source)

# 6. pipelines/
# A script launched as `python pipelines/<name>.py` gets only its own directory
# (pipelines/) on sys.path, so `from modules... import ...` would raise
# ModuleNotFoundError. Each stage script therefore adds the project root to
# sys.path before importing from `modules`.
# The templates below are indented for readability; write() dedents them.
stage_template = """
    import sys
    from pathlib import Path

    # Make the project root importable when this file is run as a script.
    sys.path.insert(0, str(Path(__file__).resolve().parents[1]))

    from modules.{module} import {func}


    def main():
        {func}()


    if __name__ == "__main__":
        main()
"""
pipelines = {
    'train_pipeline.py': stage_template.format(
        module='model_training', func='train_classifier'),
    'backtest_pipeline.py': stage_template.format(
        module='backtesting_workflow', func='run_backtest'),
    'full_pipeline.py': """
        import subprocess
        import sys
        from pathlib import Path

        PIPELINES_DIR = Path(__file__).resolve().parent


        def main():
            # Use the current interpreter and locate the stage scripts relative
            # to this file so the run does not depend on PATH or the working
            # directory. check=True stops the run when a stage fails, so the
            # backtest is never started after a failed training stage.
            for script in ("train_pipeline.py", "backtest_pipeline.py"):
                subprocess.run([sys.executable, str(PIPELINES_DIR / script)], check=True)


        if __name__ == "__main__":
            main()
    """,
}
for fn, content in pipelines.items():
    write(f'{project_root}/pipelines/{fn}', content)

# 7. Zip everything
# Archive names are made relative to the parent of project_root, so the zip
# unpacks into a single top-level project folder.
archive_base = os.path.dirname(os.path.normpath(project_root))
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
    for root, dirs, files in os.walk(project_root):
        # Store an explicit entry for every directory. Adding files alone
        # would drop the empty folders (data/*, models/*, notebooks, logs)
        # from the archive, so the unzipped scaffold would be incomplete.
        zf.write(root, os.path.relpath(root, archive_base))
        for file in files:
            full = os.path.join(root, file)
            rel  = os.path.relpath(full, archive_base)
            zf.write(full, rel)

print(f"✅ Scaffold recreated and zipped to: {zip_path}")


Mounted at /content/drive
✅ Scaffold recreated and zipped to: /content/drive/MyDrive/quant_backtest_ml_scaffold.zip
