In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 📊 Exploration des Données Crypto\n",
    "## Analyse des Features pour Machine Learning\n",
    "\n",
    "**Date:** 12 novembre 2025  \n",
    "**Auteur:** Karim Assi  \n",
    "**Objectif:** Analyser les 54 features créées et préparer pour le ML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import glob\n",
    "import os\n",
    "from datetime import datetime\n",
    "\n",
    "# Display configuration: show every column and render floats with two decimals.\n",
    "for option_name, option_value in (\n",
    "    ('display.max_columns', None),\n",
    "    ('display.float_format', '{:.2f}'.format),\n",
    "):\n",
    "    pd.set_option(option_name, option_value)\n",
    "\n",
    "# Plot configuration: apply the matplotlib style first, then the seaborn palette\n",
    "# (same order as before, so the palette is not overridden by the style sheet).\n",
    "plt.style.use('seaborn-v0_8-darkgrid')\n",
    "sns.set_palette('husl')\n",
    "\n",
    "print(\"‚úÖ Imports r√©ussis\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Chargement des Données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locate every per-crypto feature file matching ../data/processed/*_features_*.csv.\n",
    "# glob.glob returns paths in arbitrary, OS-dependent order, so the result is sorted to\n",
    "# make the listing (and any later cell that picks a file from it) reproducible.\n",
    "feature_files = sorted(glob.glob('../data/processed/*_features_*.csv'))\n",
    "\n",
    "print(f\"üìÅ Fichiers trouv√©s: {len(feature_files)}\")\n",
    "for f in feature_files:\n",
    "    print(f\"   - {os.path.basename(f)}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Bitcoin (our main crypto).\n",
    "# Match on the file name only, so a 'bitcoin' directory component cannot cause false hits.\n",
    "btc_candidates = [f for f in feature_files if 'bitcoin' in os.path.basename(f).lower()]\n",
    "if not btc_candidates:\n",
    "    # Fail with an actionable message instead of an opaque IndexError from [0].\n",
    "    raise FileNotFoundError(\"Aucun fichier de features Bitcoin dans ../data/processed\")\n",
    "\n",
    "# glob order is arbitrary, so pick deterministically: the lexicographically last name.\n",
    "# NOTE(review): this is the newest snapshot only if the file suffix sorts chronologically\n",
    "# (e.g. a YYYYMMDD stamp) - confirm against the feature-engineering script.\n",
    "btc_file = max(btc_candidates)\n",
    "df_btc = pd.read_csv(btc_file)\n",
    "\n",
    "print(\"üìä Bitcoin Features\")\n",
    "print(f\"   Shape: {df_btc.shape}\")\n",
    "print(f\"   Colonnes: {df_btc.shape[1]}\")\n",
    "print(\"\\nüîç Aper√ßu:\")\n",
    "df_btc.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load every feature file into a dict keyed by the upper-cased file-name prefix\n",
    "# (the part before '_features_').\n",
    "cryptos = {}\n",
    "\n",
    "# Iterate in sorted order: if several files share a prefix, the lexicographically last\n",
    "# one wins deterministically instead of depending on the arbitrary glob order. Sorting\n",
    "# also fixes the dict insertion order, hence the print order of the cells below.\n",
    "for file in sorted(feature_files):\n",
    "    basename = os.path.basename(file)\n",
    "    crypto = basename.split('_features_')[0].upper()\n",
    "    cryptos[crypto] = pd.read_csv(file)\n",
    "    print(f\"‚úÖ {crypto}: {cryptos[crypto].shape}\")\n",
    "\n",
    "print(f\"\\nüìä Total: {len(cryptos)} cryptos charg√©es\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Structure des Données"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# General information about the Bitcoin frame (dtypes, non-null counts, memory usage).\n",
    "print(\"üìã Informations sur les donn√©es Bitcoin:\")\n",
    "# DataFrame.info() writes its report to stdout itself and returns None, so wrapping it\n",
    "# in print() only appended a stray \"None\" line after the report.\n",
    "df_btc.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group the Bitcoin columns by keyword family, using a substring match on the\n",
    "# lower-cased column name. A column containing keywords from several families is\n",
    "# listed in each of them, so the per-family counts can add up to more than the\n",
    "# number of columns.\n",
    "keyword_groups = {\n",
    "    'Prix & Volume': ['price', 'volume', 'market'],\n",
    "    'Indicateurs Tech': ['sma', 'ema', 'rsi', 'macd', 'bb'],\n",
    "    'Temporel': ['year', 'month', 'day', 'hour', 'weekend', 'quarter'],\n",
    "    'Sentiment': ['fear', 'greed', 'fg'],\n",
    "    'Lag': ['lag'],\n",
    "}\n",
    "categories = {\n",
    "    label: [c for c in df_btc.columns if any(k in c.lower() for k in keywords)]\n",
    "    for label, keywords in keyword_groups.items()\n",
    "}\n",
    "\n",
    "# Columns matched by no family go to a catch-all bucket.\n",
    "all_categorized = [c for matched in categories.values() for c in matched]\n",
    "categories['Autres'] = [c for c in df_btc.columns if c not in all_categorized]\n",
    "\n",
    "print(\"üìä R√©partition des Features:\\n\")\n",
    "for cat, cols in categories.items():\n",
    "    print(f\"{cat}: {len(cols)} features\")\n",
    "    # Short families are listed in full; longer ones show five names plus a remainder count.\n",
    "    truncated = len(cols) > 10\n",
    "    for col in (cols[:5] if truncated else cols):\n",
    "        print(f\"   ‚Ä¢ {col}\")\n",
    "    if truncated:\n",
    "        print(f\"   ... et {len(cols)-5} autres\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Analyse des Indicateurs Techniques"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# RSI overview: classify the first-row 'rsi_14' value of every loaded crypto.\n",
    "print(\"üìà RSI (Relative Strength Index)\\n\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    # Frames without the column are skipped silently.\n",
    "    if 'rsi_14' not in df.columns:\n",
    "        continue\n",
    "\n",
    "    rsi = df['rsi_14'].iloc[0]\n",
    "\n",
    "    # Above 70 is flagged overbought, below 30 oversold, anything else neutral.\n",
    "    if rsi > 70:\n",
    "        signal = \"üî¥ SUR-ACHET√â (risque correction)\"\n",
    "    elif rsi < 30:\n",
    "        signal = \"üü¢ SUR-VENDU (opportunit√© achat)\"\n",
    "    else:\n",
    "        signal = \"üü° NEUTRE\"\n",
    "\n",
    "    print(f\"{crypto:12} | RSI: {rsi:5.1f} | {signal}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# MACD overview: print the first-row 'macd', 'macd_signal' and 'macd_histogram' values\n",
    "# of every crypto that has all three columns, with a trend label from the histogram sign.\n",
    "print(\"\\nüìä MACD (Moving Average Convergence Divergence)\\n\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    if all(c in df.columns for c in ['macd', 'macd_signal', 'macd_histogram']):\n",
    "        macd = df['macd'].iloc[0]\n",
    "        signal = df['macd_signal'].iloc[0]\n",
    "        hist = df['macd_histogram'].iloc[0]\n",
    "\n",
    "        # NaN > 0 is False, so a missing histogram used to be labelled bearish.\n",
    "        # Report it as unavailable instead of inventing a trend.\n",
    "        if pd.isna(hist):\n",
    "            trend = \"N/A\"\n",
    "        elif hist > 0:\n",
    "            trend = \"üü¢ HAUSSIER\"\n",
    "        else:\n",
    "            trend = \"üî¥ BAISSIER\"\n",
    "\n",
    "        print(f\"{crypto:12} | MACD: {macd:7.2f} | Signal: {signal:7.2f} | Hist: {hist:7.2f} | {trend}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# SMA crossover overview: print the first-row price, 'sma_7' and 'sma_30' of every\n",
    "# crypto that has the required columns, labelled from its 'sma_crossover' flag.\n",
    "print(\"\\nüìâ SMA Crossover (Golden/Death Cross)\\n\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    if all(c in df.columns for c in ['price_usd', 'sma_7', 'sma_30', 'sma_crossover']):\n",
    "        price = df['price_usd'].iloc[0]\n",
    "        sma7 = df['sma_7'].iloc[0]\n",
    "        sma30 = df['sma_30'].iloc[0]\n",
    "        crossover = df['sma_crossover'].iloc[0]\n",
    "\n",
    "        # A missing (NaN) flag is not equal to 1 and used to fall through to the\n",
    "        # \"death cross\" branch; report it as unavailable rather than as bearish.\n",
    "        if pd.isna(crossover):\n",
    "            signal = \"N/A (sma_crossover manquant)\"\n",
    "        elif crossover == 1:\n",
    "            signal = \"‚úÖ GOLDEN CROSS (SMA7 > SMA30) - Tendance haussi√®re\"\n",
    "        else:\n",
    "            signal = \"‚ùå DEATH CROSS (SMA7 < SMA30) - Tendance baissi√®re\"\n",
    "\n",
    "        print(f\"{crypto:12} | Prix: ${price:,.2f} | SMA7: ${sma7:,.2f} | SMA30: ${sma30:,.2f}\")\n",
    "        print(f\"             {signal}\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Analyse du Sentiment (Fear & Greed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Fear & Greed overview: classify the first-row 'fear_greed_index' of every crypto\n",
    "# into five bands (<=25, <=45, <=55, <=75, above) and print a suggested action.\n",
    "print(\"üò± Fear & Greed Index Analysis\\n\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    if 'fear_greed_index' in df.columns:\n",
    "        fg = df['fear_greed_index'].iloc[0]\n",
    "\n",
    "        # NaN fails every <= test below and used to land in the final else branch,\n",
    "        # i.e. it was reported as extreme greed. Report it as unavailable instead.\n",
    "        if pd.isna(fg):\n",
    "            print(f\"{crypto:12} | F&G: N/A\")\n",
    "            continue\n",
    "\n",
    "        if fg <= 25:\n",
    "            classification = \"üò± EXTREME FEAR\"\n",
    "            action = \"üü¢ Opportunit√© d'ACHAT (contrarian)\"\n",
    "        elif fg <= 45:\n",
    "            classification = \"üò∞ FEAR\"\n",
    "            action = \"üü° Prudent pour ACHAT\"\n",
    "        elif fg <= 55:\n",
    "            classification = \"üòê NEUTRAL\"\n",
    "            action = \"‚ö™ Attendre signal clair\"\n",
    "        elif fg <= 75:\n",
    "            classification = \"üòÉ GREED\"\n",
    "            action = \"üü° Prudent pour VENTE\"\n",
    "        else:\n",
    "            classification = \"ü§ë EXTREME GREED\"\n",
    "            action = \"üî¥ Risque de CORRECTION\"\n",
    "\n",
    "        print(f\"{crypto:12} | F&G: {fg:3.0f}/100 | {classification} | {action}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Signaux de Trading Combinés"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Compute a combined signal score per crypto from first-row indicator values.\n",
    "# Each available indicator adds or subtracts points; `signals` maps the crypto name\n",
    "# to {'score': int, 'reasons': [str, ...]} and `sorted_signals` orders it by score.\n",
    "print(\"üéØ SIGNAUX DE TRADING COMBIN√âS\\n\")\n",
    "print(\"=\"*80)\n",
    "\n",
    "signals = {}\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    score = 0\n",
    "    reasons = []\n",
    "\n",
    "    # RSI: below 30 is +2, above 70 is -2. NaN matches neither test and adds nothing.\n",
    "    if 'rsi_14' in df.columns:\n",
    "        rsi = df['rsi_14'].iloc[0]\n",
    "        if rsi < 30:\n",
    "            score += 2\n",
    "            reasons.append(\"RSI sur-vendu (+2)\")\n",
    "        elif rsi > 70:\n",
    "            score -= 2\n",
    "            reasons.append(\"RSI sur-achet√© (-2)\")\n",
    "\n",
    "    # MACD histogram: positive is +1, otherwise -1.\n",
    "    if 'macd_histogram' in df.columns:\n",
    "        hist = df['macd_histogram'].iloc[0]\n",
    "        # Skip missing values: NaN > 0 is False, which used to be scored as bearish.\n",
    "        if pd.notna(hist):\n",
    "            if hist > 0:\n",
    "                score += 1\n",
    "                reasons.append(\"MACD haussier (+1)\")\n",
    "            else:\n",
    "                score -= 1\n",
    "                reasons.append(\"MACD baissier (-1)\")\n",
    "\n",
    "    # SMA crossover flag: 1 is +2, any other value is -2.\n",
    "    if 'sma_crossover' in df.columns:\n",
    "        crossover = df['sma_crossover'].iloc[0]\n",
    "        # Skip missing values: NaN == 1 is False, which used to be scored as a death cross.\n",
    "        if pd.notna(crossover):\n",
    "            if crossover == 1:\n",
    "                score += 2\n",
    "                reasons.append(\"Golden Cross (+2)\")\n",
    "            else:\n",
    "                score -= 2\n",
    "                reasons.append(\"Death Cross (-2)\")\n",
    "\n",
    "    # Fear & Greed, read as a contrarian signal: <=25 is +3, >75 is -3.\n",
    "    if 'fear_greed_index' in df.columns:\n",
    "        fg = df['fear_greed_index'].iloc[0]\n",
    "        if fg <= 25:\n",
    "            score += 3\n",
    "            reasons.append(\"Extreme Fear (+3)\")\n",
    "        elif fg > 75:\n",
    "            score -= 3\n",
    "            reasons.append(\"Extreme Greed (-3)\")\n",
    "\n",
    "    # Volume spike flag: 1 is +1.\n",
    "    if 'volume_spike' in df.columns:\n",
    "        if df['volume_spike'].iloc[0] == 1:\n",
    "            score += 1\n",
    "            reasons.append(\"Volume spike (+1)\")\n",
    "\n",
    "    signals[crypto] = {'score': score, 'reasons': reasons}\n",
    "\n",
    "# Highest score first; sorted() is stable, so ties keep the dict insertion order.\n",
    "sorted_signals = sorted(signals.items(), key=lambda x: x[1]['score'], reverse=True)\n",
    "\n",
    "for crypto, data in sorted_signals:\n",
    "    score = data['score']\n",
    "    reasons = data['reasons']\n",
    "\n",
    "    # Map the numeric score to an overall label.\n",
    "    if score >= 5:\n",
    "        signal = \"üü¢üü¢üü¢ FORT SIGNAL D'ACHAT\"\n",
    "    elif score >= 3:\n",
    "        signal = \"üü¢üü¢ SIGNAL D'ACHAT\"\n",
    "    elif score >= 1:\n",
    "        signal = \"üü¢ L√©ger signal d'achat\"\n",
    "    elif score == 0:\n",
    "        signal = \"‚ö™ NEUTRE\"\n",
    "    elif score >= -2:\n",
    "        signal = \"üî¥ L√©ger signal de vente\"\n",
    "    elif score >= -4:\n",
    "        signal = \"üî¥üî¥ SIGNAL DE VENTE\"\n",
    "    else:\n",
    "        signal = \"üî¥üî¥üî¥ FORT SIGNAL DE VENTE\"\n",
    "\n",
    "    print(f\"\\n{crypto}\")\n",
    "    print(f\"   Score: {score:+d} | {signal}\")\n",
    "    print(\"   Raisons:\")\n",
    "    for reason in reasons:\n",
    "        print(f\"      ‚Ä¢ {reason}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Statistiques Descriptives"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Descriptive snapshot of the Bitcoin frame. Only one row per crypto is available for\n",
    "# now; this cell is kept ready for when historical data is collected.\n",
    "print(\"üìä Statistiques Descriptives (donn√©es actuelles limit√©es)\\n\")\n",
    "\n",
    "numeric_cols = df_btc.select_dtypes(include=[np.number]).columns\n",
    "print(f\"Features num√©riques: {len(numeric_cols)}\")\n",
    "print(\"\\nAper√ßu des valeurs actuelles (Bitcoin):\")\n",
    "\n",
    "# Headline features to display; names absent from the frame are skipped.\n",
    "important_features = [\n",
    "    'price_usd', 'rsi_14', 'macd', 'sma_7', 'sma_30',\n",
    "    'fear_greed_index', 'volume_24h_usd',\n",
    "]\n",
    "\n",
    "for feat in important_features:\n",
    "    if feat not in df_btc.columns:\n",
    "        continue\n",
    "    value = df_btc[feat].iloc[0]\n",
    "    print(f\"{feat:20} : {value:,.2f}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Comparaison entre Cryptos"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build a comparison table with one row per crypto, using first-row indicator values.\n",
    "def _first_value(frame, column):\n",
    "    \"\"\"Return the first-row value of `column`, or NaN when the column is absent.\"\"\"\n",
    "    return frame[column].iloc[0] if column in frame.columns else np.nan\n",
    "\n",
    "\n",
    "def _flag_label(value, yes, no):\n",
    "    \"\"\"Map a flag to `yes` when it equals 1, else `no`; a missing (NaN) flag gives 'N/A'.\"\"\"\n",
    "    if pd.isna(value):\n",
    "        return 'N/A'\n",
    "    return yes if value == 1 else no\n",
    "\n",
    "\n",
    "comparison_data = []\n",
    "\n",
    "for crypto, df in cryptos.items():\n",
    "    # The previous df.get(col, [0]).iloc[0] raised AttributeError whenever the column was\n",
    "    # absent, because the list fallback has no .iloc; the helpers handle that case.\n",
    "    row = {\n",
    "        'Crypto': crypto,\n",
    "        'Prix': _first_value(df, 'price_usd'),\n",
    "        'RSI': _first_value(df, 'rsi_14'),\n",
    "        'MACD': _first_value(df, 'macd_histogram'),\n",
    "        'SMA_Cross': _flag_label(_first_value(df, 'sma_crossover'), 'Golden', 'Death'),\n",
    "        'F&G': _first_value(df, 'fear_greed_index'),\n",
    "        'Volume_Spike': _flag_label(_first_value(df, 'volume_spike'), 'Oui', 'Non'),\n",
    "        'Signal_Score': signals[crypto]['score']\n",
    "    }\n",
    "    comparison_data.append(row)\n",
    "\n",
    "df_comparison = pd.DataFrame(comparison_data)\n",
    "df_comparison = df_comparison.sort_values('Signal_Score', ascending=False)\n",
    "\n",
    "print(\"\\nüìä TABLEAU COMPARATIF\\n\")\n",
    "print(df_comparison.to_string(index=False))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 8. Préparation pour le Machine Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate features for the ML stage (to be used once more data is available),\n",
    "# grouped by family; each one is checked against the Bitcoin frame's columns.\n",
    "print(\"ü§ñ FEATURES POUR LE MACHINE LEARNING\\n\")\n",
    "print(\"=\"*60)\n",
    "\n",
    "ml_features = {\n",
    "    'Prix & Volume': ['price_usd', 'volume_24h_usd', 'market_cap_usd'],\n",
    "    'Indicateurs': [\n",
    "        'rsi_14', 'macd', 'macd_histogram', 'sma_7', 'sma_30',\n",
    "        'ema_12', 'ema_26', 'bb_upper', 'bb_lower', 'bb_width',\n",
    "    ],\n",
    "    'Ratios': ['price_to_sma7_ratio', 'price_to_sma30_ratio', 'volume_ratio'],\n",
    "    'Sentiment': ['fear_greed_index', 'fg_ma_7', 'fg_change_7d'],\n",
    "    'Temporel': ['day_of_week', 'hour', 'is_weekend', 'month'],\n",
    "    'Lag': ['price_usd_lag_1d', 'price_usd_lag_7d', 'volume_24h_usd_lag_1d'],\n",
    "    'Binaires': ['sma_crossover', 'volume_spike', 'is_extreme_fear', 'is_extreme_greed'],\n",
    "}\n",
    "\n",
    "# Total number of listed features across all families.\n",
    "total_features = 0\n",
    "for group in ml_features.values():\n",
    "    total_features += len(group)\n",
    "print(f\"Total features pour ML: {total_features}\\n\")\n",
    "\n",
    "for category, features in ml_features.items():\n",
    "    print(f\"{category} ({len(features)}):\")\n",
    "    for feat in features:\n",
    "        # Mark whether the feature exists as a column of the Bitcoin frame.\n",
    "        if feat in df_btc.columns:\n",
    "            available = \"‚úÖ\"\n",
    "        else:\n",
    "            available = \"‚ùå\"\n",
    "        print(f\"   {available} {feat}\")\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 9. Insights et Conclusions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(\"üí° INSIGHTS CL√âS\\n\")\n",
    "print(\"=\"*80)\n",
    "\n",
    "insights = [\n",
    "    \"1. DONN√âES ACTUELLES:\",\n",
    "    \"   ‚Ä¢ 5 cryptos avec 54 features chacune\",\n",
    "    \"   ‚Ä¢ Seulement 1 point de donn√©es (snapshot actuel)\",\n",
    "    f\"   ‚Ä¢ Fear & Greed actuel: {df_btc['fear_greed_index'].iloc[0]:.0f} (Extreme Fear)\",\n",
    "    \"\",\n",
    "    \"2. SIGNAUX ACTUELS:\",\n",
    "]\n",
    "\n",
    "# Top 3 opportunit√©s\n",
    "top3 = sorted_signals[:3]\n",
    "insights.append(\"   Top 3 opportunit√©s:\")\n",
    "for i, (crypto, data) in enumerate(top3, 1):\n",
    "    insights.append(f\"      {i}. {crypto} (score: {data['score']:+d})\")\n",
    "\n",
    "insights.extend([\n",
    "    \"\",\n",
    "    \"3. PROCHAINES √âTAPES:\",\n",
    "    \"   ‚úÖ Collecter donn√©es historiques Binance (ce soir)\",\n",
    "    \"   ‚úÖ Avoir 365 jours de donn√©es OHLCV\",\n",
    "    \"   ‚úÖ Re-g√©n√©rer features avec historique complet\",\n",
    "    \"   ‚úÖ Analyser corr√©lations entre features\",\n",
    "    \"   ‚úÖ Entra√Æner premier mod√®le ML\",\n",
    "    \"\",\n",
    "    \"4. MOD√àLE ML PR√âVU:\",\n",
    "    \"   ‚Ä¢ Target (y): Prix J+1 ou J+7\",\n",
    "    f\"   ‚Ä¢ Features (X): ~35-40 features s√©lectionn√©es\",\n",
    "    \"   ‚Ä¢ Mod√®les: R√©gression Lin√©aire, Random Forest, LSTM\",\n",
    "    \"   ‚Ä¢ Validation: Train/Test split 80/20\",\n",
    "    \"\",\n",
    "    \"5. LIMITATIONS ACTUELLES:\",\n",
    "    \"   ‚ö†Ô∏è Seulement 1 point de donn√©e par crypto\",\n",
    "    \"   ‚ö†Ô∏è Impossible de calculer corr√©lations\",\n",
    "    \"   ‚ö†Ô∏è Impossible d'entra√Æner mod√®le ML\",\n",
    "    \"   ‚úÖ R√âSOLU CE SOIR avec donn√©es Binance historiques\",\n",
    "])\n",
    "\n",
    "for insight in insights:\n",
    "    print(insight)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 10. Export pour Rapport"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the comparison table as a date-stamped CSV under ../docs (created if missing).\n",
    "output_dir = '../docs'\n",
    "os.makedirs(output_dir, exist_ok=True)\n",
    "\n",
    "# File name carries today's date as YYYYMMDD.\n",
    "date_stamp = datetime.now().strftime('%Y%m%d')\n",
    "output_file = f\"{output_dir}/crypto_comparison_{date_stamp}.csv\"\n",
    "df_comparison.to_csv(output_file, index=False)\n",
    "\n",
    "print(f\"‚úÖ Tableau comparatif sauvegard√©: {output_file}\")\n",
    "print(\"\\nüìä Ce fichier peut √™tre utilis√© dans votre rapport M2\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "## ✅ Conclusion\n",
    "\n",
    "**État actuel:**\n",
    "- ✅ 54 features créées par crypto\n",
    "- ✅ Signaux de trading calculés\n",
    "- ✅ Pipeline de feature engineering opérationnel\n",
    "\n",
    "**Prochaine étape:**\n",
    "- 🔄 Collecte Binance OHLCV (365 jours)\n",
    "- 🔄 Re-analyse avec données complètes\n",
    "- 🔄 Corrélations et visualisations\n",
    "- 🔄 Premier modèle ML\n",
    "\n",
    "**Date:** 12 novembre 2025  \n",
    "**Auteur:** Karim Assi (@karimassi02)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}