In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Week 2: Combined Recommendations\n",
    "## Association Rules + Sequential Patterns\n",
    "\n",
    "This notebook explores the combined recommendation engine."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Put the parent directory on the import path; the `ml_engine` imports below\n",
    "# presumably resolve from there (confirm against the repo layout).\n",
    "import sys\n",
    "sys.path.append('../')\n",
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pickle\n",
    "\n",
    "from ml_engine.association.fp_growth import AssociationEngine\n",
    "from ml_engine.sequence.markov import MarkovSequenceModel\n",
    "from ml_engine.decision_engine.recommender import RecommendationEngine\n",
    "from ml_engine.utils.metrics import RecommendationMetrics\n",
    "\n",
    "# Notebook-wide plot defaults: seaborn grid style and a wide default figure size.\n",
    "sns.set_style('whitegrid')\n",
    "plt.rcParams['figure.figsize'] = (14, 6)\n",
    "\n",
    "print(\"✅ Imports successful\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Load Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the combined engine: weight 0.6 on association rules, 0.4 on sequential\n",
    "# patterns, and a minimum score threshold of 0.15.\n",
    "engine = RecommendationEngine(\n",
    "    association_weight=0.6,\n",
    "    sequence_weight=0.4,\n",
    "    min_score_threshold=0.15\n",
    ")\n",
    "\n",
    "# Load the underlying models into the engine\n",
    "engine.load_models()\n",
    "\n",
    "# Summarize what was loaded, one key/value pair per line\n",
    "info = engine.get_model_info()\n",
    "print(\"\\n📊 Model Info:\")\n",
    "for key, value in info.items():\n",
    "    print(f\"   {key}: {value}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Compare Individual vs Combined Recommendations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Basket used to compare the three recommenders side by side\n",
    "test_basket = ['Pantry', 'Produce']\n",
    "\n",
    "print(f\"🛒 Test Basket: {test_basket}\\n\")\n",
    "print(\"=\" * 70)\n",
    "\n",
    "# Association rules only\n",
    "assoc_recs = engine.association_engine.get_recommendations(test_basket, max_items=5)\n",
    "print(\"\\n📊 ASSOCIATION RULES ONLY:\")\n",
    "# Named `rule_stats` rather than `metrics`: a later cell binds `metrics`\n",
    "# to a RecommendationMetrics instance, and the two should not share a name.\n",
    "for item, rule_stats in assoc_recs.items():\n",
    "    print(f\"   {item}: {rule_stats['confidence']:.2%} confidence\")\n",
    "\n",
    "# Sequential patterns only\n",
    "seq_recs = engine.sequence_model.predict_next(test_basket, max_predictions=5)\n",
    "print(\"\\n📊 SEQUENTIAL PATTERNS ONLY:\")\n",
    "for item, prob in seq_recs.items():\n",
    "    print(f\"   {item}: {prob:.2%} probability\")\n",
    "\n",
    "# Combined recommendations\n",
    "combined = engine.get_recommendations(test_basket, max_recommendations=5)\n",
    "print(\"\\n🎯 COMBINED RECOMMENDATIONS:\")\n",
    "# Treat a result with an empty recommendation list like no result at all,\n",
    "# so the heading is never left with nothing underneath it.\n",
    "if combined and combined['recommendations']:\n",
    "    for rec in combined['recommendations']:\n",
    "        print(f\"   {rec['item']}: {rec['combined_score']:.3f} (assoc: {rec['association_score']:.3f}, seq: {rec['sequence_score']:.3f})\")\n",
    "else:\n",
    "    print(\"   No recommendations\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Test Multiple Baskets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load test baskets.\n",
    "# NOTE: pickle.load can execute arbitrary code while unpickling; only open\n",
    "# pickle files that come from a trusted source.\n",
    "with open('../data/processed_baskets.pkl', 'rb') as f:\n",
    "    data = pickle.load(f)\n",
    "\n",
    "baskets = data['baskets']\n",
    "\n",
    "# Test on first 20 baskets\n",
    "test_baskets = baskets[:20]\n",
    "\n",
    "results = [\n",
    "    engine.get_recommendations(basket, max_recommendations=2)\n",
    "    for basket in test_baskets\n",
    "]\n",
    "\n",
    "# A basket counts as covered only if it received at least one recommendation;\n",
    "# a missing result or an empty recommendation list does not count.\n",
    "successful = sum(1 for r in results if r and r['recommendations'])\n",
    "n_tested = len(test_baskets)\n",
    "# Guard the percentage against an empty basket list\n",
    "coverage_pct = successful / n_tested * 100 if n_tested else 0.0\n",
    "print(f\"\\n📊 Recommendation Coverage: {successful}/{n_tested} ({coverage_pct:.1f}%)\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Analyze Recommendation Sources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tally, over every recommendation in `results`, which model(s) produced it\n",
    "source_counts = {\n",
    "    'association_only': 0,\n",
    "    'sequential_only': 0,\n",
    "    'both': 0\n",
    "}\n",
    "\n",
    "for result in results:\n",
    "    # Baskets without a result contribute nothing\n",
    "    if not result:\n",
    "        continue\n",
    "    for rec in result['recommendations']:\n",
    "        sources = rec['sources']\n",
    "        if len(sources) == 2:\n",
    "            source_counts['both'] += 1\n",
    "        elif 'association' in sources:\n",
    "            source_counts['association_only'] += 1\n",
    "        else:\n",
    "            # Anything that is not 'association' lands here\n",
    "            source_counts['sequential_only'] += 1\n",
    "\n",
    "# Visualize (concrete lists rather than dict views are handed to matplotlib)\n",
    "labels = list(source_counts.keys())\n",
    "counts = list(source_counts.values())\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "colors = ['steelblue', 'coral', 'mediumseagreen']\n",
    "ax.bar(labels, counts, color=colors)\n",
    "ax.set_ylabel('Count')\n",
    "ax.set_title('Recommendation Sources Distribution')\n",
    "ax.grid(True, axis='y', alpha=0.3)\n",
    "\n",
    "# Write each bar's value just above it\n",
    "for i, bar_value in enumerate(counts):\n",
    "    ax.text(i, bar_value + 0.5, str(bar_value), ha='center', va='bottom', fontweight='bold')\n",
    "\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print(\"\\n📊 Source Distribution:\")\n",
    "for source, count in source_counts.items():\n",
    "    print(f\"   {source}: {count}\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 5. Simulate Acceptance and Calculate Metrics"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Simulate recommendations with acceptance\n",
    "import random\n",
    "\n",
    "SIM_SEED = 42              # fixed seed so the simulated acceptances repeat on every run\n",
    "ACCEPTANCE_RATE = 0.20     # each recommended item is accepted with 20% probability\n",
    "AVG_ITEM_VALUE = 45        # Avg ₹45 per item already in the basket\n",
    "ACCEPTED_ITEM_VALUE = 50   # value added per accepted recommendation (presumably ₹ as well)\n",
    "N_SIM_BASKETS = 100        # simulate on the first 100 baskets\n",
    "\n",
    "random.seed(SIM_SEED)\n",
    "metrics = RecommendationMetrics()\n",
    "\n",
    "for basket in baskets[:N_SIM_BASKETS]:\n",
    "    result = engine.get_recommendations(basket)\n",
    "\n",
    "    # A missing result is logged the same way as an empty recommendation list\n",
    "    recommendations = [rec['item'] for rec in result['recommendations']] if result else []\n",
    "\n",
    "    # Independent draw per recommended item\n",
    "    accepted = [item for item in recommendations if random.random() < ACCEPTANCE_RATE]\n",
    "\n",
    "    # Simulated basket values; with nothing accepted the value is unchanged\n",
    "    basket_value_before = len(basket) * AVG_ITEM_VALUE\n",
    "    basket_value_after = basket_value_before + len(accepted) * ACCEPTED_ITEM_VALUE\n",
    "\n",
    "    metrics.log_recommendation(\n",
    "        basket=basket,\n",
    "        recommendations=recommendations,\n",
    "        accepted=accepted,\n",
    "        basket_value_before=basket_value_before,\n",
    "        basket_value_after=basket_value_after\n",
    "    )\n",
    "\n",
    "# Print metrics\n",
    "metrics.print_metrics()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 6. Tune Weights"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Weight combinations to compare, as (association weight, sequence weight)\n",
    "weight_combinations = [\n",
    "    (0.7, 0.3),\n",
    "    (0.6, 0.4),\n",
    "    (0.5, 0.5),\n",
    "    (0.4, 0.6),\n",
    "    (0.3, 0.7)\n",
    "]\n",
    "\n",
    "TUNING_THRESHOLD = 0.15   # same minimum score threshold for every combination\n",
    "N_TUNING_BASKETS = 50     # recommendations are counted over the first 50 baskets\n",
    "\n",
    "results_by_weights = []\n",
    "\n",
    "for assoc_w, seq_w in weight_combinations:\n",
    "    # A fresh engine is built, and its models loaded, for every combination\n",
    "    test_engine = RecommendationEngine(\n",
    "        association_weight=assoc_w,\n",
    "        sequence_weight=seq_w,\n",
    "        min_score_threshold=TUNING_THRESHOLD\n",
    "    )\n",
    "    test_engine.load_models()\n",
    "\n",
    "    # Total recommendations produced; baskets with no result add nothing\n",
    "    basket_results = (\n",
    "        test_engine.get_recommendations(basket)\n",
    "        for basket in baskets[:N_TUNING_BASKETS]\n",
    "    )\n",
    "    rec_count = sum(len(res['recommendations']) for res in basket_results if res)\n",
    "\n",
    "    results_by_weights.append({\n",
    "        'weights': f\"{assoc_w}/{seq_w}\",\n",
    "        'count': rec_count\n",
    "    })\n",
    "\n",
    "# Plot\n",
    "df_weights = pd.DataFrame(results_by_weights)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(10, 6))\n",
    "ax.plot(df_weights['weights'], df_weights['count'], marker='o', linewidth=2, markersize=8)\n",
    "ax.set_xlabel('Association / Sequential Weights')\n",
    "ax.set_ylabel('Total Recommendations')\n",
    "ax.set_title('Impact of Weight Distribution on Recommendation Count')\n",
    "ax.grid(True, alpha=0.3)\n",
    "plt.tight_layout()\n",
    "plt.show()\n",
    "\n",
    "print(\"\\n📊 Recommendations by Weight:\")\n",
    "print(df_weights)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 7. Key Insights\n",
    "\n",
    "### Observations:\n",
    "1. **Coverage:** X% of baskets receive recommendations\n",
    "2. **Source Distribution:** Most recommendations come from...\n",
    "3. **Weight Sensitivity:** Optimal weights appear to be...\n",
    "\n",
    "### Next Steps:\n",
    "- Week 3: Generate uplift training data\n",
    "- Week 4: Build causal uplift model\n",
    "- Week 5: Integrate uplift into decision engine"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.10.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}