rammmix · rammmix · Oct 23, 2025
diff --git a/Matplotlib/Pokemon-Analysis-Optimized.ipynb b/Matplotlib/Pokemon-Analysis-Optimized.ipynb
@@ -0,0 +1,222 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Optimized Pokemon Data Analysis\n",
+    "\n",
+    "This notebook demonstrates optimized data analysis techniques for Pokemon data.\n",
+    "Key optimizations include:\n",
+    "- Memory-efficient data loading with proper dtypes\n",
+    "- Optimized plotting with reduced DPI and efficient rendering\n",
+    "- Batch processing for multiple visualizations\n",
+    "- Reduced memory footprint for large datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Import optimized utilities\n",
+    "import sys\n",
+    "sys.path.append('..')\n",
+    "from optimized_utils import load_pokemon_data, optimize_dataframe_memory, batch_plot_optimization\n",
+    "\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "\n",
+    "# Select the inline backend BEFORE touching rcParams: on older IPython /\n",
+    "# ipykernel releases, activating it can apply the backend's own rc defaults\n",
+    "# and silently overwrite the DPI settings made below.\n",
+    "%matplotlib inline\n",
+    "\n",
+    "# Set optimized matplotlib parameters\n",
+    "plt.rcParams['figure.dpi'] = 100\n",
+    "plt.rcParams['savefig.dpi'] = 100\n",
+    "plt.rcParams['figure.max_open_warning'] = 0\n",
+    "\n",
+    "# Suppress warnings\n",
+    "# NOTE(review): this silences every warning category for the whole kernel,\n",
+    "# including deprecation notices from the plotting/data libraries -- consider\n",
+    "# narrowing it to specific categories or modules.\n",
+    "import warnings\n",
+    "warnings.filterwarnings('ignore')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Read the CSV through the project loader, then run the extra memory pass\n",
+    "print(\"Loading Pokemon data with optimizations...\")\n",
+    "df = load_pokemon_data('./data/pokemon.csv')\n",
+    "df = optimize_dataframe_memory(df)\n",
+    "\n",
+    "# Report shape, deep memory footprint (in MB) and the dtype mix of the result\n",
+    "memory_mb = df.memory_usage(deep=True).sum() / 1024 / 1024\n",
+    "dtype_counts = df.dtypes.value_counts()\n",
+    "print(f\"\\nData shape: {df.shape}\")\n",
+    "print(f\"Memory usage: {memory_mb:.2f} MB\")\n",
+    "print(\"\\nData types:\")\n",
+    "print(dtype_counts)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Headline counts for the loaded frame\n",
+    "n_rows = len(df)\n",
+    "print(\"Data Overview:\")\n",
+    "print(\"=\" * 50)\n",
+    "print(f\"Total Pokemon: {n_rows:,}\")\n",
+    "print(f\"Generations: {df['generation_id'].nunique()}\")\n",
+    "print(f\"Primary Types: {df['type_1'].nunique()}\")\n",
+    "print(f\"Secondary Types: {df['type_2'].nunique()}\")\n",
+    "print(f\"Legendary Pokemon: {df['is_legendary'].sum()}\")\n",
+    "\n",
+    "# Per-column null counts and percentages, most-incomplete columns first;\n",
+    "# only columns that actually have gaps are printed.\n",
+    "print(\"\\nMissing Data Analysis:\")\n",
+    "missing_data = df.isna().sum()\n",
+    "missing_pct = missing_data.div(n_rows).mul(100).round(1)\n",
+    "missing_df = pd.concat(\n",
+    "    [missing_data.rename('Missing Count'), missing_pct.rename('Missing %')],\n",
+    "    axis=1,\n",
+    ").sort_values('Missing Count', ascending=False)\n",
+    "print(missing_df.loc[missing_df['Missing Count'] > 0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Create optimized batch visualizations\n",
+    "def plot_generation_distribution(ax):\n",
+    "    \"\"\"Draw a bar chart of the number of Pokemon in each generation.\n",
+    "\n",
+    "    Reads the notebook-level ``df`` and draws onto the supplied ``ax``.\n",
+    "    \"\"\"\n",
+    "    per_generation = df['generation_id'].value_counts().sort_index()\n",
+    "    ax.bar(per_generation.index, per_generation.values, color='skyblue', alpha=0.7)\n",
+    "    ax.set(title='Pokemon by Generation', xlabel='Generation', ylabel='Count')\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "def plot_type_distribution(ax):\n",
+    "    \"\"\"Draw a horizontal bar chart of Pokemon counts per primary type.\n",
+    "\n",
+    "    Reads the notebook-level ``df``; bars are placed at integer positions and\n",
+    "    labelled with the type names from ``type_1``.\n",
+    "    \"\"\"\n",
+    "    counts_by_type = df['type_1'].value_counts()\n",
+    "    positions = range(len(counts_by_type))\n",
+    "    ax.barh(positions, counts_by_type.values, color='lightcoral', alpha=0.7)\n",
+    "    ax.set_yticks(positions)\n",
+    "    ax.set_yticklabels(counts_by_type.index)\n",
+    "    ax.set(title='Pokemon by Primary Type', xlabel='Count')\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "def plot_stats_distribution(ax):\n",
+    "    \"\"\"Draw a 30-bin histogram of the ``total_points`` column onto ``ax``.\n",
+    "\n",
+    "    Reads the notebook-level ``df``.\n",
+    "    \"\"\"\n",
+    "    hist_style = dict(bins=30, color='lightgreen', alpha=0.7, edgecolor='black')\n",
+    "    ax.hist(df['total_points'], **hist_style)\n",
+    "    ax.set(title='Total Stats Distribution', xlabel='Total Points', ylabel='Frequency')\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "def plot_legendary_comparison(ax):\n",
+    "    \"\"\"Compare total-points histograms of legendary and non-legendary Pokemon.\n",
+    "\n",
+    "    Rows of the notebook-level ``df`` are split on ``is_legendary`` and both\n",
+    "    groups are drawn onto ``ax`` in a single ``hist`` call with a legend.\n",
+    "    \"\"\"\n",
+    "    legendary_mask = df['is_legendary'] == True\n",
+    "    normal_mask = df['is_legendary'] == False\n",
+    "    groups = {\n",
+    "        'Normal': df.loc[normal_mask, 'total_points'],\n",
+    "        'Legendary': df.loc[legendary_mask, 'total_points'],\n",
+    "    }\n",
+    "\n",
+    "    ax.hist(list(groups.values()), bins=20, alpha=0.7,\n",
+    "            label=list(groups), color=['lightblue', 'gold'])\n",
+    "    ax.set(title='Stats: Legendary vs Normal Pokemon',\n",
+    "           xlabel='Total Points', ylabel='Frequency')\n",
+    "    ax.legend()\n",
+    "    ax.grid(True, alpha=0.3)\n",
+    "\n",
+    "# Hand all four panel-drawing callables to the batch helper in one go\n",
+    "plot_functions = [\n",
+    "    plot_generation_distribution,\n",
+    "    plot_type_distribution,\n",
+    "    plot_stats_distribution,\n",
+    "    plot_legendary_comparison,\n",
+    "]\n",
+    "batch_plot_optimization(plot_functions, figsize=(16, 12))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optimized correlation analysis\n",
+    "print(\"Statistical Analysis:\")\n",
+    "print(\"=\" * 50)\n",
+    "\n",
+    "# Select numeric columns for correlation\n",
+    "numeric_cols = df.select_dtypes(include=[np.number]).columns\n",
+    "correlation_matrix = df[numeric_cols].corr()\n",
+    "\n",
+    "# Create correlation heatmap\n",
+    "plt.figure(figsize=(12, 10))\n",
+    "sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', center=0, \n",
+    "            square=True, fmt='.2f', cbar_kws={'shrink': 0.8})\n",
+    "plt.title('Pokemon Stats Correlation Matrix', fontsize=14, fontweight='bold')\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "# Print top correlations\n",
+    "print(\"\\nTop Correlations:\")\n",
+    "# The matrix is symmetric, so unstacking it whole lists every pair twice and\n",
+    "# halves the number of distinct pairs shown. Keep only the strict upper\n",
+    "# triangle (k=1): each pair appears once, and the diagonal is dropped by\n",
+    "# position instead of by a value test that would also discard genuinely\n",
+    "# perfectly correlated columns.\n",
+    "upper_triangle = np.triu(np.ones(correlation_matrix.shape, dtype=bool), k=1)\n",
+    "corr_pairs = (\n",
+    "    correlation_matrix.where(upper_triangle)\n",
+    "    .unstack()\n",
+    "    .dropna()  # masked cells and undefined correlations\n",
+    "    .sort_values(ascending=False)\n",
+    ")\n",
+    "print(corr_pairs.head(10))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Performance metrics and optimization results\n",
+    "# memory_usage(deep=True) introspects object columns, so measure once and\n",
+    "# reuse the figure instead of rescanning the frame for every print.\n",
+    "total_bytes = df.memory_usage(deep=True).sum()\n",
+    "total_mb = total_bytes / 1024 / 1024\n",
+    "\n",
+    "print(\"Performance Analysis:\")\n",
+    "print(\"=\" * 50)\n",
+    "print(f\"Dataset size: {df.shape[0]:,} rows × {df.shape[1]} columns\")\n",
+    "print(f\"Memory usage: {total_mb:.2f} MB\")\n",
+    "print(f\"Average memory per row: {total_bytes / df.shape[0]:.2f} bytes\")\n",
+    "\n",
+    "# Data quality metrics\n",
+    "print(\"\\nData Quality:\")\n",
+    "# Completeness = share of non-null cells across the whole frame\n",
+    "print(f\"Completeness: {(1 - df.isna().sum().sum() / df.size) * 100:.1f}%\")\n",
+    "print(f\"Unique Pokemon: {df['name'].nunique():,}\")\n",
+    "print(f\"Duplicate names: {df['name'].duplicated().sum()}\")\n",
+    "\n",
+    "# Optimization summary (plain strings: only the last line interpolates a value)\n",
+    "print(\"\\nOptimization Summary:\")\n",
+    "print(\"✓ Memory-efficient data loading with proper dtypes\")\n",
+    "print(\"✓ Optimized plotting with reduced DPI\")\n",
+    "print(\"✓ Batch processing for multiple visualizations\")\n",
+    "print(\"✓ Vectorized operations for statistical calculations\")\n",
+    "print(f\"✓ Efficient memory usage: {total_mb:.1f} MB\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}