# Vibe Coding by QIU Feng

一个完整的 **Vibe Coding** Jupyter Notebook！这个 Notebook 包含：

## 🎯 主要特色：

1. **📊 数据可视化组合** - Pandas + Matplotlib + Seaborn
2. **🤖 机器学习示例** - PyTorch / TensorFlow 房价预测模型
3. **🕷️ 网页抓取演示** - BeautifulSoup 数据提取
4. **🖼️ 计算机视觉** - OpenCV 图像处理
5. **🚀 API 开发代码** - FastAPI 示例
6. **🎮 游戏开发代码** - Pygame 示例


In [3]:
# pip install pandas matplotlib seaborn beautifulsoup4 requests
# pip install tensorflow torch scikit-learn opencv-python
# pip install fastapi uvicorn pygame



## 1. 📊 Pandas + Matplotlib = 数据处理与可视化



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Configure matplotlib for Chinese text: try the listed fonts in order and
# render the minus sign as an ASCII hyphen instead of the Unicode minus.
plt.rcParams.update({
    'font.sans-serif': ['SimHei', 'Arial Unicode MS', 'DejaVu Sans'],
    'axes.unicode_minus': False,
})

print("🔹 创建模拟电商销售数据")
print("=" * 50)

# Six months of simulated e-commerce figures, one value per month.
data = {
    '月份': [f'{month}月' for month in range(1, 7)],
    '销售额': [120, 135, 148, 162, 180, 195],  # unit: 10k CNY
    '客户数': [45, 52, 58, 64, 72, 78],        # unit: 10k customers
    '订单数': [380, 420, 465, 510, 580, 620],  # unit: 10k orders
}

# Load the raw figures into a DataFrame and show them with summary statistics.
df = pd.DataFrame(data)
for heading, table in (("原始数据预览：", df), ("\n数据统计摘要：", df.describe())):
    print(heading)
    print(table)

# Month-over-month growth in percent; the first month has no predecessor, so it is NaN.
for source_col, growth_col in (('销售额', '销售额增长率'), ('客户数', '客户增长率')):
    df[growth_col] = df[source_col].pct_change().mul(100)

print("\n增长率分析：")
growth_view = df.loc[:, ['月份', '销售额增长率', '客户增长率']]
print(growth_view.round(2))

# Draw a 2x2 dashboard of the monthly figures with matplotlib.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('📊 电商业务数据分析看板', fontsize=16, fontweight='bold')
(ax_sales, ax_customers), (ax_orders, ax_compare) = axes
months = df['月份']

# Top-left: revenue trend as a line chart, with each point labelled.
ax_sales.plot(months, df['销售额'], marker='o', linewidth=2, markersize=8, color='#2E86AB')
ax_sales.set_title('📈 月度销售额趋势', fontsize=12, fontweight='bold')
ax_sales.set_ylabel('销售额 (万元)')
ax_sales.grid(True, alpha=0.3)
# The row index is used as the x coordinate of each annotation.
for position, amount in enumerate(df['销售额']):
    ax_sales.annotate(f'{amount}万', (position, amount),
                      textcoords="offset points", xytext=(0,10), ha='center')

# Top-right: customer counts as a bar chart, value printed above each bar.
bars = ax_customers.bar(months, df['客户数'], color='#A23B72', alpha=0.8)
ax_customers.set_title('👥 月度客户数统计', fontsize=12, fontweight='bold')
ax_customers.set_ylabel('客户数 (万人)')
ax_customers.grid(True, alpha=0.3, axis='y')
for rect, count in zip(bars, df['客户数']):
    label_x = rect.get_x() + rect.get_width()/2
    ax_customers.text(label_x, rect.get_height() + 1,
                      f'{count}万', ha='center', va='bottom')

# Bottom-left: order counts as a filled area with the line drawn on top.
ax_orders.fill_between(months, df['订单数'], alpha=0.6, color='#F18F01')
ax_orders.plot(months, df['订单数'], marker='s', color='#C73E1D', linewidth=2)
ax_orders.set_title('📦 月度订单数趋势', fontsize=12, fontweight='bold')
ax_orders.set_ylabel('订单数 (万单)')
ax_orders.grid(True, alpha=0.3)

# Bottom-right: the three metrics side by side; revenue and orders are divided
# by 10 so all three series share a comparable scale.
x = np.arange(len(months))
width = 0.25
series_specs = (
    (x - width, df['销售额']/10, '销售额(十万元)', '#2E86AB'),
    (x, df['客户数'], '客户数(万人)', '#A23B72'),
    (x + width, df['订单数']/10, '订单数(十万单)', '#F18F01'),
)
for positions, heights, series_label, colour in series_specs:
    ax_compare.bar(positions, heights, width, label=series_label, color=colour, alpha=0.8)

ax_compare.set_title('📊 多指标对比分析', fontsize=12, fontweight='bold')
ax_compare.set_ylabel('数值')
ax_compare.set_xticks(x)
ax_compare.set_xticklabels(months)
ax_compare.legend()
ax_compare.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Headline figures derived from the monthly table.
sales = df['销售额']
customers = df['客户数']
# Growth is measured from the first to the last month, in percent.
sales_growth = (sales.iloc[-1] - sales.iloc[0]) / sales.iloc[0] * 100
customer_growth = (customers.iloc[-1] - customers.iloc[0]) / customers.iloc[0] * 100
# Both columns are in units of 10k, so each is scaled by 10000 before dividing.
spend_per_customer = (sales * 10000 / (customers * 10000)).mean()

print("\n🔍 数据洞察：")
print(f"• 总销售额：{sales.sum()}万元")
print(f"• 平均月销售额：{sales.mean():.1f}万元")
print(f"• 销售额增长：{sales_growth:.1f}%")
print(f"• 客户数增长：{customer_growth:.1f}%")
print(f"• 平均客单价：{spend_per_customer:.0f}元")



## 2. 🎨 Seaborn = 高级统计图表



In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Font fallback list for Chinese labels (same list the first cell uses), so the
# charts still pick a usable font on machines without SimHei.
cjk_fonts = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans']
plt.rcParams['font.sans-serif'] = cjk_fonts
plt.rcParams['axes.unicode_minus'] = False  # draw the minus sign as an ASCII hyphen

import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

print("🔹 使用Seaborn创建高级统计图表")
print("=" * 50)

# Seaborn's style presets include 'font.sans-serif', so a plain
# sns.set_style("whitegrid") would replace the font list configured above with
# seaborn's own defaults. Passing the list as an rc override keeps it in effect.
sns.set_style("whitegrid", {"font.sans-serif": cjk_fonts})
sns.set_palette("husl")

# Build a richer synthetic product-sales dataset.
np.random.seed(42)  # fixed seed so the generated data is reproducible
n_samples = 200

print("📊 生成模拟产品数据集...")

# Categorical values to sample from.
categories = ['电子产品', '服装配饰', '家居用品', '图书音像', '食品饮料']
brands = ['品牌A', '品牌B', '品牌C', '品牌D', '品牌E']
regions = ['华北', '华东', '华南', '华中', '西南']

# Draw every column independently; correlations are injected afterwards.
complex_data = pd.DataFrame({
    '产品类别': np.random.choice(categories, n_samples),
    '品牌': np.random.choice(brands, n_samples),
    '销售区域': np.random.choice(regions, n_samples),
    '价格': np.random.gamma(2, 50, n_samples),  # gamma-distributed prices
    '评分': np.random.beta(8, 2, n_samples) * 4 + 1,  # beta draw rescaled to the 1-5 range
    '销量': np.random.poisson(25, n_samples),  # Poisson-distributed unit sales
    '促销活动': np.random.choice(['是', '否'], n_samples, p=[0.3, 0.7])
})

# Electronics are priced at twice the sampled value.
mask_electronics = complex_data['产品类别'] == '电子产品'
complex_data.loc[mask_electronics, '价格'] *= 2

# Negative price/sales relationship: scale sales by a factor that falls
# linearly from 1.0 at the cheapest product to 0.5 at the most expensive.
price_effect = 1 - (complex_data['价格'] - complex_data['价格'].min()) / (complex_data['价格'].max() - complex_data['价格'].min()) * 0.5
complex_data['销量'] = (complex_data['销量'] * price_effect).astype(int)

# Promotions lift sales by 50%. The column is still integer-typed here, and
# writing x.5 values into an int column through .loc is an incompatible-dtype
# assignment (deprecated in pandas 2.1, documented to raise in a future
# version), so convert the column to float before scaling.
promotion_mask = complex_data['促销活动'] == '是'
complex_data['销量'] = complex_data['销量'].astype(float)
complex_data.loc[promotion_mask, '销量'] *= 1.5

print(f"数据集大小：{len(complex_data)} 条记录")
print("\n数据预览：")
print(complex_data.head())
print("\n各类别数量分布：")
print(complex_data['产品类别'].value_counts())

# Lay out a 3x2 grid of statistical charts.
fig, axes = plt.subplots(3, 2, figsize=(18, 15))
fig.suptitle('🎨 Seaborn高级统计图表分析', fontsize=16, fontweight='bold')
(ax_box, ax_scatter), (ax_heat, ax_violin), (ax_promo, ax_hist) = axes

# 1. Box plot: price distribution per product category.
sns.boxplot(data=complex_data, x='产品类别', y='价格', ax=ax_box)
ax_box.set_title('📦 各类别产品价格分布箱线图', fontsize=12, fontweight='bold')
ax_box.tick_params(axis='x', rotation=45)
ax_box.set_ylabel('价格 (元)')

# 2. Scatter plot: price against rating, coloured by category and sized by sales.
sns.scatterplot(
    data=complex_data, x='价格', y='评分', hue='产品类别',
    size='销量', sizes=(50, 200), alpha=0.7, ax=ax_scatter,
)
ax_scatter.set_title('💎 价格与评分关系散点图', fontsize=12, fontweight='bold')
ax_scatter.set_xlabel('价格 (元)')
ax_scatter.set_ylabel('评分')

# 3. Heatmap: correlations between the numeric columns.
numeric_data = complex_data.select_dtypes(include=[np.number])
correlation_matrix = numeric_data.corr()
# Hide the diagonal and upper triangle so each pair is shown once.
mask = np.triu(np.ones_like(correlation_matrix, dtype=bool))
sns.heatmap(
    correlation_matrix, mask=mask, annot=True, cmap='RdYlBu_r',
    center=0, square=True, ax=ax_heat,
)
ax_heat.set_title('🔥 数据相关性热力图', fontsize=12, fontweight='bold')

# 4. Violin plot: sales distribution per region.
sns.violinplot(data=complex_data, x='销售区域', y='销量', ax=ax_violin)
ax_violin.set_title('🎻 各区域销量分布小提琴图', fontsize=12, fontweight='bold')
ax_violin.set_ylabel('销量 (件)')

# 5. Grouped bars: mean sales per category, split by promotion flag.
mean_sales_by_group = complex_data.groupby(['产品类别', '促销活动'])['销量'].mean()
promotion_effect = mean_sales_by_group.reset_index()
sns.barplot(data=promotion_effect, x='产品类别', y='销量', hue='促销活动', ax=ax_promo)
ax_promo.set_title('📊 促销活动对销量影响', fontsize=12, fontweight='bold')
ax_promo.tick_params(axis='x', rotation=45)
ax_promo.set_ylabel('平均销量 (件)')

# 6. Stacked histogram: price distribution per category.
sns.histplot(
    data=complex_data, x='价格', hue='产品类别',
    multiple="stack", alpha=0.7, ax=ax_hist,
)
ax_hist.set_title('📈 各类别产品价格分布', fontsize=12, fontweight='bold')
ax_hist.set_xlabel('价格 (元)')
ax_hist.set_ylabel('数量')

plt.tight_layout()
plt.show()

# Text report summarising the synthetic dataset.
print("\n📋 统计分析报告：")
print("=" * 50)

by_category = complex_data.groupby('产品类别')

# Price statistics per category.
price_stats = by_category['价格'].agg(['mean', 'std', 'min', 'max'])
print("各类别价格统计：")
print(price_stats.round(2))

# Sales statistics per category.
sales_stats = by_category['销量'].agg(['mean', 'std', 'sum'])
print("\n各类别销量统计：")
print(sales_stats.round(2))

# Promotion versus no promotion: sales, price and rating.
promotion_aggregations = {
    '销量': ['mean', 'sum'],
    '价格': 'mean',
    '评分': 'mean',
}
promotion_analysis = complex_data.groupby('促销活动').agg(promotion_aggregations).round(2)
print("\n促销活动效果分析：")
print(promotion_analysis)

# Key findings: two correlation coefficients and the mean sales gap between
# promoted and non-promoted products.
promoted_mean = complex_data.loc[complex_data['促销活动'] == '是', '销量'].mean()
regular_mean = complex_data.loc[complex_data['促销活动'] == '否', '销量'].mean()
price_sales_corr = correlation_matrix.loc['价格', '销量']
rating_sales_corr = correlation_matrix.loc['评分', '销量']

print("\n🔍 关键发现：")
print(f"• 价格与销量相关系数：{price_sales_corr:.3f}")
print(f"• 评分与销量相关系数：{rating_sales_corr:.3f}")
print(f"• 促销活动平均提升销量：{(promoted_mean - regular_mean):.1f}件")



## 3. 🕷️ BeautifulSoup = 网页数据抓取



In [None]:
from bs4 import BeautifulSoup
import requests
import re

print("🔹 使用BeautifulSoup进行网页数据抓取")
print("=" * 50)

# Scraping demo that works on a local HTML string instead of a live request.
# The markup imitates an e-commerce page: a header with navigation links,
# four product cards (name, price, rating, description, tags) and a
# site-wide statistics section.
sample_html = """
<!DOCTYPE html>
<html>
<head>
    <title>Vibe商城 - 最新产品</title>
    <meta charset="UTF-8">
</head>
<body>
    <div class="header">
        <h1>Vibe商城</h1>
        <nav>
            <ul>
                <li><a href="/electronics">电子产品</a></li>
                <li><a href="/books">图书</a></li>
                <li><a href="/clothing">服装</a></li>
            </ul>
        </nav>
    </div>
    
    <div class="products-container">
        <div class="product" data-category="electronics">
            <img src="laptop.jpg" alt="笔记本电脑">
            <h2 class="product-name">高性能笔记本电脑</h2>
            <p class="price" data-price="5999">¥5,999</p>
            <div class="rating">
                <span class="stars">★★★★★</span>
                <span class="rating-score">4.8</span>
                <span class="review-count">(238条评价)</span>
            </div>
            <p class="description">Intel i7处理器，16GB内存，512GB SSD</p>
            <div class="tags">
                <span class="tag">热销</span>
                <span class="tag">包邮</span>
            </div>
        </div>
        
        <div class="product" data-category="electronics">
            <img src="phone.jpg" alt="智能手机">
            <h2 class="product-name">5G智能手机</h2>
            <p class="price" data-price="3999">¥3,999</p>
            <div class="rating">
                <span class="stars">★★★★☆</span>
                <span class="rating-score">4.5</span>
                <span class="review-count">(156条评价)</span>
            </div>
            <p class="description">6.7英寸屏幕，128GB存储，三摄像头</p>
            <div class="tags">
                <span class="tag">新品</span>
            </div>
        </div>
        
        <div class="product" data-category="books">
            <img src="python_book.jpg" alt="Python编程书">
            <h2 class="product-name">Python编程从入门到实践</h2>
            <p class="price" data-price="89">¥89</p>
            <div class="rating">
                <span class="stars">★★★★★</span>
                <span class="rating-score">4.9</span>
                <span class="review-count">(892条评价)</span>
            </div>
            <p class="description">零基础学Python，实战项目丰富</p>
            <div class="tags">
                <span class="tag">畅销</span>
                <span class="tag">包邮</span>
            </div>
        </div>
        
        <div class="product" data-category="clothing">
            <img src="jacket.jpg" alt="冬季外套">
            <h2 class="product-name">保暖冬季外套</h2>
            <p class="price" data-price="299">¥299</p>
            <div class="rating">
                <span class="stars">★★★★☆</span>
                <span class="rating-score">4.3</span>
                <span class="review-count">(67条评价)</span>
            </div>
            <p class="description">防风保暖，多色可选</p>
            <div class="tags">
                <span class="tag">限时优惠</span>
            </div>
        </div>
    </div>
    
    <div class="statistics">
        <p>总商品数：<span id="total-products">1,250</span></p>
        <p>今日销量：<span id="daily-sales">89</span></p>
        <p>用户评分：<span id="avg-rating">4.6</span></p>
    </div>
</body>
</html>
"""

print("📄 解析HTML内容...")

# Parse the sample markup using BeautifulSoup's 'html.parser' backend.
soup = BeautifulSoup(sample_html, 'html.parser')

# 1. Basic site information: the page title.
print("\n🏪 网站基本信息：")
title_tag = soup.find('title')
title = title_tag.text
print(f"网站标题：{title}")

# Navigation menu: every link inside the first <nav> element.
nav_element = soup.find('nav')
nav_links = nav_element.find_all('a')
print("导航菜单：")
for anchor in nav_links:
    target = anchor.get('href')
    print(f"  • {anchor.text} -> {target}")

# 2. Product extraction: turn every <div class="product"> card into a dict.
print("\n🛍️ 产品信息提取：")
products = []

for product_div in soup.find_all('div', class_='product'):
    # Core fields. The price element is looked up once and used for both the
    # display text and the numeric data-price attribute.
    price_tag = product_div.find('p', class_='price')
    name = product_div.find('h2', class_='product-name').text
    price_text = price_tag.text
    price_value = int(price_tag.get('data-price'))
    category = product_div.get('data-category')
    description = product_div.find('p', class_='description').text

    # Rating block: numeric score plus the review count embedded in text
    # such as "(238条评价)".
    rating_div = product_div.find('div', class_='rating')
    rating_score = float(rating_div.find('span', class_='rating-score').text)
    review_text = rating_div.find('span', class_='review-count').text
    # Accept thousands separators: with a plain r'(\d+)' a count written as
    # "1,250" would be read as 1. Text without any digits counts as 0 reviews.
    review_match = re.search(r'\d[\d,]*', review_text)
    review_count = int(review_match.group().replace(',', '')) if review_match else 0

    # Tags, in document order.
    tags = [tag.text for tag in product_div.find_all('span', class_='tag')]

    products.append({
        '产品名称': name,
        '价格': price_value,
        '价格显示': price_text,
        '类别': category,
        '描述': description,
        '评分': rating_score,
        '评价数量': review_count,
        '标签': ', '.join(tags)
    })

# Collect the records into a DataFrame for the analysis below.
scraped_df = pd.DataFrame(products)
print(f"成功抓取 {len(scraped_df)} 个产品")
print("\n产品数据预览：")
print(scraped_df)

# 3. Aggregate the scraped products per category.
print("\n🔧 数据处理和分析：")

# Price spread and count, mean rating and total reviews for each category.
category_aggregations = {
    '价格': ['mean', 'min', 'max', 'count'],
    '评分': 'mean',
    '评价数量': 'sum',
}
products_by_category = scraped_df.groupby('类别')
category_stats = products_by_category.agg(category_aggregations).round(2)

print("各类别统计：")
print(category_stats)

# 4. Visualise the scraped data on a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(15, 10))
fig.suptitle('🕷️ 网页抓取数据分析', fontsize=16, fontweight='bold')
(ax_price, ax_rating), (ax_reviews, ax_tags) = axes

# Top-left: mean price per category, labelled above each bar.
category_prices = scraped_df.groupby('类别')['价格'].mean()
bars = ax_price.bar(
    category_prices.index, category_prices.values,
    color=['#FF6B6B', '#4ECDC4', '#45B7D1'], alpha=0.8,
)
ax_price.set_title('📊 各类别平均价格', fontsize=12, fontweight='bold')
ax_price.set_ylabel('价格 (元)')
ax_price.tick_params(axis='x', rotation=45)
for rect, mean_price in zip(bars, category_prices.values):
    label_x = rect.get_x() + rect.get_width()/2
    ax_price.text(label_x, rect.get_height() + 50,
                  f'¥{mean_price:.0f}', ha='center', va='bottom')

# Top-right: price against rating; marker area follows the review count and
# the colour follows the row position.
scatter = ax_rating.scatter(
    scraped_df['价格'], scraped_df['评分'],
    s=scraped_df['评价数量']*0.5, alpha=0.7,
    c=range(len(scraped_df)), cmap='viridis',
)
ax_rating.set_title('💎 价格与评分关系', fontsize=12, fontweight='bold')
ax_rating.set_xlabel('价格 (元)')
ax_rating.set_ylabel('评分')
ax_rating.grid(True, alpha=0.3)

# Bottom-left: review count per product.
ax_reviews.bar(scraped_df['产品名称'], scraped_df['评价数量'],
               color='#96CEB4', alpha=0.8)
ax_reviews.set_title('📝 各产品评价数量', fontsize=12, fontweight='bold')
ax_reviews.set_ylabel('评价数量')
ax_reviews.tick_params(axis='x', rotation=45)

# Bottom-right: tag frequencies as horizontal bars (a stand-in for a word
# cloud). The '标签' column holds tags joined with ', ', so split them again.
all_tags = [tag for joined in scraped_df['标签'] for tag in joined.split(', ')]
tag_counts = pd.Series(all_tags).value_counts()

ax_tags.barh(tag_counts.index, tag_counts.values, color='#FFEAA7', alpha=0.8)
ax_tags.set_title('🏷️ 标签使用频率', fontsize=12, fontweight='bold')
ax_tags.set_xlabel('使用次数')

plt.tight_layout()
plt.show()

# 5. Pull the site-wide figures out of the statistics section.
print("\n🔍 高级数据提取示例：")

statistics_div = soup.find('div', class_='statistics')
stats = {}
# Labels to look for, in priority order; a paragraph is stored under the first
# label its text contains, with the text of its <span> as the value.
stat_labels = ('总商品数', '今日销量', '用户评分')
for paragraph in statistics_div.find_all('p'):
    text = paragraph.text
    matched_label = next((label for label in stat_labels if label in text), None)
    if matched_label is not None:
        stats[matched_label] = paragraph.find('span').text

print("网站统计信息：")
for stat_name, stat_value in stats.items():
    print(f"  • {stat_name}: {stat_value}")

# Business metrics computed from the scraped products.
print("\n📈 业务指标计算：")
avg_price = scraped_df['价格'].mean()
# idxmax() returns the label of the first row holding the maximum.
highest_rated = scraped_df.loc[scraped_df['评分'].idxmax()]
most_reviewed = scraped_df.loc[scraped_df['评价数量'].idxmax()]

print(f"• 平均产品价格: ¥{avg_price:.2f}")
print(f"• 评分最高产品: {highest_rated['产品名称']} (评分: {highest_rated['评分']})")
print(f"• 评价最多产品: {most_reviewed['产品名称']} (评价: {most_reviewed['评价数量']}条)")

# Price-band distribution. The last band is labelled "5000元以上" (above
# 5000), so its upper edge is left open; with a fixed edge of 10000 any more
# expensive product would fall outside every bin and be dropped from the counts.
price_ranges = pd.cut(scraped_df['价格'], bins=[0, 100, 1000, 5000, float('inf')],
                     labels=['0-100元', '100-1000元', '1000-5000元', '5000元以上'])
price_distribution = price_ranges.value_counts()
print("\n价格区间分布:")
for range_name, count in price_distribution.items():
    print(f"  • {range_name}: {count}个产品")

print("\n✅ 网页抓取完成！成功提取并分析了所有产品数据。")



## 4. 🤖 TensorFlow/PyTorch = 机器学习模型



In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_absolute_percentage_error

print("🔹 使用PyTorch构建机器学习模型")
print("=" * 50)

# Report the installed PyTorch version and whether CUDA is usable.
print(f"✅ PyTorch 版本: {torch.__version__}")
gpu_status = '可用' if torch.cuda.is_available() else '不可用'
print(f"✅ 可用GPU: {gpu_status}")

# 1. Synthetic house-price dataset (regression problem).
print("\n🏠 创建房价预测数据集...")
np.random.seed(42)  # fixed seed so the generated data is reproducible
n_samples = 2000

# Feature columns.
house_data = pd.DataFrame({
    '面积': np.random.normal(120, 40, n_samples),  # floor area, square metres
    '房间数': np.random.randint(1, 6, n_samples),   # number of rooms, 1-5
    '楼层': np.random.randint(1, 31, n_samples),    # floor, 1-30
    '房龄': np.random.randint(0, 30, n_samples),    # building age in years, 0-29
    '距离地铁': np.random.exponential(2, n_samples),  # distance to the metro, km
    '学区房': np.random.choice([0, 1], n_samples, p=[0.7, 0.3]),  # school-district flag
    '装修情况': np.random.choice([0, 1, 2], n_samples, p=[0.3, 0.5, 0.2])  # 0 basic, 1 refined, 2 luxury
})

# Keep the continuous features inside plausible ranges.
house_data['面积'] = np.clip(house_data['面积'], 30, 300)
house_data['距离地铁'] = np.clip(house_data['距离地铁'], 0.1, 10)

# Target: a linear combination of the features.
base_price = (
    house_data['面积'] * 500 +                    # larger area -> higher price
    house_data['房间数'] * 15000 +                # more rooms -> higher price
    # Higher floors cost more. The previous (31 - floor) term did the opposite
    # of this stated intent and made the ground floor the most expensive.
    house_data['楼层'] * 2000 +
    (30 - house_data['房龄']) * 3000 +            # newer buildings cost more
    (10 - house_data['距离地铁']) * 8000 +        # closer to the metro costs more
    house_data['学区房'] * 200000 +               # school-district premium
    house_data['装修情况'] * 50000                # better renovation costs more
)

# Add Gaussian noise, then clamp the price to a fixed range.
noise = np.random.normal(0, 50000, n_samples)
house_data['价格'] = base_price + noise
house_data['价格'] = np.clip(house_data['价格'], 100000, 2000000)

print("数据集基本信息：")
print(house_data.describe())
print(f"\n数据集形状: {house_data.shape}")

# 2. Preprocessing: split, standardise, wrap in tensors and data loaders.
print("\n🔧 数据预处理...")
# NumPy and scikit-learn are already seeded with 42; seed torch as well so the
# validation split, batch shuffling and later weight initialisation repeat
# between runs.
torch.manual_seed(42)

X = house_data.drop('价格', axis=1).values
y = house_data['价格'].values.reshape(-1, 1)  # column vector, matching the model's (N, 1) output

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert to float32 tensors.
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Pair features with targets.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Carve 20% of the training rows off as a validation set.
train_size = int(0.8 * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Only the training loader shuffles.
batch_size = 32
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

print(f"训练集大小: {len(train_dataset)}")
print(f"验证集大小: {len(val_dataset)}")
print(f"测试集大小: {len(test_dataset)}")

# 3. Neural network definition.
print("\n🧠 构建神经网络模型...")

class HousePriceModel(nn.Module):
    """Fully connected regressor: input -> 128 -> 64 -> 32 -> 1.

    Every hidden layer is followed by ReLU; the first two are additionally
    followed by dropout (p=0.3, then p=0.2). The layers live in
    ``self.network``, an ``nn.Sequential``.
    """

    def __init__(self, input_size):
        """Build the layer stack for ``input_size`` input features."""
        super().__init__()
        hidden_sizes = (128, 64, 32)
        dropout_rates = (0.3, 0.2, None)  # None: no dropout after that layer

        layers = []
        in_features = input_size
        for out_features, rate in zip(hidden_sizes, dropout_rates):
            layers.append(nn.Linear(in_features, out_features))
            layers.append(nn.ReLU())
            if rate is not None:
                layers.append(nn.Dropout(rate))
            in_features = out_features
        layers.append(nn.Linear(in_features, 1))  # single regression output

        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the network output for the batch ``x``."""
        return self.network(x)

# Instantiate the model on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
n_features = X_train_scaled.shape[1]
model = HousePriceModel(n_features).to(device)
print("模型结构：")
print(model)

# Mean-squared-error loss, optimised with Adam.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training settings: at most `num_epochs` epochs, stopping early once the
# validation loss has failed to improve for `patience` consecutive epochs.
print("\n🚀 开始训练模型...")
num_epochs, patience = 100, 10
best_val_loss = float('inf')
counter = 0  # epochs since the last validation improvement

# Per-epoch metric history, used by the plots later on.
history = {metric: [] for metric in ('train_loss', 'val_loss', 'train_mae', 'val_mae')}

# NOTE(review): the targets are raw prices (not standardised), so the MSE
# values reported below are on the scale of price squared.
for epoch in range(num_epochs):
    # ---- training pass ----
    model.train()
    train_loss = train_mae = 0.0
    for batch_x, batch_y in train_loader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        # Forward pass.
        preds = model(batch_x)
        loss = criterion(preds, batch_y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate per-batch statistics.
        train_loss += loss.item()
        train_mae += (preds - batch_y).abs().mean().item()

    # ---- validation pass (dropout off, no gradients) ----
    model.eval()
    val_loss = val_mae = 0.0
    with torch.no_grad():
        for batch_x, batch_y in val_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            preds = model(batch_x)

            val_loss += criterion(preds, batch_y).item()
            val_mae += (preds - batch_y).abs().mean().item()

    # Turn the sums into per-batch averages.
    n_train_batches = len(train_loader)
    n_val_batches = len(val_loader)
    train_loss /= n_train_batches
    val_loss /= n_val_batches
    train_mae /= n_train_batches
    val_mae /= n_val_batches

    # Record this epoch.
    epoch_metrics = (
        ('train_loss', train_loss),
        ('val_loss', val_loss),
        ('train_mae', train_mae),
        ('val_mae', val_mae),
    )
    for metric, value in epoch_metrics:
        history[metric].append(value)

    # Progress line.
    print(f"Epoch [{epoch+1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, "
          f"Train MAE: {train_mae:.2f}, Val MAE: {val_mae:.2f}")

    # Early stopping: checkpoint on every improvement, otherwise count the
    # stale epoch and stop once the patience budget is used up.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        torch.save(model.state_dict(), 'best_model.pth')
        continue

    counter += 1
    if counter >= patience:
        print(f"早停在第 {epoch+1} 轮")
        break

# Restore the best checkpoint and switch to inference mode.
best_state = torch.load('best_model.pth')
model.load_state_dict(best_state)
model.eval()

# 4. Evaluate the restored model on the held-out test set.
print("\n📊 模型评估...")

test_loss = 0.0
test_mae = 0.0
n_test_samples = 0
all_targets = []
all_predictions = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = model(inputs)

        # `criterion` and the MAE below are per-batch means. Weight each by the
        # batch size so the final, smaller batch does not count as much as a
        # full one; averaging the batch means directly would bias the result.
        batch_n = targets.size(0)
        test_loss += criterion(outputs, targets).item() * batch_n
        test_mae += torch.mean(torch.abs(outputs - targets)).item() * batch_n
        n_test_samples += batch_n

        all_targets.append(targets.cpu().numpy())
        all_predictions.append(outputs.cpu().numpy())

# Per-sample averages over the whole test set.
test_loss /= n_test_samples
test_mae /= n_test_samples

# Stitch the per-batch arrays back together.
all_targets = np.concatenate(all_targets)
all_predictions = np.concatenate(all_predictions)

# Additional metrics from scikit-learn.
r2 = r2_score(all_targets, all_predictions)
mape = mean_absolute_percentage_error(all_targets, all_predictions)

print(f"测试集损失 (MSE): {test_loss:.2f}")
print(f"测试集平均绝对误差 (MAE): {test_mae:.2f}")
print(f"R² 分数: {r2:.4f}")
print(f"平均绝对百分比误差 (MAPE): {mape:.4f}")

# 5. Plot the training history and the prediction quality.
print("\n📈 结果可视化...")

fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('🤖 PyTorch房价预测模型分析', fontsize=16, fontweight='bold')
(ax_loss, ax_mae), (ax_fit, ax_resid) = axes

# Top-left: training and validation loss per epoch.
loss_curves = (('train_loss', '训练损失', 'blue'), ('val_loss', '验证损失', 'red'))
for metric, curve_label, colour in loss_curves:
    ax_loss.plot(history[metric], label=curve_label, color=colour)
ax_loss.set_title('📉 模型训练历史', fontsize=12, fontweight='bold')
ax_loss.set_xlabel('轮次')
ax_loss.set_ylabel('损失')
ax_loss.legend()
ax_loss.grid(True, alpha=0.3)

# Top-right: training and validation MAE per epoch.
mae_curves = (('train_mae', '训练MAE', 'green'), ('val_mae', '验证MAE', 'orange'))
for metric, curve_label, colour in mae_curves:
    ax_mae.plot(history[metric], label=curve_label, color=colour)
ax_mae.set_title('📊 平均绝对误差历史', fontsize=12, fontweight='bold')
ax_mae.set_xlabel('轮次')
ax_mae.set_ylabel('MAE')
ax_mae.legend()
ax_mae.grid(True, alpha=0.3)

# Bottom-left: predicted against actual prices, with the y = x reference line.
ax_fit.scatter(all_targets, all_predictions, alpha=0.6, color='purple')
lowest, highest = all_targets.min(), all_targets.max()
ax_fit.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
ax_fit.set_title('🎯 预测值 vs 实际值', fontsize=12, fontweight='bold')
ax_fit.set_xlabel('实际房价')
ax_fit.set_ylabel('预测房价')
ax_fit.grid(True, alpha=0.3)

# Bottom-right: histogram of residuals (actual minus predicted). Both arrays
# are flattened first so the subtraction is element-wise.
residuals = all_targets.flatten() - all_predictions.flatten()
ax_resid.hist(residuals, bins=30, alpha=0.7, color='cyan', edgecolor='black')
ax_resid.set_title('📊 残差分布', fontsize=12, fontweight='bold')
ax_resid.set_xlabel('残差')
ax_resid.set_ylabel('频次')
ax_resid.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('pytorch_house_price_analysis.png')
plt.show()

# 6. Rough feature importance from the first layer's weights.
print("\n🔍 特征重要性分析...")

# nn.Linear stores its weight as (out_features, in_features), so averaging the
# absolute weights over axis 0 gives one score per input feature.
first_layer = model.network[0]
weights = first_layer.weight.detach().cpu().numpy()
feature_importance = np.abs(weights).mean(axis=0)

feature_names = house_data.columns.drop('价格')
importance_table = pd.DataFrame({
    '特征': feature_names,
    '重要性': feature_importance
})
importance_df = importance_table.sort_values('重要性', ascending=False)

print("特征重要性排序：")
print(importance_df)

# Bar chart of the scores, highest first.
plt.figure(figsize=(10, 6))
bars = plt.bar(
    importance_df['特征'], importance_df['重要性'],
    color='lightblue', alpha=0.8, edgecolor='navy',
)
plt.title('🎯 特征重要性分析', fontsize=14, fontweight='bold')
plt.xlabel('特征')
plt.ylabel('重要性分数')
plt.xticks(rotation=45)
plt.grid(True, alpha=0.3, axis='y')

# Print each score just above its bar.
for rect, score in zip(bars, importance_df['重要性']):
    label_x = rect.get_x() + rect.get_width()/2
    plt.text(label_x, rect.get_height() + 0.001,
             f'{score:.3f}', ha='center', va='bottom')

plt.tight_layout()
plt.savefig('feature_importance.png')
plt.show()

# 7. Predict prices for a few hand-written example houses.
print("\n🏡 实际预测示例...")

# Columns must appear in the same order as the training features.
sample_houses = pd.DataFrame({
    '面积': [90, 130, 200],
    '房间数': [2, 3, 4],
    '楼层': [15, 8, 3],
    '房龄': [5, 15, 2],
    '距离地铁': [0.8, 2.5, 1.2],
    '学区房': [1, 0, 1],
    '装修情况': [2, 1, 2]
})

# The scaler was fitted on a plain ndarray, so pass an ndarray here as well;
# handing it a DataFrame triggers scikit-learn's feature-name mismatch warning.
sample_scaled = scaler.transform(sample_houses.to_numpy())
sample_tensor = torch.tensor(sample_scaled, dtype=torch.float32).to(device)

# Inference without gradient tracking.
with torch.no_grad():
    predictions = model(sample_tensor).cpu().numpy()

print("示例房子预测结果：")
# Iterate over per-row dicts rather than iterrows(): iterrows() upcasts each
# row to float64, which printed counts as "2.0室" and needed an int() cast
# before the value could be used as a list index.
for i, house in enumerate(sample_houses.to_dict('records')):
    pred_price = predictions[i][0]
    print(f"\n房子 {i+1}:")
    print(f"  • 面积: {house['面积']}㎡")
    print(f"  • 房间数: {house['房间数']}室")
    print(f"  • 楼层: {house['楼层']}层")
    print(f"  • 房龄: {house['房龄']}年")
    print(f"  • 距离地铁: {house['距离地铁']:.1f}km")
    print(f"  • 学区房: {'是' if house['学区房'] else '否'}")
    print(f"  • 装修: {['简装', '精装', '豪装'][int(house['装修情况'])]}")
    print(f"  🏷️ 预测价格: ¥{pred_price:,.0f}")

print("\n✅ PyTorch机器学习示例完成！")

## 5. 🖼️ OpenCV = 计算机视觉

In [None]:
# NOTE(review): this cell sits under the "OpenCV" heading but contains the
# TensorFlow variant of the house-price example — confirm the intended heading.
print("🔹 使用TensorFlow构建机器学习模型")
print("=" * 50)

# Check the dependencies. `tf` stays None when the imports fail, so later code
# sees a defined name instead of raising NameError.
tf = None
try:
    # scikit-learn goes first: the data-preparation code after this block
    # needs train_test_split/StandardScaler even when TensorFlow is missing,
    # and importing them after TensorFlow left them unbound in that case.
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import StandardScaler, LabelEncoder
    from sklearn.metrics import classification_report, confusion_matrix
    import tensorflow as tf
    print(f"✅ TensorFlow 版本: {tf.__version__}")
    print(f"✅ 可用GPU: {len(tf.config.list_physical_devices('GPU'))} 个")
except ImportError:
    print("❌ 请先安装 TensorFlow: pip install tensorflow scikit-learn")
    print("继续展示代码示例...")

# 1. Synthetic house-price dataset (regression problem).
print("\n🏠 创建房价预测数据集...")
np.random.seed(42)  # fixed seed so the generated data is reproducible
n_samples = 2000

# Feature columns.
house_data = pd.DataFrame({
    '面积': np.random.normal(120, 40, n_samples),  # floor area, square metres
    '房间数': np.random.randint(1, 6, n_samples),   # number of rooms, 1-5
    '楼层': np.random.randint(1, 31, n_samples),    # floor, 1-30
    '房龄': np.random.randint(0, 30, n_samples),    # building age in years, 0-29
    '距离地铁': np.random.exponential(2, n_samples),  # distance to the metro, km
    '学区房': np.random.choice([0, 1], n_samples, p=[0.7, 0.3]),  # school-district flag
    '装修情况': np.random.choice([0, 1, 2], n_samples, p=[0.3, 0.5, 0.2])  # 0 basic, 1 refined, 2 luxury
})

# Keep the continuous features inside plausible ranges.
house_data['面积'] = np.clip(house_data['面积'], 30, 300)
house_data['距离地铁'] = np.clip(house_data['距离地铁'], 0.1, 10)

# Target: a linear combination of the features.
base_price = (
    house_data['面积'] * 500 +                    # larger area -> higher price
    house_data['房间数'] * 15000 +                # more rooms -> higher price
    # Higher floors cost more. The previous (31 - floor) term did the opposite
    # of this stated intent and made the ground floor the most expensive.
    house_data['楼层'] * 2000 +
    (30 - house_data['房龄']) * 3000 +            # newer buildings cost more
    (10 - house_data['距离地铁']) * 8000 +        # closer to the metro costs more
    house_data['学区房'] * 200000 +               # school-district premium
    house_data['装修情况'] * 50000                # better renovation costs more
)

# Add Gaussian noise, then clamp the price to a fixed range.
noise = np.random.normal(0, 50000, n_samples)
house_data['价格'] = base_price + noise
house_data['价格'] = np.clip(house_data['价格'], 100000, 2000000)

print("数据集基本信息：")
print(house_data.describe())
print(f"\n数据集形状: {house_data.shape}")

# 2. Preprocessing: separate features from the target, split, standardise.
print("\n🔧 数据预处理...")
target_column = '价格'
X = house_data.drop(columns=target_column)
y = house_data[target_column]

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features; the scaler is fitted on the training rows only.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

train_shape, test_shape = X_train_scaled.shape, X_test_scaled.shape
print(f"训练集大小: {train_shape}")
print(f"测试集大小: {test_shape}")

# 3. 构建神经网络模型
print("\n🧠 构建神经网络模型...")

try:
    # 创建回归模型
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train_scaled.shape[1],)),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(1)  # 输出层，回归问题
    ])

    # 编译模型
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='mse',
        metrics=['mae']
    )

    # 显示模型结构
    print("模型结构：")
    model.summary()

    # 训练模型
    print("\n🚀 开始训练模型...")
    
    # 设置回调函数
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss', patience=10, restore_best_weights=True
    )
    
    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss', factor=0.5, patience=5, min_lr=0.00001
    )

    # 训练模型
    history = model.fit(
        X_train_scaled, y_train,
        epochs=100,
        batch_size=32,
        validation_split=0.2,
        callbacks=[early_stopping, reduce_lr],
        verbose=1
    )

    # 4. 模型评估
    print("\n📊 模型评估...")
    
    # 在测试集上评估
    test_loss, test_mae = model.evaluate(X_test_scaled, y_test, verbose=0)
    print(f"测试集损失 (MSE): {test_loss:.2f}")
    print(f"测试集平均绝对误差 (MAE): {test_mae:.2f}")

    # 预测
    y_pred = model.predict(X_test_scaled, verbose=0)
    
    # 计算更多指标
    from sklearn.metrics import r2_score, mean_absolute_percentage_error
    r2 = r2_score(y_test, y_pred)
    mape = mean_absolute_percentage_error(y_test, y_pred)
    
    print(f"R² 分数: {r2:.4f}")
    print(f"平均绝对百分比误差 (MAPE): {mape:.4f}")

    # 5. 可视化结果
    print("\n📈 结果可视化...")
    
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    fig.suptitle('🤖 TensorFlow房价预测模型分析', fontsize=16, fontweight='bold')

    # 子图1：训练历史
    axes[0,0].plot(history.history['loss'], label='训练损失', color='blue')
    axes[0,0].plot(history.history['val_loss'], label='验证损失', color='red')
    axes[0,0].set_title('📉 模型训练历史', fontsize=12, fontweight='bold')
    axes[0,0].set_xlabel('轮次')
    axes[0,0].set_ylabel('损失')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # 子图2：MAE历史
    axes[0,1].plot(history.history['mae'], label='训练MAE', color='green')
    axes[0,1].plot(history.history['val_mae'], label='验证MAE', color='orange')
    axes[0,1].set_title('📊 平均绝对误差历史', fontsize=12, fontweight='bold')
    axes[0,1].set_xlabel('轮次')
    axes[0,1].set_ylabel('MAE')
    axes[0,1].legend()
    axes[0,1].grid(True, alpha=0.3)

    # 子图3：预测vs实际
    axes[1,0].scatter(y_test, y_pred, alpha=0.6, color='purple')
    axes[1,0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
    axes[1,0].set_title('🎯 预测值 vs 实际值', fontsize=12, fontweight='bold')
    axes[1,0].set_xlabel('实际房价')
    axes[1,0].set_ylabel('预测房价')
    axes[1,0].grid(True, alpha=0.3)

    # 子图4：残差分布
    residuals = y_test - y_pred.flatten()
    axes[1,1].hist(residuals, bins=30, alpha=0.7, color='cyan', edgecolor='black')
    axes[1,1].set_title('📊 残差分布', fontsize=12, fontweight='bold')
    axes[1,1].set_xlabel('残差')
    axes[1,1].set_ylabel('频次')
    axes[1,1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # 6. Feature-importance estimate approximated from the model weights.
    print("\n🔍 特征重要性分析...")

    # Score = mean absolute value along axis 1 of the first layer's first weight array.
    # NOTE(review): assumes that array is laid out (n_features, units) — confirm
    # against the model definition earlier in the cell.
    first_layer_weights = model.layers[0].get_weights()[0]
    feature_importance = np.mean(np.abs(first_layer_weights), axis=1)

    feature_names = X.columns
    importance_df = pd.DataFrame({'特征': feature_names, '重要性': feature_importance})
    importance_df = importance_df.sort_values('重要性', ascending=False)

    print("特征重要性排序：")
    print(importance_df)

    # Bar chart of the scores in descending order.
    plt.figure(figsize=(10, 6))
    bars = plt.bar(
        importance_df['特征'],
        importance_df['重要性'],
        color='lightblue',
        alpha=0.8,
        edgecolor='navy',
    )
    plt.title('🎯 特征重要性分析', fontsize=14, fontweight='bold')
    plt.xlabel('特征')
    plt.ylabel('重要性分数')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3, axis='y')

    # Write each score centred just above its bar.
    for bar, value in zip(bars, importance_df['重要性']):
        label_x = bar.get_x() + bar.get_width()/2
        label_y = bar.get_height() + 0.001
        plt.text(label_x, label_y, f'{value:.3f}', ha='center', va='bottom')

    plt.tight_layout()
    plt.show()

    # 7. Worked prediction examples
    print("\n🏡 实际预测示例...")

    # Three hand-made listings, one value per feature column.
    sample_houses = pd.DataFrame({
        '面积': [90, 130, 200],
        '房间数': [2, 3, 4],
        '楼层': [15, 8, 3],
        '房龄': [5, 15, 2],
        '距离地铁': [0.8, 2.5, 1.2],
        '学区房': [1, 0, 1],
        '装修情况': [2, 1, 2]
    })

    # Scale with the `scaler` object from earlier in the cell, then predict.
    sample_scaled = scaler.transform(sample_houses)
    predictions = model.predict(sample_scaled, verbose=0)

    # Index -> label for the '装修情况' code.
    decoration_labels = ['简装', '精装', '豪装']

    print("示例房子预测结果：")
    # Iterate per-column-typed records instead of iterrows(): iterrows() builds
    # one Series per row, and the float '距离地铁' column forces every value in
    # that Series to float. A float cannot index `decoration_labels` (TypeError),
    # and the integer fields would print as "90.0" rather than "90".
    for i, house in enumerate(sample_houses.to_dict('records')):
        pred_price = predictions[i][0]
        print(f"\n房子 {i+1}:")
        print(f"  • 面积: {house['面积']}㎡")
        print(f"  • 房间数: {house['房间数']}室")
        print(f"  • 楼层: {house['楼层']}层")
        print(f"  • 房龄: {house['房龄']}年")
        print(f"  • 距离地铁: {house['距离地铁']:.1f}km")
        print(f"  • 学区房: {'是' if house['学区房'] else '否'}")
        print(f"  • 装修: {decoration_labels[house['装修情况']]}")
        print(f"  🏷️ 预测价格: ¥{pred_price:,.0f}")

except Exception as e:
    print(f"❌ 模型训练出错: {e}")
    print("这可能是因为没有安装TensorFlow或版本不兼容")

print("\n✅ TensorFlow机器学习示例完成！")

In [None]:
print("🔹 使用OpenCV进行计算机视觉处理")
print("=" * 50)

# Probe for the OpenCV dependency. A missing package is reported but does not
# stop the cell; the code below runs inside its own try/except.
try:
    import cv2
except ImportError:
    print("❌ 请先安装 OpenCV: pip install opencv-python")
    print("继续展示代码示例...")
else:
    print(f"✅ OpenCV 版本: {cv2.__version__}")

# 1. Build the demo image
print("\n🎨 创建和处理图像...")

# Colour canvas of shape (height, width, 3), dtype uint8.
img_width, img_height = 800, 600
canvas = np.zeros((img_height, img_width, 3), dtype=np.uint8)

# Vertical gradient background: every row gets a single colour whose strength
# grows with the row index. `ramp` holds one truncated 0..254 value per row and
# the three channels are that value floor-divided by 3, 2 and 1.
ramp = (255 * (np.arange(img_height) / img_height)).astype(int)
canvas[:] = np.stack((ramp // 3, ramp // 2, ramp), axis=-1)[:, np.newaxis, :]

print("🖌️ 在画布上绘制各种图形...")

try:
    # Draw the shapes. Colour tuples are in BGR order: the canvas is converted
    # with COLOR_BGR2RGB before it is displayed further down in this cell.
    white = (255, 255, 255)

    # Rectangle: green fill, then a 3 px white outline.
    rect_top_left, rect_bottom_right = (50, 50), (250, 200)
    cv2.rectangle(canvas, rect_top_left, rect_bottom_right, (0, 255, 100), -1)
    cv2.rectangle(canvas, rect_top_left, rect_bottom_right, white, 3)

    # Circle: blue fill (BGR 255, 100, 0), white outline.
    circle_center, circle_radius = (400, 125), 80
    cv2.circle(canvas, circle_center, circle_radius, (255, 100, 0), -1)
    cv2.circle(canvas, circle_center, circle_radius, white, 3)

    # Ellipse rotated by 45 degrees, full 0-360 arc: magenta fill, white outline.
    ellipse_geometry = ((600, 125), (100, 60), 45, 0, 360)
    cv2.ellipse(canvas, *ellipse_geometry, (255, 0, 255), -1)
    cv2.ellipse(canvas, *ellipse_geometry, white, 3)

    # Pentagon: amber fill (BGR 0, 200, 255), closed white outline.
    points = np.array([[150, 300], [50, 450], [100, 550], [200, 550], [250, 450]], np.int32)
    cv2.fillPoly(canvas, [points], (0, 200, 255))
    cv2.polylines(canvas, [points], True, white, 3)

    # Two text captions.
    caption_font = cv2.FONT_HERSHEY_SIMPLEX
    cv2.putText(canvas, 'OpenCV Demo', (350, 350), caption_font, 2, white, 3)
    cv2.putText(canvas, 'Computer Vision', (320, 400), caption_font, 1.2, (200, 200, 200), 2)

    # Straight lines: cyan horizontal (BGR 255, 255, 0), yellow vertical (BGR 0, 255, 255).
    cv2.line(canvas, (50, 500), (750, 500), (255, 255, 0), 5)
    cv2.line(canvas, (400, 50), (400, 550), (0, 255, 255), 3)

    print("✅ 图形绘制完成")

    # 2. Image-processing operations
    print("\n🔧 应用各种图像处理技术...")

    # Grayscale copy and the Canny edge map derived from it (thresholds 50 / 150).
    gray = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)

    # Gaussian blur with a 15x15 kernel.
    blurred = cv2.GaussianBlur(canvas, (15, 15), 0)

    # Morphological closing of the edge map with a 5x5 rectangular kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    morphed = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    # HSV copy of the canvas.
    hsv = cv2.cvtColor(canvas, cv2.COLOR_BGR2HSV)

    # Colour-range extraction: mask the pixels inside the green HSV box, then
    # keep only those pixels of the canvas.
    lower_green, upper_green = np.array([40, 50, 50]), np.array([80, 255, 255])
    green_mask = cv2.inRange(hsv, lower_green, upper_green)
    green_result = cv2.bitwise_and(canvas, canvas, mask=green_mask)
    
    # 3. Combined gallery of the processing results
    print("\n📊 创建图像处理结果展示...")

    fig, axes = plt.subplots(3, 3, figsize=(18, 15))
    fig.suptitle('🖼️ OpenCV计算机视觉处理展示', fontsize=16, fontweight='bold')

    # External contours found on the Canny edge map, drawn onto a copy of the canvas.
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contour_img = canvas.copy()
    cv2.drawContours(contour_img, contours, -1, (255, 255, 0), 2)

    # One (image, title, colormap) entry per panel, in row-major order. Colour
    # images are converted to RGB for matplotlib; single-channel images use the
    # gray colormap.
    gallery = [
        (cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB), '🎨 原始图像', None),
        (cv2.cvtColor(blurred, cv2.COLOR_BGR2RGB), '🌫️ 高斯模糊', None),
        (gray, '⚫ 灰度图像', 'gray'),
        (edges, '🔍 Canny边缘检测', 'gray'),
        (morphed, '🔧 形态学处理', 'gray'),
        (cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB), '🌈 HSV颜色空间', None),
        (green_mask, '🟢 绿色区域掩码', 'gray'),
        (cv2.cvtColor(green_result, cv2.COLOR_BGR2RGB), '🎯 绿色区域提取', None),
        (cv2.cvtColor(contour_img, cv2.COLOR_BGR2RGB), '📐 轮廓检测', None),
    ]
    for panel, (picture, caption, colormap) in zip(axes.flat, gallery):
        panel.imshow(picture, cmap=colormap)
        panel.set_title(caption, fontsize=12, fontweight='bold')
        panel.axis('off')

    plt.tight_layout()
    plt.show()
    
    # 4. Image analysis and statistics
    print("\n📈 图像分析和统计...")

    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('📊 图像分析统计', fontsize=16, fontweight='bold')

    # Per-channel colour histogram. The canvas is handled as BGR throughout this
    # cell (it is converted with COLOR_BGR2RGB for display), so channel index 0
    # is blue and index 2 is red; each curve is drawn and labelled accordingly.
    colors = ['blue', 'green', 'red']
    for i, color in enumerate(colors):
        hist = cv2.calcHist([canvas], [i], None, [256], [0, 256])
        axes[0,0].plot(hist, color=color, alpha=0.7, linewidth=2, label=color.capitalize())
    axes[0,0].set_title('🎨 RGB颜色直方图', fontsize=12, fontweight='bold')
    axes[0,0].set_xlabel('像素值')
    axes[0,0].set_ylabel('频次')
    axes[0,0].legend()
    axes[0,0].grid(True, alpha=0.3)

    # Grayscale histogram, drawn as a line with a shaded area underneath.
    gray_hist = cv2.calcHist([gray], [0], None, [256], [0, 256])
    axes[0,1].plot(gray_hist, color='black', linewidth=2)
    axes[0,1].fill_between(range(256), gray_hist.flatten(), alpha=0.3, color='gray')
    axes[0,1].set_title('⚫ 灰度直方图', fontsize=12, fontweight='bold')
    axes[0,1].set_xlabel('灰度值')
    axes[0,1].set_ylabel('频次')
    axes[0,1].grid(True, alpha=0.3)

    # Contour analysis: only contours whose area exceeds 100 px² are measured.
    large_contours = [contour for contour in contours if cv2.contourArea(contour) > 100]
    contour_areas = [cv2.contourArea(contour) for contour in large_contours]
    contour_perimeters = [cv2.arcLength(contour, True) for contour in large_contours]

    axes[1,0].bar(range(len(contour_areas)), contour_areas, color='skyblue', alpha=0.8)
    axes[1,0].set_title('📐 轮廓面积分析', fontsize=12, fontweight='bold')
    axes[1,0].set_xlabel('轮廓编号')
    axes[1,0].set_ylabel('面积 (像素²)')
    axes[1,0].grid(True, alpha=0.3, axis='y')

    # Image property statistics
    stats_data = {
        '属性': ['图像宽度', '图像高度', '总像素数', '轮廓数量', '平均亮度', '标准差'],
        '数值': [
            canvas.shape[1],
            canvas.shape[0],
            canvas.shape[0] * canvas.shape[1],
            len(contours),
            np.mean(gray),
            np.std(gray)
        ]
    }

    stats_df = pd.DataFrame(stats_data)

    # The table needs one row of cell text per row label, each row holding a
    # single cell for the single '数值' column. Values are formatted from the raw
    # list rather than the DataFrame column, which is float-typed and would
    # render the integer counts with two decimals.
    cell_text = [
        [f'{val:.2f}' if isinstance(val, float) else str(val)]
        for val in stats_data['数值']
    ]

    axes[1,1].axis('tight')
    axes[1,1].axis('off')
    table = axes[1,1].table(cellText=cell_text,
                           rowLabels=stats_data['属性'],
                           colLabels=['数值'],
                           cellLoc='center',
                           loc='center')
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    axes[1,1].set_title('📊 图像属性统计', fontsize=12, fontweight='bold')

    plt.tight_layout()
    plt.show()
    
    # 5. Image filter demo
    print("\n🔬 各种图像滤波器演示...")

    # Smoothing filters applied to the colour canvas: 15x15 box blur, median
    # blur with aperture 15, and a bilateral filter.
    mean_filtered = cv2.blur(canvas, (15, 15))
    median_filtered = cv2.medianBlur(canvas, 15)
    bilateral_filtered = cv2.bilateralFilter(canvas, 15, 80, 80)

    # Laplacian of the grayscale image, computed in float64. The magnitude is
    # clipped to [0, 255] before the uint8 cast: a direct cast wraps values
    # above 255 around instead of saturating, which turns strong edges dark.
    laplacian = cv2.Laplacian(gray, cv2.CV_64F)
    laplacian = np.clip(np.absolute(laplacian), 0, 255).astype(np.uint8)

    # Sobel gradients along x and y, combined into a gradient magnitude and
    # clipped for the same reason as above.
    sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    sobel_combined = np.sqrt(sobelx**2 + sobely**2)
    sobel_combined = np.clip(sobel_combined, 0, 255).astype(np.uint8)

    # Show the results in a 2x3 grid.
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))
    fig.suptitle('🔬 图像滤波器效果展示', fontsize=16, fontweight='bold')

    # Original image
    axes[0,0].imshow(cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB))
    axes[0,0].set_title('🎨 原始图像', fontsize=12, fontweight='bold')
    axes[0,0].axis('off')

    # Mean filter
    axes[0,1].imshow(cv2.cvtColor(mean_filtered, cv2.COLOR_BGR2RGB))
    axes[0,1].set_title('📊 均值滤波', fontsize=12, fontweight='bold')
    axes[0,1].axis('off')

    # Median filter
    axes[0,2].imshow(cv2.cvtColor(median_filtered, cv2.COLOR_BGR2RGB))
    axes[0,2].set_title('🎯 中值滤波', fontsize=12, fontweight='bold')
    axes[0,2].axis('off')

    # Bilateral filter
    axes[1,0].imshow(cv2.cvtColor(bilateral_filtered, cv2.COLOR_BGR2RGB))
    axes[1,0].set_title('🔄 双边滤波', fontsize=12, fontweight='bold')
    axes[1,0].axis('off')

    # Laplacian edge response
    axes[1,1].imshow(laplacian, cmap='gray')
    axes[1,1].set_title('⚡ 拉普拉斯边缘检测', fontsize=12, fontweight='bold')
    axes[1,1].axis('off')

    # Sobel gradient magnitude
    axes[1,2].imshow(sobel_combined, cmap='gray')
    axes[1,2].set_title('🔍 Sobel边缘检测', fontsize=12, fontweight='bold')
    axes[1,2].axis('off')

    plt.tight_layout()
    plt.show()
    
    # Recap of the run, printed one fact per line.
    canvas_height, canvas_width = canvas.shape[:2]
    summary_lines = (
        "\n📋 OpenCV处理总结：",
        "=" * 50,
        f"✅ 成功创建 {canvas_width}x{canvas_height} 像素的图像",
        f"✅ 检测到 {len(contours)} 个轮廓",
        "✅ 应用了 8 种不同的图像处理技术",
        f"✅ 生成了 {len(contour_areas)} 个有效轮廓分析",
        f"✅ 图像平均亮度: {np.mean(gray):.2f}",
        f"✅ 图像对比度(标准差): {np.std(gray):.2f}",
    )
    print(*summary_lines, sep="\n")
    
    # 6. Practical application example: cleaning up a noisy "document" image
    print("\n🎯 实际应用场景示例...")

    # Synthetic document: a white 400x600 page.
    doc_img = np.ones((400, 600, 3), dtype=np.uint8) * 255

    # Black rectangles stand in for lines of text; each entry is
    # (top-left corner, bottom-right corner).
    text_blocks = [
        ((50, 50), (550, 80)),
        ((50, 100), (500, 130)),
        ((50, 150), (520, 180)),
        ((50, 200), (480, 230)),
        ((50, 280), (300, 310)),
        ((350, 280), (550, 310))
    ]

    for start, end in text_blocks:
        cv2.rectangle(doc_img, start, end, (0, 0, 0), -1)

    # Zero-mean Gaussian noise with standard deviation 25. The noise stays in
    # floating point and the sum is clipped to [0, 255] before the uint8 cast;
    # casting the noise itself to uint8 would wrap its negative half to large
    # positive values and make the noise one-sided.
    noise = np.random.normal(0, 25, doc_img.shape)
    noisy_doc = np.clip(doc_img + noise, 0, 255).astype(np.uint8)

    # Document-processing pipeline: grayscale -> median denoise -> fixed
    # threshold at 127 -> morphological closing with a 3x3 rectangular kernel.
    doc_gray = cv2.cvtColor(noisy_doc, cv2.COLOR_BGR2GRAY)
    denoised = cv2.medianBlur(doc_gray, 5)
    _, binary = cv2.threshold(denoised, 127, 255, cv2.THRESH_BINARY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    # Show each pipeline stage in a 2x2 grid.
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    fig.suptitle('📄 文档图像处理流程', fontsize=16, fontweight='bold')

    axes[0,0].imshow(cv2.cvtColor(noisy_doc, cv2.COLOR_BGR2RGB))
    axes[0,0].set_title('📄 带噪声的文档', fontsize=12, fontweight='bold')
    axes[0,0].axis('off')

    axes[0,1].imshow(denoised, cmap='gray')
    axes[0,1].set_title('🧹 去噪处理', fontsize=12, fontweight='bold')
    axes[0,1].axis('off')

    axes[1,0].imshow(binary, cmap='gray')
    axes[1,0].set_title('⚫⚪ 二值化', fontsize=12, fontweight='bold')
    axes[1,0].axis('off')

    axes[1,1].imshow(cleaned, cmap='gray')
    axes[1,1].set_title('✨ 形态学清理', fontsize=12, fontweight='bold')
    axes[1,1].axis('off')

    plt.tight_layout()
    plt.show()

except Exception as e:
    print(f"❌ OpenCV处理出错: {e}")
    print("请确保已正确安装OpenCV: pip install opencv-python")

print("\n✅ OpenCV计算机视觉示例完成！")



## 6. 🚀 FastAPI = 高性能Web API



In [None]:
print("🔹 FastAPI高性能Web API开发")
print("=" * 50)

# FastAPI example program. This cell only prints it; it is not executed here,
# and the usage notes printed further down tell the reader to save it as main.py.
#
# The literal is a raw string so the backslash in the e-mail regex reaches the
# printed code verbatim instead of being read as an (invalid) escape sequence.
#
# Points the example relies on:
#   * the e-mail constraint uses Field(pattern=...), the Pydantic v2 keyword;
#   * CORS credentials are disabled because the origin list is the "*" wildcard;
#   * user IDs come from the highest existing ID, so a delete cannot cause a
#     later create to reuse the ID of a user that still exists;
#   * update_user applies the same duplicate-e-mail rule as create_user;
#   * the /ws handler annotates its parameter as WebSocket and stops cleanly on
#     WebSocketDisconnect;
#   * uvicorn.run() receives the import string "main:app", which reload=True needs.
fastapi_code = r'''
from fastapi import FastAPI, HTTPException, Depends, BackgroundTasks, WebSocket, WebSocketDisconnect
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel, Field
from typing import List, Optional
import uvicorn
import asyncio
import time
from datetime import datetime
import json

# 创建FastAPI应用实例
app = FastAPI(
    title="🚀 Vibe Coding API",
    description="一个展示FastAPI强大功能的示例API",
    version="1.0.0",
    docs_url="/docs",  # Swagger UI
    redoc_url="/redoc"  # ReDoc
)

# 添加CORS中间件
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# 数据模型定义
class User(BaseModel):
    id: Optional[int] = None
    name: str = Field(..., min_length=1, max_length=50)
    email: str = Field(..., pattern=r'^[^@]+@[^@]+\.[^@]+$')
    age: int = Field(..., ge=0, le=150)
    is_active: bool = True

class UserResponse(BaseModel):
    id: int
    name: str
    email: str
    age: int
    is_active: bool
    created_at: datetime

class Product(BaseModel):
    id: Optional[int] = None
    name: str
    price: float = Field(..., gt=0)
    description: Optional[str] = None
    category: str
    in_stock: bool = True

# 模拟数据库
users_db = []
products_db = []
logs_db = []

# 依赖注入示例
async def get_current_time():
    return datetime.now()

# 后台任务示例
def log_operation(operation: str, details: str):
    log_entry = {
        "timestamp": datetime.now().isoformat(),
        "operation": operation,
        "details": details
    }
    logs_db.append(log_entry)
    print(f"📝 记录操作: {operation} - {details}")

# 根路径
@app.get("/", tags=["基础"])
async def root():
    """
    🏠 API根路径 - 返回欢迎信息
    """
    return {
        "message": "🚀 欢迎使用 Vibe Coding API！",
        "version": "1.0.0",
        "docs": "/docs",
        "features": ["用户管理", "产品管理", "实时数据", "后台任务"]
    }

# 健康检查
@app.get("/health", tags=["系统"])
async def health_check(current_time: datetime = Depends(get_current_time)):
    """
    ❤️ 系统健康检查
    """
    return {
        "status": "healthy",
        "timestamp": current_time,
        "uptime": "正常运行",
        "database": "连接正常"
    }

# 用户管理API
@app.post("/users/", response_model=UserResponse, tags=["用户管理"])
async def create_user(
    user: User, 
    background_tasks: BackgroundTasks,
    current_time: datetime = Depends(get_current_time)
):
    """
    👤 创建新用户
    """
    # 检查邮箱是否已存在
    if any(u["email"] == user.email for u in users_db):
        raise HTTPException(status_code=400, detail="邮箱已存在")
    
    # 创建用户
    user_id = max((u["id"] for u in users_db), default=0) + 1
    user_data = {
        "id": user_id,
        "name": user.name,
        "email": user.email,
        "age": user.age,
        "is_active": user.is_active,
        "created_at": current_time
    }
    users_db.append(user_data)
    
    # 添加后台任务
    background_tasks.add_task(
        log_operation, 
        "CREATE_USER", 
        f"用户 {user.name} 已创建"
    )
    
    return user_data

@app.get("/users/", response_model=List[UserResponse], tags=["用户管理"])
async def get_users(skip: int = 0, limit: int = 10):
    """
    📋 获取用户列表
    """
    return users_db[skip:skip + limit]

@app.get("/users/{user_id}", response_model=UserResponse, tags=["用户管理"])
async def get_user(user_id: int):
    """
    🔍 根据ID获取用户信息
    """
    user = next((u for u in users_db if u["id"] == user_id), None)
    if not user:
        raise HTTPException(status_code=404, detail="用户不存在")
    return user

@app.put("/users/{user_id}", response_model=UserResponse, tags=["用户管理"])
async def update_user(user_id: int, user_update: User, background_tasks: BackgroundTasks):
    """
    ✏️ 更新用户信息
    """
    user_index = next((i for i, u in enumerate(users_db) if u["id"] == user_id), None)
    if user_index is None:
        raise HTTPException(status_code=404, detail="用户不存在")
    
    # 检查邮箱是否已被其他用户使用
    if any(u["email"] == user_update.email and u["id"] != user_id for u in users_db):
        raise HTTPException(status_code=400, detail="邮箱已存在")
    
    # 更新用户数据
    users_db[user_index].update({
        "name": user_update.name,
        "email": user_update.email,
        "age": user_update.age,
        "is_active": user_update.is_active
    })
    
    background_tasks.add_task(
        log_operation, 
        "UPDATE_USER", 
        f"用户 {user_id} 信息已更新"
    )
    
    return users_db[user_index]

@app.delete("/users/{user_id}", tags=["用户管理"])
async def delete_user(user_id: int, background_tasks: BackgroundTasks):
    """
    🗑️ 删除用户
    """
    user_index = next((i for i, u in enumerate(users_db) if u["id"] == user_id), None)
    if user_index is None:
        raise HTTPException(status_code=404, detail="用户不存在")
    
    deleted_user = users_db.pop(user_index)
    background_tasks.add_task(
        log_operation, 
        "DELETE_USER", 
        f"用户 {deleted_user['name']} 已删除"
    )
    
    return {"message": f"用户 {deleted_user['name']} 已成功删除"}

# 产品管理API
@app.post("/products/", tags=["产品管理"])
async def create_product(product: Product, background_tasks: BackgroundTasks):
    """
    🛍️ 创建新产品
    """
    product_id = len(products_db) + 1
    product_data = {
        "id": product_id,
        "name": product.name,
        "price": product.price,
        "description": product.description,
        "category": product.category,
        "in_stock": product.in_stock,
        "created_at": datetime.now()
    }
    products_db.append(product_data)
    
    background_tasks.add_task(
        log_operation, 
        "CREATE_PRODUCT", 
        f"产品 {product.name} 已创建"
    )
    
    return product_data

@app.get("/products/", tags=["产品管理"])
async def get_products(category: Optional[str] = None, in_stock: Optional[bool] = None):
    """
    📦 获取产品列表（支持筛选）
    """
    filtered_products = products_db
    
    if category:
        filtered_products = [p for p in filtered_products if p["category"] == category]
    
    if in_stock is not None:
        filtered_products = [p for p in filtered_products if p["in_stock"] == in_stock]
    
    return filtered_products

# 实时数据API
@app.get("/stats/realtime", tags=["统计数据"])
async def get_realtime_stats():
    """
    📊 获取实时统计数据
    """
    return {
        "timestamp": datetime.now(),
        "total_users": len(users_db),
        "active_users": len([u for u in users_db if u["is_active"]]),
        "total_products": len(products_db),
        "products_in_stock": len([p for p in products_db if p["in_stock"]]),
        "total_operations": len(logs_db)
    }

# 异步数据处理
@app.get("/data/process", tags=["数据处理"])
async def process_data_async():
    """
    ⚡ 异步数据处理示例
    """
    start_time = time.time()
    
    # 模拟异步数据处理
    await asyncio.sleep(1)  # 模拟耗时操作
    
    processing_time = time.time() - start_time
    
    return {
        "status": "completed",
        "processing_time": f"{processing_time:.2f}秒",
        "processed_items": 1000,
        "timestamp": datetime.now()
    }

# 文件上传示例
@app.post("/upload/", tags=["文件操作"])
async def upload_file(background_tasks: BackgroundTasks):
    """
    📁 文件上传示例（模拟）
    """
    # 在实际应用中，这里会处理文件上传
    file_info = {
        "filename": "example.jpg",
        "size": "2.5MB",
        "uploaded_at": datetime.now(),
        "status": "success"
    }
    
    background_tasks.add_task(
        log_operation, 
        "FILE_UPLOAD", 
        f"文件 {file_info['filename']} 上传成功"
    )
    
    return file_info

# 操作日志API
@app.get("/logs/", tags=["系统"])
async def get_operation_logs(limit: int = 50):
    """
    📝 获取操作日志
    """
    if limit <= 0:
        return []
    return logs_db[-limit:]

# WebSocket支持示例
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """
    🔌 WebSocket连接示例
    """
    await websocket.accept()
    try:
        while True:
            # 发送实时数据
            stats = {
                "timestamp": datetime.now().isoformat(),
                "users": len(users_db),
                "products": len(products_db),
                "random_value": time.time() % 100
            }
            await websocket.send_text(json.dumps(stats))
            await asyncio.sleep(5)  # 每5秒发送一次
    except WebSocketDisconnect:
        pass

if __name__ == "__main__":
    print("🚀 启动 FastAPI 服务器...")
    print("📖 API文档: http://localhost:8000/docs")
    print("📚 ReDoc文档: http://localhost:8000/redoc")
    
    uvicorn.run(
        "main:app", 
        host="0.0.0.0", 
        port=8000,
        reload=True,  # 开发模式，代码更改时自动重载
        log_level="info"
    )
'''

# Dump the example program under an 80-column rule.
print("FastAPI完整示例代码:", "=" * 80, fastapi_code, sep="\n")

# Step-by-step instructions for running the example.
usage_steps = (
    "1. 📝 将上述代码保存为 'main.py'",
    "2. 📦 安装依赖: pip install fastapi uvicorn",
    "3. 🚀 运行服务: uvicorn main:app --reload",
    "4. 🌐 访问API文档: http://localhost:8000/docs",
    "5. 📚 查看ReDoc: http://localhost:8000/redoc",
)
print("\n🛠️ 使用说明:", "=" * 50, *usage_steps, sep="\n")

# Feature checklist for the example API; each entry is printed on its own
# line with a two-space indent.
features = [
    "✅ 自动API文档生成 (Swagger UI + ReDoc)",
    "✅ 数据验证和序列化 (Pydantic)",
    "✅ 异步支持和高性能",
    "✅ 依赖注入系统",
    "✅ 后台任务处理",
    "✅ CORS中间件支持",
    "✅ WebSocket实时通信",
    "✅ 错误处理和状态码",
    "✅ 查询参数和路径参数",
    "✅ 请求体验证",
    "✅ 响应模型定义",
    "✅ 标签分组和文档组织"
]

print("\n🎯 API功能特性:", "=" * 50, sep="\n")
print(*(f"  {item}" for item in features), sep="\n")

# Route overview as (method, path, description) triples, printed as an aligned
# table: method padded to 10 columns, path padded to 20.
endpoints = [
    ("GET", "/", "获取API欢迎信息"),
    ("GET", "/health", "健康检查"),
    ("POST", "/users/", "创建用户"),
    ("GET", "/users/", "获取用户列表"),
    ("GET", "/users/{user_id}", "获取单个用户"),
    ("PUT", "/users/{user_id}", "更新用户"),
    ("DELETE", "/users/{user_id}", "删除用户"),
    ("POST", "/products/", "创建产品"),
    ("GET", "/products/", "获取产品列表"),
    ("GET", "/stats/realtime", "实时统计数据"),
    ("GET", "/data/process", "异步数据处理"),
    ("POST", "/upload/", "文件上传"),
    ("GET", "/logs/", "操作日志"),
    ("WebSocket", "/ws", "实时数据推送")
]

print("\n📡 API端点示例:", "=" * 50, sep="\n")
for verb, path, summary in endpoints:
    print("  {:10} {:20} - {}".format(verb, path, summary))

# Ways to exercise the API: browser docs, curl, Postman, and Python requests.
testing_notes = (
    "1. 🌐 浏览器访问: http://localhost:8000/docs",
    "2. 📱 使用curl命令:",
    "   curl -X GET http://localhost:8000/",
    "   curl -X POST http://localhost:8000/users/ \\",
    "        -H 'Content-Type: application/json' \\",
    "        -d '{\"name\":\"张三\",\"email\":\"zhang@example.com\",\"age\":25}'",
    "3. 🔍 使用Postman或其他API测试工具",
    "4. 📊 Python requests库:",
)
print("\n🔧 测试API的方法:", "=" * 50, *testing_notes, sep="\n")

# Client-side snippet shown to the reader; it is printed, not executed.
requests_example = '''
import requests

# 获取API信息
response = requests.get("http://localhost:8000/")
print(response.json())

# 创建用户
user_data = {
    "name": "李四",
    "email": "lisi@example.com", 
    "age": 30
}
response = requests.post("http://localhost:8000/users/", json=user_data)
print(response.json())

# 获取用户列表
response = requests.get("http://localhost:8000/users/")
print(response.json())
'''

print(requests_example)

# Closing lines of the FastAPI section.
print(
    "\n✅ FastAPI示例展示完成！",
    "💡 这个API展示了现代Web开发的最佳实践，包括类型提示、自动文档、异步处理等特性。",
    sep="\n",
)