比较压缩率

In [None]:
!pip install seaborn

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm 

# Use the SimHei font so Chinese labels render, and keep the minus sign
# displayable alongside it.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Where the per-file compression statistics are written.
output_csv = "./data_700face/compression_results.csv"

def count_v_f(filepath):
    """Count the vertex and face records of a Wavefront OBJ file.

    A line counts as a vertex when it starts with ``'v '`` and as a face when
    it starts with ``'f '``; every other line is ignored.

    Args:
        filepath: Path of the OBJ file to scan.

    Returns:
        A ``(v_count, f_count)`` tuple, or ``(None, None)`` when the file
        could not be read (the error is printed, not raised).
    """
    v_count = 0
    f_count = 0
    try:
        # Decode explicitly instead of relying on the locale default. Only the
        # ASCII prefixes matter for counting, so undecodable bytes are
        # replaced rather than aborting the whole file.
        with open(filepath, 'r', encoding='utf-8', errors='replace') as file:
            for line in tqdm(file, desc=f"Processing {os.path.basename(filepath)}", unit=" lines"):
                if line.startswith('v '):
                    v_count += 1
                elif line.startswith('f '):
                    f_count += 1
    except Exception as e:
        # Best effort: report the failure and let the caller skip this file.
        print(f"\n文件读取错误 {filepath}: {str(e)}")
        return None, None
    return v_count, f_count

def get_obj_files(folder):
    """Return a ``{file name: joined path}`` dict for the ``.obj`` entries in ``folder``."""
    obj_paths = {}
    for entry in os.listdir(folder):
        # Match on the lowercase suffix only, exactly as written.
        if not entry.endswith('.obj'):
            continue
        obj_paths[entry] = os.path.join(folder, entry)
    return obj_paths

# Folder paths (adjust to the local layout)
folder1 = "./data_700face/new_p1"  # compressed meshes
folder2 = "./data_700face/new2"    # original meshes

# Only files that exist under the same name in both folders are compared
folder1_files = get_obj_files(folder1)
folder2_files = get_obj_files(folder2)
common_files = set(folder1_files.keys()) & set(folder2_files.keys())

if not common_files:
    print("没有找到同名的OBJ文件！")
else:
    results = []
    # The context manager closes the overall progress bar even when the loop
    # is interrupted.
    with tqdm(total=len(common_files), desc="总体进度") as pbar:
        for filename in sorted(common_files):
            try:
                v1, f1 = count_v_f(folder1_files[filename])
                v2, f2 = count_v_f(folder2_files[filename])

                # count_v_f reports an unreadable file as (None, None)
                if None in (v1, f1, v2, f2):
                    continue

                # Percentage of elements removed; 0 when the original has none
                v_ratio = (1 - v1 / v2) * 100 if v2 != 0 else 0
                f_ratio = (1 - f1 / f2) * 100 if f2 != 0 else 0

                results.append({
                    "filename": filename,
                    "original_vertices": v2,
                    "compressed_vertices": v1,
                    "vertex_ratio": v_ratio,
                    "original_faces": f2,
                    "compressed_faces": f1,
                    "face_ratio": f_ratio
                })
            except Exception as e:
                print(f"\n处理 {filename} 时发生异常: {str(e)}")
            finally:
                pbar.update(1)

    if not results:
        # An empty frame has no columns: saving it would leave an unreadable
        # CSV behind and every plot below would fail on a missing column.
        print("\n没有成功统计的文件，已跳过保存与绘图。")
    else:
        df = pd.DataFrame(results)

        # Persist the raw statistics
        df.to_csv(output_csv, index=False)
        print(f"\n结果已保存至 {output_csv}")

        print("\n正在生成分析图表...")
        n_files = len(df)  # actual sample size shown in the titles

        # 1. Compression-ratio histograms (one panel per ratio)
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))

        sns.histplot(df['vertex_ratio'], bins=100, ax=ax1, color='blue', kde=True)
        ax1.set_title(f'顶点压缩率分布 (N={n_files:,})')
        ax1.set_xlabel('压缩率 (%)')
        ax1.set_ylabel('文件数量')
        ax1.axvline(df['vertex_ratio'].median(), color='red', linestyle='--')

        sns.histplot(df['face_ratio'], bins=100, ax=ax2, color='green', kde=True)
        ax2.set_title(f'面压缩率分布 (N={n_files:,})')
        ax2.set_xlabel('压缩率 (%)')
        ax2.set_ylabel('文件数量')
        ax2.axvline(df['face_ratio'].median(), color='red', linestyle='--')

        plt.tight_layout()
        plt.show()

        # 2. Compression ratio against original size (hexbin for large data)
        # Log-scaled binning needs strictly positive x values.
        positive = df[df['original_vertices'] > 0]
        if positive.empty:
            print("\n没有顶点数大于0的文件，已跳过顶点压缩率分布图。")
        else:
            plt.figure(figsize=(12, 6))
            # xscale='log' makes hexbin bin in log10 space; binning linearly
            # and switching the axis afterwards distorts the hexagons.
            hb = plt.hexbin(positive['original_vertices'], positive['vertex_ratio'],
                            gridsize=100, cmap='Blues', bins='log', xscale='log')
            plt.colorbar(hb, label='数据密度 (log scale)')
            plt.title('顶点压缩率 vs 原始顶点数量')
            plt.xlabel('原始顶点数 (log scale)')
            plt.ylabel('压缩率 (%)')
            plt.xscale('log')
            plt.show()

        # 3. Best / worst 20 files by vertex compression ratio
        top20 = df.nlargest(20, 'vertex_ratio')
        bottom20 = df.nsmallest(20, 'vertex_ratio')

        plt.figure(figsize=(14, 8))
        plt.subplot(2, 1, 1)
        sns.barplot(x='filename', y='vertex_ratio', data=top20, palette='viridis')
        plt.title('顶点压缩率TOP20')
        plt.xticks(rotation=90)

        plt.subplot(2, 1, 2)
        sns.barplot(x='filename', y='vertex_ratio', data=bottom20, palette='rocket')
        plt.title('顶点压缩率BOTTOM20')
        plt.xticks(rotation=90)

        plt.tight_layout()
        plt.show()

        # 4. Box plot of both ratios
        plt.figure(figsize=(10, 6))
        sns.boxplot(data=df[['vertex_ratio', 'face_ratio']],
                    palette='Pastel1',
                    showfliers=False)  # hide outliers
        plt.title('压缩率分布箱线图')
        plt.ylabel('压缩率 (%)')
        plt.show()

        # 5. Joint distribution of vertex and face ratios
        # jointplot creates its own figure (sized by `height`), so no
        # plt.figure() call precedes it; one would only show up empty.
        sns.jointplot(x=df['vertex_ratio'], y=df['face_ratio'],
                      kind='hex',
                      height=8,
                      marginal_kws=dict(bins=30),
                      joint_kws=dict(gridsize=50))
        plt.suptitle('顶点与面压缩率联合分布')
        plt.subplots_adjust(top=0.95)
        plt.show()

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import numpy as np
from scipy import stats
from scipy.stats import gaussian_kde

# Global matplotlib defaults applied to every figure in this cell.
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],  # font able to render the Chinese labels
    'axes.unicode_minus': False,
    'figure.dpi': 150,
    'savefig.bbox': 'tight',  # trim surrounding whitespace on save
    'font.size': 12,
    'axes.titlesize': 14,
    'axes.labelsize': 12,
    'xtick.labelsize': 10,
    'ytick.labelsize': 10,
})

# Statistics CSV and the folder receiving the rendered figures; the folder is
# created up front so the savefig calls can rely on it.
output_csv = "D:/临时/data_process2/compression_results.csv"
output_figures = "D:/临时/data_process2/2"
os.makedirs(output_figures, exist_ok=True)


def _plot_ratio_histograms(df):
    """Save side-by-side histograms of the vertex and face compression ratios."""
    fig, axes = plt.subplots(1, 2, figsize=(18, 7))
    panels = (
        ('vertex_ratio', 'royalblue', '顶点压缩率分布'),
        ('face_ratio', 'forestgreen', '面压缩率分布'),
    )
    for ax, (column, color, title) in zip(axes, panels):
        sns.histplot(df[column], bins=100, ax=ax, color=color, kde=True,
                     edgecolor='white', linewidth=0.5)
        median = df[column].median()
        ax.axvline(median, color='crimson', linestyle='--', linewidth=1.5)
        ax.set_title(f'{title}（中位数:{median:.1f}%）', pad=20)
        ax.set_xlabel('压缩率 (%)')
        # Label both panels the same way instead of leaving the right one on
        # the plotting library's default.
        ax.set_ylabel('文件数量')
        ax.grid(True, linestyle='--', alpha=0.6)

    plt.tight_layout(pad=3.0)
    plt.savefig(os.path.join(output_figures, '01_压缩率分布直方图.png'), dpi=300)
    plt.close(fig)


def _plot_vertex_ratio_vs_size(df):
    """Save a log-x hexbin of vertex ratio against original vertex count."""
    # Log-scaled binning and the log10 regression both need positive x values.
    valid_data = df[(df['original_vertices'] > 0) & (df['vertex_ratio'].notna())]
    if valid_data.empty:
        print("没有顶点数大于0的有效数据，已跳过顶点压缩率分布图。")
        return

    fig = plt.figure(figsize=(14, 7))
    # xscale='log' makes hexbin bin in log10 space; binning linearly and
    # switching the axis afterwards distorts the hexagons.
    hb = plt.hexbin(valid_data['original_vertices'], valid_data['vertex_ratio'],
                    gridsize=100, cmap='Blues', bins='log', xscale='log',
                    edgecolors='none', mincnt=1)

    # Vertical markers for the size classes: (boundary, colour, label)
    scale_markers = (
        (1e3, '#FF6B6B', '小规模 (1e2-1e3)'),
        (1e4, '#4ECDC4', '中等规模 (1e3-1e5)'),
        (1e5, '#45B7D1', '大规模 (>1e5)'),
    )
    for boundary, color, label in scale_markers:
        plt.axvline(boundary, color=color, linestyle='--', linewidth=1.5, alpha=0.8)
        plt.text(boundary * 1.2, 95, label,
                 color=color,
                 rotation=45,
                 verticalalignment='top',
                 fontsize=9,
                 bbox=dict(facecolor='white', alpha=0.8, edgecolor='none'))

    x_log = np.log10(valid_data['original_vertices'])
    y = valid_data['vertex_ratio']

    # A regression needs at least two distinct x values.
    has_trend = len(valid_data) >= 2 and x_log.min() < x_log.max()
    if has_trend:
        slope, intercept, r_value, _, _ = stats.linregress(x_log, y)
        # The fit is linear in log10(x), i.e. a straight line on the log axis,
        # so its two end points are enough to draw it.
        x_ends = np.array([x_log.min(), x_log.max()])
        plt.plot(10 ** x_ends, slope * x_ends + intercept,
                 color='red', linewidth=2,
                 label=f'y = {slope:.1f}log(x) + {intercept:.1f}\n(R²={r_value ** 2:.2f})')

    plt.xscale('log')
    plt.xticks([1e2, 1e3, 1e4, 1e5, 1e6],
               ['100', '1K', '10K', '100K', '1M'])
    plt.xlim(left=50)  # smallest x value shown
    plt.colorbar(hb, label='数据密度（log尺度）', pad=0.02)
    plt.title('顶点压缩率 vs 原始顶点数量', pad=20, fontsize=14)
    plt.xlabel('原始顶点数（log尺度）', labelpad=12)
    plt.ylabel('压缩率 (%)', labelpad=12)
    if has_trend:
        plt.legend(loc='upper right', frameon=True, shadow=True)
    plt.grid(True, linestyle=':', alpha=0.5)

    plt.savefig(os.path.join(output_figures, '02_顶点压缩率分布图.png'), dpi=300)
    plt.close(fig)


def _plot_top_bottom(df):
    """Save bar charts of the 20 highest and 20 lowest vertex ratios."""
    fig = plt.figure(figsize=(16, 12))
    panels = (
        (df.nlargest(20, 'vertex_ratio'), 'viridis', '顶点压缩率TOP20'),
        (df.nsmallest(20, 'vertex_ratio'), 'rocket_r', '顶点压缩率BOTTOM20'),
    )
    for position, (subset, palette, title) in enumerate(panels, start=1):
        ax = plt.subplot(2, 1, position)
        sns.barplot(x='filename', y='vertex_ratio', data=subset,
                    palette=palette, ax=ax)
        plt.title(title, pad=15)
        plt.xticks(rotation=45, ha='right', fontsize=9)
        plt.ylabel('压缩率 (%)', labelpad=10)
        plt.grid(axis='y', linestyle='--', alpha=0.6)

    plt.tight_layout(h_pad=3.0)
    plt.savefig(os.path.join(output_figures, '03_TOP_BOTTOM对比图.png'), dpi=300)
    plt.close(fig)


def _plot_ratio_boxplot(df):
    """Save a box plot comparing the vertex and face compression ratios."""
    fig = plt.figure(figsize=(12, 7))
    sns.boxplot(data=df[['vertex_ratio', 'face_ratio']],
                palette='Pastel1',
                showfliers=False,
                width=0.6,
                linewidth=1.5)

    plt.title('压缩率分布箱线图', pad=20)
    plt.ylabel('压缩率 (%)', labelpad=10)
    plt.xticks([0, 1], ['顶点压缩率', '面压缩率'], rotation=0)
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.savefig(os.path.join(output_figures, '04_压缩率箱线图.png'), dpi=300)
    plt.close(fig)


def _plot_joint_distribution(df):
    """Save the hex joint plot of both ratios with density contours on top."""
    joint = sns.jointplot(x=df['vertex_ratio'], y=df['face_ratio'],
                          kind='hex', height=9,
                          space=0.2,
                          marginal_kws=dict(bins=30, fill=False),
                          joint_kws=dict(gridsize=50, edgecolor='w', cmap='Blues'))

    # Estimate the density from the hex bins: each bin centre is weighted by
    # the number of samples it holds. Unweighted centres would describe the
    # bin grid itself rather than the data.
    hexes = joint.ax_joint.collections[0]
    centers = np.asarray(hexes.get_offsets())
    counts = np.asarray(hexes.get_array(), dtype=float)
    occupied = counts > 0

    try:
        kde = gaussian_kde(centers[occupied].T, weights=counts[occupied])
    except (np.linalg.LinAlgError, ValueError) as e:
        # Too few or degenerate bins: keep the joint plot, drop the contours.
        print(f"无法计算密度等高线，已跳过: {str(e)}")
    else:
        xi, yi = np.mgrid[centers[:, 0].min():centers[:, 0].max():100j,
                          centers[:, 1].min():centers[:, 1].max():100j]
        zi = kde(np.vstack([xi.flatten(), yi.flatten()]))
        joint.ax_joint.contour(xi, yi, zi.reshape(xi.shape),
                               levels=6,
                               linewidths=1,
                               colors='red',
                               alpha=0.8)

    joint.fig.suptitle('顶点与面压缩率联合分布（含密度等高线）', y=1.02)
    joint.ax_joint.set_xlabel('顶点压缩率 (%)', labelpad=12)
    joint.ax_joint.set_ylabel('面压缩率 (%)', labelpad=12)
    joint.savefig(os.path.join(output_figures, '05_联合分布图.png'), dpi=300)
    plt.close(joint.fig)


def analyze_and_visualize(df):
    """Render the five compression-analysis figures into ``output_figures``.

    Args:
        df: DataFrame with the columns ``filename``, ``original_vertices``,
            ``vertex_ratio`` and ``face_ratio`` (ratios in percent).

    Returns:
        None. Each figure is saved as a PNG and closed; nothing is drawn for
        an empty frame.
    """
    if df.empty:
        print("\n数据为空，已跳过图表生成。")
        return

    print("\n正在生成分析图表...")

    _plot_ratio_histograms(df)
    _plot_vertex_ratio_vs_size(df)
    _plot_top_bottom(df)
    _plot_ratio_boxplot(df)
    _plot_joint_distribution(df)

    print(f"所有图表已保存至：{output_figures}")


# Main program: reuse the cached statistics when the CSV already exists,
# otherwise scan both folders, save the statistics and plot them.
if os.path.exists(output_csv):
    df = pd.read_csv(output_csv)
    print(f"检测到已有结果文件：{output_csv}")
    analyze_and_visualize(df)
else:
    def count_v_f(filepath):
        """Return ``(vertex count, face count)`` of an OBJ file, or ``(None, None)`` on a read error."""
        v_count = 0
        f_count = 0
        try:
            # Decode explicitly instead of relying on the locale default.
            # Only the ASCII prefixes matter, so undecodable bytes are
            # replaced rather than aborting the whole file.
            with open(filepath, 'r', encoding='utf-8', errors='replace') as file:
                for line in tqdm(file, desc=f"Processing {os.path.basename(filepath)}", unit=" lines"):
                    if line.startswith('v '):
                        v_count += 1
                    elif line.startswith('f '):
                        f_count += 1
        except Exception as e:
            # Best effort: report the failure and let the caller skip the file.
            print(f"\n文件读取错误 {filepath}: {str(e)}")
            return None, None
        return v_count, f_count


    def get_obj_files(folder):
        """Return a ``{file name: joined path}`` dict for the ``.obj`` entries in ``folder``."""
        return {filename: os.path.join(folder, filename)
                for filename in os.listdir(folder)
                if filename.endswith('.obj')}


    # Folder paths
    folder1 = "./data_700face/new_p1"  # used as the compressed side below
    folder2 = "./data_700face/new2"    # used as the original side below

    # Only files present under the same name in both folders are compared.
    folder1_files = get_obj_files(folder1)
    folder2_files = get_obj_files(folder2)
    common_files = set(folder1_files.keys()) & set(folder2_files.keys())

    if not common_files:
        print("没有找到同名的OBJ文件！")
    else:
        results = []
        # The context manager closes the bar even when the loop is interrupted.
        with tqdm(total=len(common_files), desc="处理进度") as pbar:
            for filename in sorted(common_files):
                try:
                    v1, f1 = count_v_f(folder1_files[filename])
                    v2, f2 = count_v_f(folder2_files[filename])

                    # Skip the pair when either file could not be read.
                    if None in (v1, f1, v2, f2):
                        continue

                    results.append({
                        "filename": filename,
                        "original_vertices": v2,
                        "compressed_vertices": v1,
                        # Percentage removed; 0 when the original has none
                        "vertex_ratio": (1 - v1 / v2) * 100 if v2 != 0 else 0,
                        "original_faces": f2,
                        "compressed_faces": f1,
                        "face_ratio": (1 - f1 / f2) * 100 if f2 != 0 else 0
                    })
                except Exception as e:
                    print(f"\n处理 {filename} 时异常: {str(e)}")
                finally:
                    pbar.update(1)

        if results:
            df = pd.DataFrame(results)
            df.to_csv(output_csv, index=False)
            print(f"\n结果已保存至 {output_csv}")
            analyze_and_visualize(df)
        else:
            # Do not write an empty CSV: the next run would take the cached
            # branch above and fail while parsing it.
            print("\n没有成功统计的文件，未生成结果文件。")