## 测井曲线“公因数”


In [8]:
"""
读取所有LAS文件并找出共有曲线
"""

import os
from pathlib import Path

import lasio
import pandas as pd

In [9]:
# ============ 1. Read all LAS files ============
las_dir = Path("..") / "data" / "vertical_well_common_las"

# Discover LAS files with one case-insensitive suffix check. Comparing the
# lower-cased suffix matches ".las", ".LAS" and mixed-case variants such as
# ".Las" in a single pass, so each path is listed exactly once and no separate
# de-duplication step is required. Sorting keeps the listing stable.
las_files = sorted(
    path
    for path in las_dir.glob("*")
    if path.is_file() and path.suffix.lower() == ".las"
)

print(f"找到 {len(las_files)} 个 LAS 文件:")
for f in las_files:
    print(f"  - {f.name}")
print("\n" + "=" * 80)

# well_curves: file stem -> list of keys reported by lasio for that file.
# all_curves:  flat list of those keys across every file (used for counting).
# Both are rebuilt from scratch here, so re-running this cell is idempotent.
well_curves = {}
all_curves = []

for las_file in las_files:
    try:
        las = lasio.read(las_file)
        well_name = las_file.stem  # the file name without extension is the well id
        curves = list(las.keys())

        well_curves[well_name] = curves
        all_curves.extend(curves)

        print(f"✓ {well_name:15s} - {len(curves):3d} 条曲线")

    except Exception as e:
        # Best effort: report the unreadable file and continue with the rest.
        print(f"✗ 读取 {las_file.name} 失败: {e}")

print("\n" + "=" * 80)

找到 7 个 LAS 文件:
  - PH1.las
  - PH13.las
  - PH2.las
  - PH3.las
  - PH4.las
  - PH5.las
  - PH6.las

✓ PH1             -  30 条曲线
✓ PH13            -  30 条曲线
✓ PH2             -  30 条曲线
✓ PH3             -  30 条曲线
✓ PH4             -  30 条曲线
✓ PH5             -  30 条曲线
✓ PH6             -  28 条曲线



In [10]:
# ============ 2. Count how often each curve appears ============
from collections import Counter

curve_counter = Counter(all_curves)
print(f"\n所有曲线统计 (共 {len(curve_counter)} 种不同曲线):\n")

# Most frequent first; entries with equal counts keep first-seen order.
sorted_curves = curve_counter.most_common()

# The number of wells is constant for the whole report, so read it once.
well_total = len(well_curves)

for curve, count in sorted_curves:
    percentage = count / well_total * 100
    # One bar segment per 5 percentage points.
    bar = "█" * int(percentage / 5)
    print(f"{curve:20s} | {count:3d}/{well_total:3d} ({percentage:5.1f}%) {bar}")



所有曲线统计 (共 30 种不同曲线):

AMP                  |   7/  7 (100.0%) ████████████████████
BOOL_POR             |   7/  7 (100.0%) ████████████████████
CN                   |   7/  7 (100.0%) ████████████████████
DEN                  |   7/  7 (100.0%) ████████████████████
DEPT                 |   7/  7 (100.0%) ████████████████████
DT                   |   7/  7 (100.0%) ████████████████████
FACIES               |   7/  7 (100.0%) ████████████████████
FLUIDS               |   7/  7 (100.0%) ████████████████████
GR                   |   7/  7 (100.0%) ████████████████████
GR-NORM              |   7/  7 (100.0%) ████████████████████
GR1                  |   7/  7 (100.0%) ████████████████████
GRINPEFA             |   7/  7 (100.0%) ████████████████████
INPEFA               |   7/  7 (100.0%) ████████████████████
LITH                 |   7/  7 (100.0%) ████████████████████
LITH_SHOW            |   7/  7 (100.0%) ████████████████████
LLD1                 |   7/  7 (100.0%) ████████████████████
M

In [11]:
# ============ 3. Find the curves shared by every well ============
print("\n" + "=" * 80)
print(f"\n所有 {len(well_curves)} 口井共有的曲线 (公因数):\n")

if not well_curves:
    # Nothing was loaded, so there is nothing to intersect.
    print("没有成功读取任何LAS文件!")
else:
    # Intersect the per-well curve sets in a single call.
    common_curves = set.intersection(
        *(set(names) for names in well_curves.values())
    )
    common_curves_sorted = sorted(common_curves)

    if not common_curves_sorted:
        print("没有找到所有井都包含的共同曲线!")
    else:
        print(f"共有 {len(common_curves_sorted)} 条共同曲线:\n")
        for idx, curve in enumerate(common_curves_sorted, 1):
            print(f"  {idx:2d}. {curve}")



所有 7 口井共有的曲线 (公因数):

共有 28 条共同曲线:

   1. AMP
   2. BOOL_POR
   3. CN
   4. DEN
   5. DEPT
   6. DT
   7. FACIES
   8. FLUIDS
   9. GR
  10. GR-NORM
  11. GR1
  12. GRINPEFA
  13. INPEFA
  14. LITH
  15. LITH_SHOW
  16. LLD1
  17. MLP-YC
  18. PEFA
  19. PERM
  20. POR
  21. SAND??SHADIBI
  22. SESMIC
  23. SESMIC2
  24. SVM-YC
  25. SW
  26. TWTPICKED
  27. TWTPICKED2
  28. VSH
