# 2_cancellation_data_analysis.ipynb
# 預售屋市場分析系統 - 解約資料深度分析

目標：
✅ 實作PRD中的解約資料解析邏輯
✅ 驗證解約統計結果
✅ 分析解約模式與趨勢

內容大綱：
1. 解約資料格式分析
2. 解約解析函數實作與測試
3. 解約統計與分布分析
4. 解約時間趨勢分析
5. 解約風險初步評估
6. 多重解約案例處理

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime, timedelta
import re
import warnings
warnings.filterwarnings('ignore')

In [2]:
import sys
from pathlib import Path
project_root = Path.cwd().parent.parent  # 找出根目錄：Path.cwd()找出現在所在目錄(/run).parent(上一層是notebook).parent(再上層一層business_district_discovery)
print(project_root)
sys.path.append(str(project_root))

c:\pylabs\presale_market_analysis


In [3]:
from matplotlib.font_manager import fontManager
import matplotlib as mlp
font_path = Path(project_root) / "ChineseFont.ttf"
fontManager.addfont(str(font_path))
mlp.rc('font', family="ChineseFont")
print(font_path)

c:\pylabs\presale_market_analysis\ChineseFont.ttf


In [4]:
# 設定顯示選項
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', 50)

# # 設定中文字型
# plt.rcParams['font.sans-serif'] = ['Microsoft JhengHei', 'Arial Unicode MS', 'sans-serif']
# plt.rcParams['axes.unicode_minus'] = False

# 設定圖表樣式
sns.set_style("whitegrid")
plt.style.use('default')

print("✅ 環境設定完成")
print(f"📅 分析時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

✅ 環境設定完成
📅 分析時間: 2025-07-27 10:57:13


In [5]:
# 預售屋備查 / 實價預售交易資料位置
community_dir = r"C:\pylabs\presale_market_analysis/data/raw"
community_fn = "community_11006_11406.csv"
community_path = os.path.join(community_dir,  community_fn)

transaction_dir = r"C:\pylabs\presale_market_analysis/data/raw"
tranaction_fn = "transaction_11006_11406.csv"
transaction_path = os.path.join(transaction_dir, tranaction_fn)

stats_dir = r"C:\pylabs\presale_market_analysis/data/processed"
stats_fn = "01_basic_analysis_summary.csv"
stats_path = os.path.join(stats_dir, stats_fn)

In [7]:
try:
    # 載入逐筆交易資料 (主要分析對象)
    transaction_df = pd.read_csv(community_path, encoding='utf-8')
    print(f"✅ 逐筆交易資料載入成功: {transaction_df.shape}")
    
    # 載入預售社區資料 (輔助分析)
    community_df = pd.read_csv(transaction_path, encoding='utf-8')
    print(f"✅ 預售社區資料載入成功: {community_df.shape}")
    
    # 載入 Notebook 1 的基礎分析結果
    try:
        basic_stats = pd.read_csv(stats_path)
        print(f"✅ 基礎分析結果載入成功")
    except FileNotFoundError:
        print("⚠️ 未找到基礎分析結果，將重新計算基礎統計")
        basic_stats = None
        
except FileNotFoundError as e:
    print(f"❌ 檔案載入失敗: {e}")
    print("📝 請確認檔案是否放置在 ../data/raw/ 資料夾中")
except Exception as e:
    print(f"❌ 載入過程發生錯誤: {e}")
    

✅ 逐筆交易資料載入成功: (8452, 19)
✅ 預售社區資料載入成功: (322100, 21)
✅ 基礎分析結果載入成功
