# 🏠 東京家賃予測システム - ディープラーニング版 v2

**【改善点】**
- クラス構造の整理とモジュール化
- 学習・予測ロジックの分離
- Webインターフェース（ipywidgets によるインタラクティブ予測UI）の統合
- コード重複削除と可読性向上

## 1. ライブラリインポート

In [None]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.decomposition import PCA

import matplotlib.pyplot as plt
import matplotlib.font_manager as fmt

# Register the Meiryo font file that may sit next to the notebook. The file is
# environment-specific, so a missing or unreadable file is reported instead of
# aborting the whole notebook; matplotlib then relies on installed fonts.
try:
    fmt.fontManager.addfont(r'./meiryo.ttc')
except OSError as exc:
    print(f"⚠️ meiryo.ttc を読み込めません（{exc}）。インストール済みフォントを使用します。")

# Use Meiryo for Japanese labels and keep the ASCII minus sign so negative
# tick labels render with fonts lacking the Unicode minus glyph.
plt.rcParams['font.family'] = ['Meiryo']
plt.rcParams['axes.unicode_minus'] = False

import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

import warnings
warnings.filterwarnings('ignore')

# Prefer the GPU when CUDA is usable; otherwise run on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("使用デバイス: " + str(device))
print("=" * 80)

## 2. データ前処理クラス

In [None]:
class RentDataPreprocessor:
    """Preprocess rent-listing DataFrames for the deep-learning model.

    Fitting learns, from the given frame:
      * one ``LabelEncoder`` per categorical column (区, 建物構造, 建物タイプ),
      * a ``StandardScaler`` over the three numeric columns,
      * the mean rent per ward plus the mean/std used to standardise it.

    ``transform`` re-applies exactly those fitted statistics to new rows.
    """

    # Column groups handled by the preprocessor.
    CATEGORICAL_COLS = ('区', '建物構造', '建物タイプ')
    NUMERIC_COLS = ('部屋サイズ_m2', '駅距離_分', '築年数_年')

    def __init__(self):
        self.scaler = StandardScaler()
        self.label_encoders = {}
        self.num_wards = 0
        self.num_structures = 0
        self.num_types = 0
        self.ward_avg_price_dict = {}  # ward name -> mean rent seen during fit
        # Standardisation parameters for the ward-average feature; defined
        # here so the attributes exist even before fit_transform is called.
        self.ward_avg_price_mean = 0.0
        self.ward_avg_price_std = 1.0

    def _fit_ward_price_stats(self, df):
        """Learn the per-ward mean rent and its standardisation parameters."""
        self.ward_avg_price_dict = df.groupby('区')['家賃_円'].mean().to_dict()
        raw = df['区'].map(self.ward_avg_price_dict)
        self.ward_avg_price_mean = raw.mean()
        std = raw.std()
        # With a single ward (std == 0) or a single row (std is NaN) dividing
        # by std would turn the whole feature into NaN; fall back to 1.0 so
        # the feature becomes a harmless constant instead.
        self.ward_avg_price_std = std if np.isfinite(std) and std > 0 else 1.0

    def _standardize_ward_price(self, wards):
        """Map ward names to their fitted mean rent and standardise it."""
        raw = wards.map(self.ward_avg_price_dict)
        return (raw - self.ward_avg_price_mean) / self.ward_avg_price_std

    def fit_transform(self, df):
        """Fit all encoders/scalers on ``df`` and return the processed copy.

        Args:
            df: DataFrame containing the categorical columns, the numeric
                columns and the target column ``家賃_円``.

        Returns:
            A copy of ``df`` with ``<col>_encoded`` columns added, the numeric
            columns standardised in place and a standardised ``区_avg_price``
            column added.
        """
        df_processed = df.copy()

        # 1. Label-encode the categorical columns.
        for col in self.CATEGORICAL_COLS:
            encoder = LabelEncoder()
            df_processed[f'{col}_encoded'] = encoder.fit_transform(df[col])
            self.label_encoders[col] = encoder

        # Category counts size the model's embedding tables, so take them
        # from the fitted encoders that produce the indices.
        self.num_wards = len(self.label_encoders['区'].classes_)
        self.num_structures = len(self.label_encoders['建物構造'].classes_)
        self.num_types = len(self.label_encoders['建物タイプ'].classes_)

        # 2. Standardise the numeric columns.
        numeric_cols = list(self.NUMERIC_COLS)
        df_processed[numeric_cols] = self.scaler.fit_transform(df[numeric_cols])

        # 3. Auxiliary feature: standardised mean rent of the listing's ward.
        self._fit_ward_price_stats(df)
        df_processed['区_avg_price'] = self._standardize_ward_price(df['区'])

        return df_processed

    def transform(self, df):
        """Apply the already-fitted preprocessing to ``df`` (prediction time).

        Args:
            df: DataFrame with the categorical and numeric columns; the target
                column is not required.

        Returns:
            A processed copy of ``df`` with the same added/standardised
            columns as ``fit_transform`` produces.
        """
        df_processed = df.copy()

        # Encode categoricals with the fitted encoders.
        for col in self.CATEGORICAL_COLS:
            df_processed[f'{col}_encoded'] = self.label_encoders[col].transform(df[col])

        # Standardise numerics with the fitted scaler.
        numeric_cols = list(self.NUMERIC_COLS)
        df_processed[numeric_cols] = self.scaler.transform(df[numeric_cols])

        # Ward-average rent, standardised with the parameters learned in fit.
        df_processed['区_avg_price'] = self._standardize_ward_price(df['区'])

        return df_processed


class TokyoRentDataset(Dataset):
    """PyTorch ``Dataset`` pairing feature rows with their rent targets.

    Args:
        features: Array-like of feature rows; stored as a float32 tensor.
        targets: Array-like of targets, one per feature row; stored as a
            float32 tensor.

    Raises:
        ValueError: If ``features`` and ``targets`` differ in length.
    """

    def __init__(self, features, targets):
        # torch.tensor with an explicit dtype replaces the legacy
        # torch.FloatTensor constructor and always copies its input.
        self.features = torch.tensor(features, dtype=torch.float32)
        self.targets = torch.tensor(targets, dtype=torch.float32)

        # Fail early: a length mismatch would otherwise only surface as an
        # IndexError in the middle of an epoch.
        if self.features.shape[:1] != self.targets.shape[:1]:
            raise ValueError(
                f"features and targets must have the same length, got "
                f"{tuple(self.features.shape)} and {tuple(self.targets.shape)}"
            )

    def __len__(self):
        return len(self.features)

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

## 3. ディープラーニングモデル定義

In [None]:
class RentPredictionNetWithAttention(nn.Module):
    """Rent regression network with embeddings, a ward gate and a skip path.

    Categorical inputs (ward, building structure, building type) are embedded;
    the ward embedding is scaled by a learned per-sample gate and also combined
    with room size and station distance through two interaction layers. All
    features are concatenated and passed through an MLP with a scaled linear
    skip connection, followed by a small output head.

    Args:
        num_wards: Number of distinct ward indices.
        num_structures: Number of distinct building-structure indices.
        num_types: Number of distinct building-type indices.
        embedding_dim: Size of the ward embedding; structure and type
            embeddings use ``embedding_dim // 2``.
        hidden_dims: Sizes of the hidden layers of the main MLP (non-empty).

    Raises:
        ValueError: If ``hidden_dims`` is empty.
    """

    def __init__(self, num_wards, num_structures, num_types,
                 embedding_dim=32, hidden_dims=(512, 256, 128)):
        super().__init__()

        # Immutable default plus a local copy: no list is shared across calls.
        hidden_dims = list(hidden_dims)
        if not hidden_dims:
            raise ValueError("hidden_dims must contain at least one layer size")

        # Embedding layers
        self.ward_embedding = nn.Embedding(num_wards, embedding_dim)
        self.structure_embedding = nn.Embedding(num_structures, embedding_dim // 2)
        self.type_embedding = nn.Embedding(num_types, embedding_dim // 2)

        # Attention gate: one score per sample. The score has shape
        # (batch, 1), so a Softmax over dim=1 would normalise a single element
        # and always return 1.0 (a no-op that also blocks gradients to these
        # layers). A Sigmoid yields a real gate in [0, 1]. The gate has no
        # parameters, so state_dict keys are unchanged.
        self.attention = nn.Sequential(
            nn.Linear(embedding_dim, embedding_dim // 2),
            nn.Tanh(),
            nn.Linear(embedding_dim // 2, 1),
            nn.Sigmoid()
        )

        # Interaction layers: ward embedding + one numeric feature each
        self.ward_room_interaction = nn.Linear(embedding_dim + 1, embedding_dim)
        self.ward_station_interaction = nn.Linear(embedding_dim + 1, embedding_dim)

        # Concatenated input width: 3 numeric features, gated ward embedding
        # and two interaction outputs (embedding_dim each), structure and type
        # embeddings (embedding_dim // 2 each), and 1 ward-average price.
        input_dim = 3 + embedding_dim * 3 + (embedding_dim // 2) * 2 + 1

        # Main network
        layers = []
        prev_dim = input_dim
        for hidden_dim in hidden_dims:
            layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.LeakyReLU(0.1),
                nn.Dropout(0.25)
            ])
            prev_dim = hidden_dim

        # Linear skip path from the raw concatenated features to the width of
        # the last hidden layer.
        self.skip_connection = nn.Linear(input_dim, hidden_dims[-1])

        # Output head
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_dims[-1], hidden_dims[-1] // 2),
            nn.ReLU(),
            nn.Linear(hidden_dims[-1] // 2, 1)
        )

        self.network = nn.Sequential(*layers)
        self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-initialise every Linear weight and zero its bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, ward_idx, structure_idx, type_idx, numeric_features, ward_avg_price):
        """Run the forward pass.

        Args:
            ward_idx: 1-D integer tensor of ward indices.
            structure_idx: 1-D integer tensor of building-structure indices.
            type_idx: 1-D integer tensor of building-type indices.
            numeric_features: 2-D tensor whose columns 0 and 1 are used as
                room size and station distance; all columns are fed to the MLP
                (three columns are expected by the input width).
            ward_avg_price: 1-D tensor with the ward-average price feature.

        Returns:
            Tensor of shape ``(batch, 1)`` with the predicted target.
        """
        # Embeddings
        ward_emb = self.ward_embedding(ward_idx)
        structure_emb = self.structure_embedding(structure_idx)
        type_emb = self.type_embedding(type_idx)

        # Scale the ward embedding by its per-sample gate.
        attention_weights = self.attention(ward_emb)
        ward_emb_attended = ward_emb * attention_weights

        # Interaction features (slices keep the column dimension)
        room_size = numeric_features[:, 0:1]
        station_dist = numeric_features[:, 1:2]

        ward_room_feat = self.ward_room_interaction(torch.cat([ward_emb, room_size], dim=1))
        ward_station_feat = self.ward_station_interaction(torch.cat([ward_emb, station_dist], dim=1))

        # Concatenate all features
        features = torch.cat([
            numeric_features,
            ward_emb_attended,
            ward_room_feat,
            ward_station_feat,
            structure_emb,
            type_emb,
            ward_avg_price.unsqueeze(1)
        ], dim=1)

        # Main network plus a down-weighted skip path
        main_output = self.network(features)
        skip_output = self.skip_connection(features)
        combined = main_output + skip_output * 0.1

        output = self.output_layer(combined)

        return output

## 4. 学習・評価関数

In [None]:
def _training_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device('cpu')


def _unpack_training_batch(batch_features):
    """Split a packed feature batch into the five model inputs.

    Columns 0-2 hold the categorical indices (cast to long), columns 3-5 the
    numeric features and column 6 the ward-average price.
    """
    return (
        batch_features[:, 0].long(),
        batch_features[:, 1].long(),
        batch_features[:, 2].long(),
        batch_features[:, 3:6],
        batch_features[:, 6],
    )


def train_model(model, train_loader, val_loader, num_epochs=100, learning_rate=0.001,
                checkpoint_path='best_rent_dl_model.pth'):
    """Train ``model`` with MSE loss and checkpoint the best validation epoch.

    Uses AdamW (weight decay 0.01), gradient-norm clipping at 1.0 and a
    ReduceLROnPlateau scheduler driven by the mean validation loss.

    Args:
        model: Module called as ``model(ward_idx, structure_idx, type_idx,
            numeric_features, ward_avg_price)``.
        train_loader: Iterable of ``(features, targets)`` training batches.
        val_loader: Iterable of ``(features, targets)`` validation batches.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Initial AdamW learning rate.
        checkpoint_path: Where the state_dict of the best epoch (lowest mean
            validation loss) is saved.

    Returns:
        ``(train_losses, val_losses)``: per-epoch mean batch losses.
    """
    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=0.01)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=10, factor=0.5)

    # Move batches to wherever the model lives, not to a module-level global,
    # so model and data can never end up on different devices.
    run_device = _training_device(model)

    train_losses = []
    val_losses = []
    best_val_loss = float('inf')

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0.0
        for batch_features, batch_targets in train_loader:
            batch_features = batch_features.to(run_device)
            batch_targets = batch_targets.to(run_device)

            optimizer.zero_grad()
            outputs = model(*_unpack_training_batch(batch_features))
            # Reshape to the target's shape: a bare squeeze() would collapse a
            # batch of one to a 0-d tensor and rely on broadcasting.
            loss = criterion(outputs.reshape(batch_targets.shape), batch_targets)
            loss.backward()

            # Gradient clipping
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_features, batch_targets in val_loader:
                batch_features = batch_features.to(run_device)
                batch_targets = batch_targets.to(run_device)

                outputs = model(*_unpack_training_batch(batch_features))
                loss = criterion(outputs.reshape(batch_targets.shape), batch_targets)
                val_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)
        train_losses.append(avg_train_loss)
        val_losses.append(avg_val_loss)

        scheduler.step(avg_val_loss)

        # Keep the weights of the best validation epoch on disk.
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), checkpoint_path)

        if (epoch + 1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], '
                  f'Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

    return train_losses, val_losses


def evaluate_model(model, test_loader, target_scale=10000):
    """Predict on ``test_loader`` and compute MAE, RMSE and R².

    Args:
        model: Module called as ``model(ward_idx, structure_idx, type_idx,
            numeric_features, ward_avg_price)``.
        test_loader: Iterable of ``(features, targets)`` batches; feature
            columns 0-2 are categorical indices, 3-5 numeric features and
            column 6 the ward-average price.
        target_scale: Factor applied to both predictions and targets before
            the metrics are computed (default 10000).

    Returns:
        ``(predictions, actuals, metrics)`` where the first two are 1-D NumPy
        arrays in scaled units and ``metrics`` has the keys ``'mae'``,
        ``'rmse'`` and ``'r2'``. ``r2`` is NaN when the targets have zero
        variance; all metrics are NaN when the loader yields no batches.
    """
    model.eval()

    # Send batches to the model's own device rather than a module-level global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device('cpu')

    pred_batches = []
    actual_batches = []

    with torch.no_grad():
        for batch_features, batch_targets in test_loader:
            batch_features = batch_features.to(run_device)

            outputs = model(
                batch_features[:, 0].long(),
                batch_features[:, 1].long(),
                batch_features[:, 2].long(),
                batch_features[:, 3:6],
                batch_features[:, 6],
            )
            pred_batches.append(outputs.cpu().numpy().reshape(-1))
            actual_batches.append(batch_targets.cpu().numpy().reshape(-1))

    if not pred_batches:
        nan = float('nan')
        return np.array([]), np.array([]), {'mae': nan, 'rmse': nan, 'r2': nan}

    predictions = np.concatenate(pred_batches) * target_scale
    actuals = np.concatenate(actual_batches) * target_scale

    # Accumulate the metrics in float64 to limit rounding error on the squares.
    pred64 = predictions.astype(np.float64)
    act64 = actuals.astype(np.float64)
    errors = pred64 - act64

    mae = float(np.mean(np.abs(errors)))
    rmse = float(np.sqrt(np.mean(errors ** 2)))

    ss_res = float(np.sum(errors ** 2))
    ss_tot = float(np.sum((act64 - act64.mean()) ** 2))
    # R² is undefined for constant targets; report NaN, not a division by zero.
    r2 = 1 - ss_res / ss_tot if ss_tot > 0 else float('nan')

    return predictions, actuals, {'mae': mae, 'rmse': rmse, 'r2': r2}

## 5. メイン実行：データロード・学習・評価

In [None]:
print("=" * 80)
print("🏠 東京家賃予測システム - ディープラーニング版")
print("=" * 80)

# Seed PyTorch so weight initialisation, dropout and batch shuffling are
# repeatable, matching the fixed random_state used for the data split.
torch.manual_seed(42)

# 1. Load the data
print("\n1. データロード")
df = pd.read_csv('tokyo_rent_data_v2.csv')
print(f"  データ数: {len(df)}")

# 2. Preprocess
# NOTE(review): the preprocessor is fitted on the full dataset before the
# train/val/test split below, so the scaler statistics and the per-ward mean
# rent include validation/test rows. Reported metrics may be optimistic;
# consider fitting on the training rows only.
print("\n2. データ前処理")
preprocessor = RentDataPreprocessor()
df_processed = preprocessor.fit_transform(df)

# 3. Separate features and target
# The column order matters: the training/evaluation code slices the packed
# feature tensor by position (0-2 categorical, 3-5 numeric, 6 ward average).
feature_cols = ['区_encoded', '建物構造_encoded', '建物タイプ_encoded',
                '部屋サイズ_m2', '駅距離_分', '築年数_年', '区_avg_price']

X = df_processed[feature_cols].values
y = df['家賃_円'].values / 10000  # scale the target by 1/10000

# 4. Split 70 / 15 / 15 into train / validation / test
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print(f"  訓練データ: {len(X_train)}")
print(f"  検証データ: {len(X_val)}")
print(f"  テストデータ: {len(X_test)}")

# 5. Build the DataLoaders
train_dataset = TokyoRentDataset(X_train, y_train)
val_dataset = TokyoRentDataset(X_val, y_val)
test_dataset = TokyoRentDataset(X_test, y_test)

batch_size = 32
# BatchNorm1d cannot normalise a single-sample batch in training mode, so drop
# the final training batch only when it would contain exactly one sample.
drop_single_tail = len(train_dataset) > batch_size and len(train_dataset) % batch_size == 1

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                          drop_last=drop_single_tail)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# 6. Build the model
print("\n3. モデル構築")
model = RentPredictionNetWithAttention(
    preprocessor.num_wards,
    preprocessor.num_structures,
    preprocessor.num_types
).to(device)

print(f"  パラメータ数: {sum(p.numel() for p in model.parameters()):,}")

# 7. Train
print("\n4. モデル学習開始")
train_losses, val_losses = train_model(model, train_loader, val_loader, num_epochs=50)

# 8. Evaluate the best checkpoint written during training
print("\n5. モデル評価")
model.load_state_dict(torch.load('best_rent_dl_model.pth', map_location=device, weights_only=True))
predictions, actuals, metrics = evaluate_model(model, test_loader)

print(f"  MAE: ¥{metrics['mae']:,.0f}")
print(f"  RMSE: ¥{metrics['rmse']:,.0f}")
print(f"  R² Score: {metrics['r2']:.4f}")

# 9. Save the model with everything needed for inference
# NOTE: the preprocessor is stored as a pickled Python object, so this file
# can only be read back with weights_only=False and must be treated as
# trusted input.
torch.save({
    'model_state_dict': model.state_dict(),
    'preprocessor': preprocessor,
    'model_config': {
        'num_wards': preprocessor.num_wards,
        'num_structures': preprocessor.num_structures,
        'num_types': preprocessor.num_types
    },
    'metrics': metrics
}, 'rent_dl_model_complete.pth')

print("\n✅ モデル保存完了: rent_dl_model_complete.pth")

## 6. 結果可視化

In [None]:
# Four-panel summary figure: loss curves, predicted-vs-actual scatter,
# residuals, and a 2-D PCA projection of the learned ward embeddings.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
(ax_history, ax_fit), (ax_resid, ax_embed) = axes

# Panel 1: training and validation loss per epoch
for loss_curve, curve_label in ((train_losses, 'Train Loss'), (val_losses, 'Validation Loss')):
    ax_history.plot(loss_curve, label=curve_label, alpha=0.7)
ax_history.set(xlabel='Epoch', ylabel='Loss', title='学習履歴')
ax_history.legend()
ax_history.grid(True, alpha=0.3)

# Panel 2: predictions against actual rents, with the y = x reference line
rent_lo, rent_hi = actuals.min(), actuals.max()
ax_fit.scatter(actuals, predictions, alpha=0.5)
ax_fit.plot([rent_lo, rent_hi], [rent_lo, rent_hi], 'r--', lw=2)
ax_fit.set(xlabel='実測家賃 (¥)', ylabel='予測家賃 (¥)',
           title=f'予測 vs 実測 (R² = {metrics["r2"]:.4f})')
ax_fit.grid(True, alpha=0.3)

# Panel 3: residuals (actual minus predicted) against the prediction
residuals = actuals - predictions
ax_resid.scatter(predictions, residuals, alpha=0.5)
ax_resid.axhline(y=0, color='r', linestyle='--', lw=2)
ax_resid.set(xlabel='予測家賃 (¥)', ylabel='残差 (¥)', title='残差プロット')
ax_resid.grid(True, alpha=0.3)

# Panel 4: ward embeddings reduced to two PCA components, coloured by the
# mean rent of each ward
ward_embeddings = model.ward_embedding.weight.detach().cpu().numpy()
ward_names = preprocessor.label_encoders['区'].classes_

pca = PCA(n_components=2)
ward_embeddings_2d = pca.fit_transform(ward_embeddings)

ward_avg_prices = df.groupby('区')['家賃_円'].mean()
colors = [ward_avg_prices[name] for name in ward_names]

scatter = ax_embed.scatter(ward_embeddings_2d[:, 0], ward_embeddings_2d[:, 1],
                           c=colors, cmap='RdYlBu_r', s=100, alpha=0.7)

# Label only a handful of wards to keep the panel readable.
highlighted_wards = ('港区', '千代田区', '渋谷区', '足立区', '葛飾区')
for i, ward in enumerate(ward_names):
    if ward not in highlighted_wards:
        continue
    ax_embed.annotate(ward, (ward_embeddings_2d[i, 0], ward_embeddings_2d[i, 1]),
                      fontsize=9, ha='center')

ax_embed.set(xlabel='埋め込み次元 1', ylabel='埋め込み次元 2', title='区埋め込みの可視化')
plt.colorbar(scatter, ax=ax_embed, label='平均家賃')

plt.tight_layout()
plt.savefig('deep_learning_results.png', dpi=300, bbox_inches='tight')
plt.show()

print("✅ 可視化完了: deep_learning_results.png")

## 7. インタラクティブ予測インターフェース

In [None]:
# モデルロード
def _preferred_option(options, preferred):
    """Return ``preferred`` if it is among ``options``, else the first option."""
    return preferred if preferred in options else options[0]


# Load the saved checkpoint.
# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects (needed
# here for the stored preprocessor). Only load checkpoint files you created
# yourself or otherwise trust.
checkpoint = torch.load('rent_dl_model_complete.pth', map_location=device, weights_only=False)
model_loaded = RentPredictionNetWithAttention(
    checkpoint['model_config']['num_wards'],
    checkpoint['model_config']['num_structures'],
    checkpoint['model_config']['num_types']
).to(device)
model_loaded.load_state_dict(checkpoint['model_state_dict'])
model_loaded.eval()

preprocessor_loaded = checkpoint['preprocessor']

# Build the input widgets from the categories the preprocessor was fitted on.
ward_list = sorted(preprocessor_loaded.label_encoders['区'].classes_)
structure_list = sorted(preprocessor_loaded.label_encoders['建物構造'].classes_)
type_list = sorted(preprocessor_loaded.label_encoders['建物タイプ'].classes_)

# A Dropdown rejects an initial value that is not one of its options, so each
# preferred default falls back to the first available category when the
# fitted data does not contain it.
ward_dropdown = widgets.Dropdown(options=ward_list,
                                 value=_preferred_option(ward_list, '新宿区'),
                                 description='区:')
room_slider = widgets.IntSlider(value=30, min=15, max=100, step=5, description='部屋サイズ(m²):')
station_slider = widgets.IntSlider(value=5, min=1, max=20, step=1, description='駅距離(分):')
age_slider = widgets.IntSlider(value=10, min=0, max=50, step=1, description='築年数(年):')
structure_dropdown = widgets.Dropdown(options=structure_list,
                                      value=_preferred_option(structure_list, 'RC造'),
                                      description='建物構造:')
type_dropdown = widgets.Dropdown(options=type_list,
                                 value=_preferred_option(type_list, 'マンション'),
                                 description='建物タイプ:')
predict_button = widgets.Button(description='🔮 家賃予測', button_style='success')
output_area = widgets.Output()

def predict_rent(b):
    """Button callback: predict the rent for the current widget selections.

    Builds a one-row DataFrame from the widgets, runs it through the loaded
    preprocessor and model, and renders the result as an HTML card inside
    ``output_area``.

    Args:
        b: The clicked button, as passed by ``on_click``; unused.
    """
    with output_area:
        output_area.clear_output()

        # One listing described by the current widget values.
        listing = {
            '区': ward_dropdown.value,
            '建物構造': structure_dropdown.value,
            '建物タイプ': type_dropdown.value,
            '部屋サイズ_m2': room_slider.value,
            '駅距離_分': station_slider.value,
            '築年数_年': age_slider.value
        }
        test_df = pd.DataFrame([listing])

        # Same preprocessing and column order as used for training.
        test_processed = preprocessor_loaded.transform(test_df)
        feature_matrix = test_processed[feature_cols].values
        features = torch.FloatTensor(feature_matrix).to(device)

        # Inference without gradient tracking; the first three columns are
        # the categorical indices, the rest numeric inputs.
        with torch.no_grad():
            ward_idx, structure_idx, type_idx = (
                features[:, col].long() for col in range(3)
            )
            prediction = model_loaded(
                ward_idx, structure_idx, type_idx,
                features[:, 3:6], features[:, 6]
            )

        # Undo the 1/10000 target scaling applied for training.
        predicted_rent = prediction.item() * 10000

        # Render the result card.
        html_output = f"""
        <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); 
                    border-radius: 15px; padding: 25px; color: white;">
            <h2 style="margin: 0 0 20px 0;">🏠 予測結果</h2>
            <div style="background: rgba(255,255,255,0.1); padding: 15px; border-radius: 10px;">
                <p><strong>区:</strong> {ward_dropdown.value}</p>
                <p><strong>部屋サイズ:</strong> {room_slider.value} m²</p>
                <p><strong>駅距離:</strong> 徒歩{station_slider.value}分</p>
                <p><strong>築年数:</strong> {age_slider.value}年</p>
                <p><strong>建物構造:</strong> {structure_dropdown.value}</p>
                <p><strong>建物タイプ:</strong> {type_dropdown.value}</p>
            </div>
            <div style="background: white; color: #667eea; padding: 20px; 
                        border-radius: 10px; margin-top: 20px; text-align: center;">
                <div style="font-size: 14px;">予測月額家賃</div>
                <div style="font-size: 48px; font-weight: bold; margin: 10px 0;">
                    ¥{predicted_rent:,.0f}
                </div>
                <div style="font-size: 12px; color: #999;">ディープラーニングモデル使用</div>
            </div>
        </div>
        """
        display(HTML(html_output))

# Run a prediction whenever the button is clicked.
predict_button.on_click(predict_rent)

# Stack the title, the input controls, the button and the result area
# vertically, in that order.
title_html = widgets.HTML("<h2 style='color: #667eea;'>🏠 東京家賃予測システム（ディープラーニング）</h2>")
input_controls = [
    ward_dropdown,
    room_slider,
    station_slider,
    age_slider,
    structure_dropdown,
    type_dropdown,
]
ui = widgets.VBox([title_html, *input_controls, predict_button, output_area])

display(ui)

print("\n🚀 インターフェース準備完了！")