# exp008 - Advanced Feature Engineering (Kaggleチュートリアル参考)

## 🎯 目標
- 高度な特徴量エンジニアリング手法の実装
- Title、Cabin、Ticket、Familyの詳細分析
- exp004（0.77990）を超える性能を目指す

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import japanize_matplotlib
import warnings
warnings.filterwarnings('ignore')

import lightgbm as lgb
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
import re

# Plot font; presumably IPAexGothic is chosen so Japanese labels render -- confirm it is installed.
plt.rcParams['font.family'] = 'IPAexGothic'

print("🚀 exp008 - Advanced Feature Engineering")
print("Kaggleチュートリアルの手法を参考に実装")

# Load the data
train_df = pd.read_csv('/Users/koki.ogai/Documents/ghq/github.com/oddgai/kaggle-projects/titanic/data/train.csv')
test_df = pd.read_csv('/Users/koki.ogai/Documents/ghq/github.com/oddgai/kaggle-projects/titanic/data/test.csv')

# Concatenate train and test so that preprocessing is applied consistently to both
df_all = pd.concat([train_df, test_df], sort=False).reset_index(drop=True)
# Rows that carry a Survived value are flagged as training rows
# (assumes test.csv has no Survived column -- TODO confirm).
df_all['is_train'] = df_all['Survived'].notna()

print(f"\n全データ shape: {df_all.shape}")
print(f"訓練データ: {df_all['is_train'].sum()}件")
print(f"テストデータ: {(~df_all['is_train']).sum()}件")

## 🔧 Phase 1: Title（称号）の高度な処理

In [None]:
# Pull the honorific out of each name: the word between a space and a period.
df_all['Title'] = df_all['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)

print("=== Title分布 ===")
print(df_all['Title'].value_counts())

# Survival rate per title, computed on the labelled (training) rows only.
labelled_rows = df_all.loc[df_all['is_train']]
title_survival = (
    labelled_rows.groupby('Title')['Survived']
    .agg(['mean', 'count'])
    .round(3)
)
print("\n=== Title別生存率 ===")
print(title_survival.sort_values('mean', ascending=False))

# Coarse title groups, each listing the raw titles that collapse into it.
title_groups = {
    'Mr': ['Mr'],
    'Miss': ['Miss', 'Mlle', 'Ms'],      # Mlle = Mademoiselle
    'Mrs': ['Mrs', 'Mme'],               # Mme = Madame
    'Master': ['Master'],
    'Officer': ['Col', 'Major', 'Capt'],
    'Royalty': ['Lady', 'Sir', 'Countess', 'Don', 'Dona', 'Jonkheer'],
    'Dr': ['Dr'],
    'Rev': ['Rev'],
}
# Invert into the raw-title -> group lookup used by Series.map.
title_mapping = {
    raw_title: group
    for group, raw_titles in title_groups.items()
    for raw_title in raw_titles
}

df_all['Title_Grouped'] = df_all['Title'].map(title_mapping)

# Flags combining title and age. Comparisons against a missing Age are False,
# so passengers without an Age never match the age-dependent conditions.
title_col = df_all['Title']
age_col = df_all['Age']
is_miss = title_col == 'Miss'

df_all['Is_Child'] = ((title_col == 'Master') | (is_miss & (age_col < 18))).astype(int)
df_all['Is_Young_Miss'] = (is_miss & (age_col >= 18) & (age_col < 30)).astype(int)
df_all['Is_Mrs'] = (title_col == 'Mrs').astype(int)

print("\n=== Title特徴量作成完了 ===")
print("Title_Grouped分布:")
print(df_all['Title_Grouped'].value_counts())

## 🏠 Phase 2: Cabin（客室）の詳細処理

In [None]:
# Whether any cabin string was recorded for the passenger.
df_all['Has_Cabin'] = df_all['Cabin'].notna().astype(int)

# Deck letter = first character of the cabin string.
df_all['Deck'] = df_all['Cabin'].str[0]

# Number of whitespace-separated cabins on the record (0 when no cabin is recorded).
df_all['Cabin_Count'] = df_all['Cabin'].str.split().str.len().fillna(0)

# Ordinal deck score: A=7 down to G=1, T=0.
deck_mapping = dict(zip('ABCDEFGT', range(7, -1, -1)))
# Mapped BEFORE the deck letters are imputed below, so passengers without a
# recorded cabin keep a missing Deck_Num (set to -1 at the end of this block).
df_all['Deck_Num'] = df_all['Deck'].map(deck_mapping)

# Fill missing deck letters with the most frequent deck of the same Pclass.
for pclass in (1, 2, 3):
    in_class = df_all['Pclass'] == pclass
    deck_mode = df_all.loc[in_class & df_all['Deck'].notna(), 'Deck'].mode()
    if deck_mode.empty:
        continue
    df_all.loc[in_class & df_all['Deck'].isna(), 'Deck'] = deck_mode.iloc[0]

# Anything still missing: 'U' (Unknown) for the letter, -1 for the numeric score.
df_all['Deck'] = df_all['Deck'].fillna('U')
df_all['Deck_Num'] = df_all['Deck_Num'].fillna(-1)

# Estimate the cabin's position from the numeric part of the cabin string
def extract_cabin_number(cabin):
    """Return the first run of digits in *cabin* as an int.

    Returns -1 when *cabin* is missing or contains no digits.
    """
    if pd.isna(cabin):
        return -1
    first_digits = re.search(r'\d+', str(cabin))
    return int(first_digits.group()) if first_digits else -1

# Cabin number per passenger; -1 marks a missing cabin or one without digits.
df_all['Cabin_Number'] = df_all['Cabin'].apply(extract_cabin_number)

# Cabin position bucket (front / middle / back) derived from the cabin number:
# (-2, 0] -> Unknown, (0, 50] -> Front, (50, 100] -> Middle, (100, 200] -> Back.
# Numbers above 200 fall outside every bin and become NaN.
df_all['Cabin_Position'] = pd.cut(df_all['Cabin_Number'], 
                                   bins=[-2, 0, 50, 100, 200],
                                   labels=['Unknown', 'Front', 'Middle', 'Back'])

print("=== Cabin特徴量作成完了 ===")
print(f"\nデッキ分布:")
print(df_all['Deck'].value_counts())
# Passengers whose record lists more than one cabin
print(f"\n複数客室保有者: {(df_all['Cabin_Count'] > 1).sum()}人")

## 🎫 Phase 3: Ticket（チケット）の詳細分析

In [None]:
# Ticket string reduced to letters and digits only.
df_all['Ticket_Cleaned'] = df_all['Ticket'].str.replace('[^A-Za-z0-9]', '', regex=True)

# Ticket prefix: first run of letters/dots with the dots removed.
# Tickets without any letters get 'NONE'.
df_all['Ticket_Prefix'] = df_all['Ticket'].str.extract(r'([A-Za-z\.]+)', expand=False)
df_all['Ticket_Prefix'] = df_all['Ticket_Prefix'].str.replace('[^A-Za-z]', '', regex=True)
df_all['Ticket_Prefix'] = df_all['Ticket_Prefix'].fillna('NONE')

# Numeric ticket number: the TRAILING digit run. An unanchored (\d+) would
# capture digits that belong to the prefix (e.g. the 5 in "A/5 21171").
# Tickets that do not end in digits become NaN.
df_all['Ticket_Number'] = df_all['Ticket'].str.extract(r'(\d+)$', expand=False)
df_all['Ticket_Number'] = pd.to_numeric(df_all['Ticket_Number'], errors='coerce')

# Ticket group size: how many passengers share the exact same ticket string.
ticket_counts = df_all['Ticket'].value_counts()
df_all['Ticket_Group_Size'] = df_all['Ticket'].map(ticket_counts)

# Dense rank of the ticket number within its prefix
# (neighbouring ranks may indicate families or groups).
df_all['Ticket_Number_Sorted'] = df_all.groupby('Ticket_Prefix')['Ticket_Number'].rank(method='dense')

# Fare normalised by the number of passengers on the ticket.
df_all['Fare_Per_Person'] = df_all['Fare'] / df_all['Ticket_Group_Size']

# Survival statistics per prefix (training rows only), keeping prefixes with >= 5 rows.
prefix_stats = df_all[df_all['is_train']].groupby('Ticket_Prefix')['Survived'].agg(['mean', 'count'])
prefix_stats = prefix_stats[prefix_stats['count'] >= 5]

print("=== 主要チケットプレフィックス別生存率 ===")
print(prefix_stats.sort_values('mean', ascending=False).head(10))

# Collapse rare prefixes (fewer than 10 training rows) into 'OTHER'.
frequent_prefixes = prefix_stats[prefix_stats['count'] >= 10].index.tolist()
frequent_prefix_set = set(frequent_prefixes)  # O(1) membership inside the apply
df_all['Ticket_Prefix_Grouped'] = df_all['Ticket_Prefix'].apply(
    lambda prefix: prefix if prefix in frequent_prefix_set else 'OTHER'
)

print(f"\n=== Ticket特徴量作成完了 ===")
print(f"グループチケット: {(df_all['Ticket_Group_Size'] > 1).sum()}件")
print(f"一人あたり運賃中央値: ${df_all['Fare_Per_Person'].median():.2f}")

## 👨‍👩‍👧‍👦 Phase 4: Family（家族）の複雑な関係

In [None]:
# Family size = the passenger plus siblings/spouses plus parents/children.
df_all['FamilySize'] = 1 + df_all['SibSp'] + df_all['Parch']
df_all['IsAlone'] = df_all['FamilySize'].eq(1).astype(int)

# Bucket the family size: 1 -> Alone, 2-3 -> Small, 4-5 -> Medium, 6-20 -> Large.
family_bins = [0, 1, 3, 5, 20]
family_labels = ['Alone', 'Small', 'Medium', 'Large']
df_all['FamilySize_Cat'] = pd.cut(df_all['FamilySize'], bins=family_bins, labels=family_labels)

# Surname = everything before the first comma in the name.
df_all['Surname'] = df_all['Name'].str.split(',').str.get(0)

# How many passengers (train + test) share each surname.
surname_counts = df_all['Surname'].value_counts()
df_all['Surname_Count'] = df_all['Surname'].map(surname_counts)

# Adults (Age > 18) travelling with at least one parent/child, split by sex.
# A missing Age compares as False, so such passengers get neither flag.
adult_with_parch = (df_all['Parch'] > 0) & (df_all['Age'] > 18)
df_all['Is_Mother'] = ((df_all['Sex'] == 'female') & adult_with_parch).astype(int)
df_all['Is_Father'] = ((df_all['Sex'] == 'male') & adult_with_parch).astype(int)

# Detailed family-type classification
def classify_family_type(row):
    """Return the family-role label for one passenger row.

    Checks are applied in priority order: Alone, Mother, Father, Child, then
    Sibling (SibSp only), Parent_Child (Parch only), otherwise Extended.
    *row* must support item access for IsAlone, Is_Mother, Is_Father,
    Is_Child, SibSp and Parch.
    """
    if row['IsAlone']:
        return 'Alone'

    # Role flags, first truthy one wins.
    flag_labels = (
        ('Is_Mother', 'Mother'),
        ('Is_Father', 'Father'),
        ('Is_Child', 'Child'),
    )
    for flag, label in flag_labels:
        if row[flag]:
            return label

    if row['SibSp'] > 0 and row['Parch'] == 0:
        return 'Sibling'
    if row['SibSp'] == 0 and row['Parch'] > 0:
        return 'Parent_Child'
    return 'Extended'

# Assign a family-role label to every passenger.
df_all['Family_Type'] = df_all.apply(classify_family_type, axis=1)

# Survival statistics below are computed from the labelled (training) rows only.
train_data = df_all[df_all['is_train']].copy()

# Per-surname survival rate and number of training rows.
# NOTE(review): for a training row the rate includes that row's own Survived
# label, which leaks the target into the feature -- consider an out-of-fold or
# leave-one-out encoding.
surname_survival = train_data.groupby('Surname')['Survived'].agg(['mean', 'count'])
surname_survival.columns = ['Surname_Survival_Rate', 'Surname_Group_Count']

# Attach with map instead of merge: the columns are overwritten in place, so
# re-running the cell does not produce duplicated *_x / *_y columns.
df_all['Surname_Survival_Rate'] = df_all['Surname'].map(surname_survival['Surname_Survival_Rate'])
df_all['Surname_Group_Count'] = df_all['Surname'].map(surname_survival['Surname_Group_Count'])

# Surnames that never occur in the training rows have no statistics at all:
# record a group count of 0 so they fall under the small-group rule below
# (a NaN count would compare False against "< 3" and keep a NaN rate).
df_all['Surname_Group_Count'] = df_all['Surname_Group_Count'].fillna(0)

# Small surname groups (fewer than 3 training rows) use the overall survival rate.
overall_survival_rate = train_data['Survived'].mean()
df_all.loc[df_all['Surname_Group_Count'] < 3, 'Surname_Survival_Rate'] = overall_survival_rate

print("=== Family Type分布 ===")
print(df_all['Family_Type'].value_counts())

# Survival rate per family type (training rows)
family_survival = train_data.groupby('Family_Type')['Survived'].mean().round(3)
print("\n=== Family Type別生存率 ===")
print(family_survival.sort_values(ascending=False))

## 🔄 Phase 5: 欠損値の高度な補完

In [None]:
# Impute missing Age with a RandomForest regressor trained on the rows where
# Age is known, using class, family counts, per-person fare, sex and title.
from sklearn.ensemble import RandomForestRegressor

# Features used to predict Age
age_features = ['Pclass', 'SibSp', 'Parch', 'Fare_Per_Person']

# Integer-encode the categorical inputs of the age model
le_sex = LabelEncoder()
df_all['Sex_Encoded'] = le_sex.fit_transform(df_all['Sex'])
age_features.append('Sex_Encoded')

le_title = LabelEncoder()
df_all['Title_Encoded'] = le_title.fit_transform(df_all['Title_Grouped'].fillna('Unknown'))
age_features.append('Title_Encoded')

# Rows with a known Age train the model; rows without one are predicted.
age_train = df_all[df_all['Age'].notna()][age_features + ['Age']].copy()
age_test = df_all[df_all['Age'].isna()][age_features].copy()

# Any remaining gaps in the model inputs use the medians of the training part.
age_train = age_train.fillna(age_train.median())
age_test = age_test.fillna(age_train.median())

if len(age_test) > 0:
    rf_age = RandomForestRegressor(n_estimators=100, random_state=42)
    rf_age.fit(age_train[age_features], age_train['Age'])
    predicted_ages = rf_age.predict(age_test[age_features])
    df_all.loc[df_all['Age'].isna(), 'Age'] = predicted_ages
    print(f"年齢を{len(age_test)}件予測補完")

# Embarked: fill with the most frequent port. Done BEFORE the fare imputation
# because Embarked is a group key there, and groupby excludes rows whose key is NaN.
df_all['Embarked'] = df_all['Embarked'].fillna(df_all['Embarked'].mode()[0])

# Fare: fill only the missing values with the median of the same
# (Pclass, Embarked) group. Going through fillna leaves every recorded fare
# untouched even if a row has no group.
fare_group_median = df_all.groupby(['Pclass', 'Embarked'])['Fare'].transform('median')
df_all['Fare'] = df_all['Fare'].fillna(fare_group_median)
df_all['Fare'] = df_all['Fare'].fillna(df_all['Fare'].median())

# Fare_Per_Person: recompute missing values from the imputed fare so the two
# columns stay consistent; the overall median is only a last-resort fallback.
df_all['Fare_Per_Person'] = df_all['Fare_Per_Person'].fillna(
    df_all['Fare'] / df_all['Ticket_Group_Size']
)
df_all['Fare_Per_Person'] = df_all['Fare_Per_Person'].fillna(df_all['Fare_Per_Person'].median())

print("\n=== 欠損値補完完了 ===")
print(df_all.isnull().sum()[df_all.isnull().sum() > 0])

## ⚡ Phase 6: 追加の特徴量エンジニアリング

In [None]:
# Age buckets: (0,12] Child, (12,18] Teen, (18,35] Adult, (35,60] MiddleAge, (60,100] Senior
age_bins = [0, 12, 18, 35, 60, 100]
age_labels = ['Child', 'Teen', 'Adult', 'MiddleAge', 'Senior']
df_all['Age_Cat'] = pd.cut(df_all['Age'], bins=age_bins, labels=age_labels)

# Fare quintiles
fare_labels = ['VeryLow', 'Low', 'Medium', 'High', 'VeryHigh']
df_all['Fare_Cat'] = pd.qcut(df_all['Fare'], q=5, labels=fare_labels)

# Interaction features built by joining category strings with '_'
pclass_str = df_all['Pclass'].astype(str)
df_all['Sex_Pclass'] = df_all['Sex'] + '_' + pclass_str
df_all['Age_Sex'] = df_all['Age_Cat'].astype(str) + '_' + df_all['Sex']
df_all['Title_Pclass'] = df_all['Title_Grouped'].astype(str) + '_' + pclass_str

# Hand-weighted survival priority score (domain knowledge):
# female +100, child +80, 1st class +30, 2nd class +15, mother +20.
df_all['Priority_Score'] = (
    (df_all['Sex'] == 'female').astype('int64') * 100
    + (df_all['Is_Child'] == 1).astype('int64') * 80
    + (df_all['Pclass'] == 1).astype('int64') * 30
    + (df_all['Pclass'] == 2).astype('int64') * 15
    + (df_all['Is_Mother'] == 1).astype('int64') * 20
)

# Socio-economic status score: inverted class, fare percentile and cabin presence.
class_points = (4 - df_all['Pclass']) * 30
fare_points = df_all['Fare_Per_Person'].rank(pct=True) * 100
cabin_points = df_all['Has_Cabin'] * 50
df_all['SES_Score'] = class_points + fare_points + cabin_points

# Character lengths of the name (used as a proxy for social status) and of the ticket string
df_all['Name_Length'] = df_all['Name'].str.len()
df_all['Ticket_Length'] = df_all['Ticket'].str.len()

print("=== 追加特徴量作成完了 ===")
print(f"\n作成された特徴量数: {len(df_all.columns)}")

## 🔢 Phase 7: カテゴリカル変数のエンコーディング

In [None]:
# Categorical columns to be integer-encoded
categorical_cols = [
    'Sex', 'Embarked', 'Title_Grouped', 'Deck', 'Cabin_Position',
    'Ticket_Prefix_Grouped', 'FamilySize_Cat', 'Family_Type',
    'Age_Cat', 'Fare_Cat', 'Sex_Pclass', 'Age_Sex', 'Title_Pclass'
]

# Label-encode each column that exists into "<col>_Encoded", keeping the fitted
# encoder per column. Values are cast to str first, so missing values are
# encoded as a category of their own.
label_encoders = {}
for col in categorical_cols:
    if col not in df_all.columns:
        continue
    encoder = LabelEncoder()
    df_all[f'{col}_Encoded'] = encoder.fit_transform(df_all[col].astype(str))
    label_encoders[col] = encoder

print("カテゴリカルエンコーディング完了")
print(f"エンコードされた変数: {len(categorical_cols)}個")

## 🎯 Phase 8: モデル構築

In [None]:
# Split the combined frame back into training and test parts
train_processed = df_all[df_all['is_train']].copy()
test_processed = df_all[~df_all['is_train']].copy()

# Columns that must not be used as model features
exclude_cols = [
    'PassengerId', 'Name', 'Ticket', 'Cabin', 'Survived', 'is_train',
    'Surname', 'Title', 'Ticket_Cleaned', 'Ticket_Number',
    # Raw categorical columns (their *_Encoded counterparts are used instead)
    'Sex', 'Embarked', 'Title_Grouped', 'Deck', 'Cabin_Position',
    'Ticket_Prefix', 'Ticket_Prefix_Grouped', 'FamilySize_Cat', 'Family_Type',
    'Age_Cat', 'Fare_Cat', 'Sex_Pclass', 'Age_Sex', 'Title_Pclass'
]

# Keep every non-excluded column with one of the listed integer/float dtypes
numeric_dtypes = ['int64', 'float64', 'int32', 'float32', 'int8', 'int16']
feature_cols = [col for col in df_all.columns 
                if col not in exclude_cols and 
                df_all[col].dtype in numeric_dtypes]

# NaN handling: fill both parts with the TRAINING median. The check covers the
# test part as well, so a column that is only missing in test rows is still
# filled instead of reaching the model as NaN.
for col in feature_cols:
    if train_processed[col].isnull().any() or test_processed[col].isnull().any():
        fill_value = train_processed[col].median()
        train_processed[col] = train_processed[col].fillna(fill_value)
        test_processed[col] = test_processed[col].fillna(fill_value)

X_train = train_processed[feature_cols]
y_train = train_processed['Survived']
X_test = test_processed[feature_cols]

print(f"使用する特徴量数: {len(feature_cols)}")
print(f"\nTop 20特徴量:")
for i, col in enumerate(feature_cols[:20], 1):
    print(f"{i:2d}. {col}")

In [None]:
# LightGBM model (exp004 settings as a base, with tweaks).
# NOTE(review): the 0/1 Survived target is fitted with a regression objective;
# predictions are turned into classes by thresholding at 0.5 below.
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'num_leaves': 31,
    'learning_rate': 0.03,
    'feature_fraction': 0.8,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'reg_alpha': 0.1,
    'reg_lambda': 0.1,
    'min_child_samples': 20,
    'min_split_gain': 0.01,
    'min_child_weight': 0.001,
    'random_state': 42,
    'n_estimators': 1000,
    'verbose': -1
}

# Stratified K-fold cross validation. The fold count lives in one place so the
# averaging of test predictions and importances below cannot drift from it.
n_splits = 5
kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
cv_scores = []
oof_predictions = np.zeros(len(X_train))
test_predictions = np.zeros(len(X_test))
feature_importance = np.zeros(len(feature_cols))

print(f"\n=== {n_splits}-Fold Cross Validation ===")
for fold, (train_idx, val_idx) in enumerate(kf.split(X_train, y_train), 1):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]
    
    # Train with early stopping on the validation fold (patience of 100 rounds)
    model = lgb.LGBMRegressor(**params)
    model.fit(
        X_tr, y_tr,
        eval_set=[(X_val, y_val)],
        callbacks=[lgb.early_stopping(100), lgb.log_evaluation(0)]
    )
    
    # Continuous predictions; the validation ones are thresholded at 0.5 for accuracy
    val_pred_proba = model.predict(X_val)
    val_pred = (val_pred_proba >= 0.5).astype(int)
    test_pred_proba = model.predict(X_test)
    
    # Fold accuracy
    fold_score = accuracy_score(y_val, val_pred)
    cv_scores.append(fold_score)
    
    # Out-of-fold predictions and fold-averaged test predictions
    oof_predictions[val_idx] = val_pred_proba
    test_predictions += test_pred_proba / n_splits
    
    # Fold-averaged feature importance
    feature_importance += model.feature_importances_ / n_splits
    
    print(f"Fold {fold}: {fold_score:.4f} (trees: {model.n_estimators_})")

# Result summary
cv_mean = np.mean(cv_scores)
cv_std = np.std(cv_scores)
oof_score = accuracy_score(y_train, (oof_predictions >= 0.5).astype(int))

print(f"\n=== Cross Validation結果 ===")
print(f"CV Mean: {cv_mean:.4f} ± {cv_std:.4f}")
print(f"OOF Score: {oof_score:.4f}")

In [None]:
# Feature importances (averaged over folds), most important first
importance_df = pd.DataFrame({
    'feature': feature_cols,
    'importance': feature_importance
}).sort_values('importance', ascending=False)

# Rank (1-based position in the sorted table) and importance of every feature;
# setdefault keeps the first occurrence of a name.
rank_and_importance = {}
for position, (name, score) in enumerate(
        zip(importance_df['feature'], importance_df['importance']), start=1):
    rank_and_importance.setdefault(name, (position, score))

print("=== Top 20 重要特徴量 ===")
top_rows = importance_df.head(20)
for position, (name, score) in enumerate(
        zip(top_rows['feature'], top_rows['importance']), start=1):
    print(f"{position:2d}. {name:30s}: {score:8.2f}")

# Where the features introduced in this experiment ended up
new_features = [
    'Priority_Score', 'SES_Score', 'Surname_Survival_Rate', 
    'Ticket_Group_Size', 'Fare_Per_Person', 'Deck_Num',
    'Is_Mother', 'Is_Father', 'Is_Child'
]

print("\n=== 新規特徴量の重要度 ===")
for feat in new_features:
    if feat not in rank_and_importance:
        continue
    rank, imp = rank_and_importance[feat]
    print(f"Rank {rank:3d}: {feat:25s}: {imp:8.2f}")

## 📊 Phase 9: 結果分析と提出

In [None]:
# Scores of earlier experiments, for comparison
past_results = {
    'exp001': {'cv': 0.8496, 'kaggle': 0.77272, 'features': 16},
    'exp004': {'cv': 0.8462, 'kaggle': 0.77990, 'features': 23},
    'exp006': {'cv': 0.8440, 'kaggle': 0.77272, 'features': 15},
    'exp007': {'cv': None, 'kaggle': 0.77751, 'features': 25}
}

print("=== 過去実験との比較 ===")
for exp_name, record in past_results.items():
    past_cv, past_kaggle, past_n_features = record['cv'], record['kaggle'], record['features']
    print(f"{exp_name}: CV={past_cv}, Kaggle={past_kaggle:.5f}, Features={past_n_features}")

print("\nexp008 (Advanced FE):")
print(f"  CV Score: {cv_mean:.4f} ± {cv_std:.4f}")
print(f"  Features: {len(feature_cols)}")

# Expected Kaggle score: scale this CV by exp004's Kaggle-to-CV ratio
exp004 = past_results['exp004']
exp004_ratio = exp004['kaggle'] / exp004['cv']
expected_kaggle = cv_mean * exp004_ratio
print(f"  期待Kaggle Score: {expected_kaggle:.5f}")

# Report whether this run's CV beats exp004's CV
cv_delta = cv_mean - exp004['cv']
if cv_mean > exp004['cv']:
    print(f"\n🎉 exp004のCVを上回った！ (+{cv_delta:.4f})")
else:
    print(f"\n📊 exp004のCVには及ばず ({cv_delta:.4f})")

In [None]:
# Build the submission: fold-averaged test predictions thresholded at 0.5
submission = pd.DataFrame({
    'PassengerId': test_df['PassengerId'],
    'Survived': (test_predictions >= 0.5).astype(int)
})

# Check the distribution of predictions against the training survival rate
print("=== 予測分布 ===")
print(f"生存予測: {submission['Survived'].sum()} ({submission['Survived'].mean():.1%})")
print(f"死亡予測: {len(submission) - submission['Survived'].sum()} ({1 - submission['Survived'].mean():.1%})")
print(f"\n訓練データ生存率: {y_train.mean():.1%}")
print(f"テスト予測生存率: {submission['Survived'].mean():.1%}")

# Save the file (the output directory is created if it does not exist)
import os
os.makedirs('/Users/koki.ogai/Documents/ghq/github.com/oddgai/kaggle-projects/titanic/results/exp008', exist_ok=True)
submission.to_csv('/Users/koki.ogai/Documents/ghq/github.com/oddgai/kaggle-projects/titanic/results/exp008/result.csv', index=False)

print(f"\n✅ 提出ファイル保存完了")
# NOTE(review): the printed path is a shortened form, not the absolute path written above.
print(f"Path: results/exp008/result.csv")

In [None]:
# Experiment summary
banner = "=" * 70

print("\n" + banner)
print("           🚀 EXP008 ADVANCED FEATURE ENGINEERING")
print(banner)

# Final scores
print("\n📊 最終結果:")
print(f"  CV Score: {cv_mean:.4f} ± {cv_std:.4f}")
print(f"  OOF Score: {oof_score:.4f}")
print(f"  特徴量数: {len(feature_cols)}")

# Main new features, one bullet per line
print("\n🌟 主要な新特徴量:")
for bullet in (
    "Title詳細分類（Officer, Royalty, Dr, Rev）",
    "Cabin詳細（デッキ、位置、複数客室）",
    "Ticket分析（プレフィックス、グループサイズ）",
    "Family詳細（Mother, Father, Family Type）",
    "高度な欠損値補完（RandomForest予測）",
):
    print(f"  • {bullet}")

# Technical improvements
print("\n💡 技術的改善点:")
print(f"  • {len(feature_cols)}個の特徴量（過去最多）")
print("  • 多様な交互作用特徴量")
print("  • ドメイン知識と統計手法の融合")

# Expected Kaggle score, compared with exp004's actual score
print(f"\n🎯 期待Kaggleスコア: {expected_kaggle:.5f}")
exp004_kaggle = past_results['exp004']['kaggle']
if expected_kaggle > exp004_kaggle:
    improvement = expected_kaggle - exp004_kaggle
    print(f"  → exp004を {improvement:.5f} 上回る見込み！🎉")
    print("  → 0.78の壁突破の可能性！")

print("\n" + banner)
print("  Advanced Feature Engineering - The Devil is in the Details!")
print("  Kaggle提出をお待ちしています...")
print(banner)