In [10]:
import pandas as pd

from IPython.display import HTML
import matplotlib.pyplot as plt
import seaborn as sns
import os
import numpy as np

# Plot styling: use 'Arial Unicode MS' for sans-serif text and disable the
# Unicode minus glyph on axes.
plt.rcParams.update({
    'font.sans-serif': ['Arial Unicode MS'],
    'axes.unicode_minus': False,
})

# Column layout of the raw file: the target column, then I1..I13 (numerical)
# and C1..C26 (categorical).
num_features = ['I' + str(idx) for idx in range(1, 14)]
cat_features = ['C' + str(idx) for idx in range(1, 27)]
cols = ['label', *num_features, *cat_features]

# Only the first 10,000 rows of the tab-separated, header-less file are read.
data = pd.read_csv(
    '../../data/raw/train.txt',
    sep='\t',
    header=None,
    names=cols,
    nrows=10000,
)

# Output directory for every image referenced by the report.
os.makedirs('eda_report_dist', exist_ok=True)

# Report metadata shown in the page header (date as YYYY-MM-DD).
current_date = pd.Timestamp.now().strftime("%Y-%m-%d")
analyst_name = "Charle"


In [11]:
# ==================== 1. Label Distribution Analysis ====================
# Display names for the two expected target classes, in table order.
LABEL_NAMES = {0: 'Negative(0)', 1: 'Positive(1)'}

# Reindex on the expected classes so that a sample containing a single class
# still produces a two-row table (the absent class gets 0) instead of failing
# on a length mismatch with the 'Class' column.
label_dist = (
    data['label']
    .value_counts(normalize=True)
    .reindex(list(LABEL_NAMES), fill_value=0.0)
)
label_counts = (
    data['label']
    .value_counts()
    .reindex(list(LABEL_NAMES), fill_value=0)
)

# Generate label distribution table
label_table = (
    pd.DataFrame({
        'Class': list(LABEL_NAMES.values()),
        'Count': label_counts.values,
        'Percentage(%)': (label_dist.values * 100).round(2)
    })
    .style
    .format({'Percentage(%)': '{:.2f}%', 'Count': '{:,}'})
    .background_gradient(subset=['Percentage(%)'], cmap='Oranges')
    .to_html()
)


In [12]:
# ==================== 2. Data Sample Preview ====================
# First ten rows rendered as an HTML table; the numerical columns are shaded
# with a blue gradient.
preview_rows = data.head(10)
preview_styler = preview_rows.style.background_gradient(subset=num_features, cmap='Blues')
feature_table = preview_styler.to_html()

In [13]:
# ========== 3. Missing Value Analysis (Distinguish between numerical and categorical) ====================
total_records = len(data)


def _missing_summary(frame, columns, n_rows):
    """
    Summarise missing values for a group of columns.

    Parameters:
    frame (pd.DataFrame): Dataframe holding the columns
    columns (list): Column names to inspect
    n_rows (int): Denominator used for the missing ratio

    Returns:
    pd.DataFrame: One row per column that has at least one missing value,
    with 'Missing Count' and 'Missing Ratio(%)', sorted by count descending.
    An empty frame with the same two columns when `columns` is empty.
    """
    if len(columns) == 0:
        return pd.DataFrame(columns=['Missing Count', 'Missing Ratio(%)'])

    counts = frame[columns].isnull().sum().sort_values(ascending=False)
    summary = pd.DataFrame({
        'Missing Count': counts,
        'Missing Ratio(%)': (counts / n_rows * 100).round(2)
    })
    # Keep only the columns that actually have gaps.
    return summary[summary['Missing Count'] > 0]


# Numerical and categorical features are summarised separately.
num_missing_df = _missing_summary(data, num_features, total_records)
cat_missing_df = _missing_summary(data, cat_features, total_records)


# Generate styled missing value tables (by type)
def highlight_high_missing(val, threshold=20):
    """
    Return the cell CSS used to flag a heavily-missing feature.

    Parameters:
    val (float): Missing ratio of a feature, in percent
    threshold (float): Ratio above which the cell is highlighted (default 20)

    Returns:
    str: 'background-color: red' when `val` is strictly greater than
    `threshold`, otherwise an empty string (no styling).
    """
    # Returning '' for everything else avoids emitting an empty
    # 'background-color: ' declaration. Comparisons with NaN are False, so
    # NaN also ends up unstyled.
    return 'background-color: red' if val > threshold else ''


def _render_missing_table(summary, empty_message):
    """
    Render a missing-value summary as a styled HTML table.

    Parameters:
    summary (pd.DataFrame): Frame with 'Missing Count' and 'Missing Ratio(%)'
    empty_message (str): HTML returned as-is when `summary` has no rows

    Returns:
    str: HTML for the table, or `empty_message`
    """
    if summary.empty:
        return empty_message

    styler = summary.style.map(highlight_high_missing, subset=['Missing Ratio(%)'])
    styler = styler.format({
        'Missing Count': '{:,}',  # Thousands separator
        'Missing Ratio(%)': '{:.2f}%'  # Percentage format
    })
    styler = styler.set_table_styles([{
        'selector': 'caption',
        'props': 'font-size: 1.2em; color: #2c3e50; font-weight: bold;'
    }])
    return styler.to_html()


# One table per feature type; a short paragraph replaces the table when the
# corresponding summary is empty.
num_missing_table = _render_missing_table(
    num_missing_df, "<p>No missing values in numerical features</p>"
)
cat_missing_table = _render_missing_table(
    cat_missing_df, "<p>No missing values in categorical features</p>"
)

def _save_missing_plot(summary, kind, bar_color, line_color, out_file, n_records):
    """
    Draw the missing-value chart for one feature type and save it as an image.

    The chart shows the first 20 rows of `summary`: bars for the missing count
    on the left axis and a line for the missing ratio on a twin right axis
    fixed to 0-100.

    Parameters:
    summary (pd.DataFrame): Frame with 'Missing Count' and 'Missing Ratio(%)'
    kind (str): Feature type word inserted into the title
    bar_color (str): Colour of the bars and of the left axis
    line_color (str): Colour of the line and of the right axis
    out_file (str): Path of the image to write
    n_records (int): Record total shown in the title

    Returns:
    str: `out_file`
    """
    shown = summary.head(20)

    plt.figure(figsize=(12, 8))
    count_ax = plt.gca()

    # Bars: absolute number of missing records.
    shown['Missing Count'].plot.bar(
        ax=count_ax,
        color=bar_color,
        alpha=0.7,
        label='Missing Count'
    )
    count_ax.set_ylabel('Number of Missing Records', color=bar_color)
    count_ax.tick_params(axis='y', colors=bar_color)

    # Line: missing ratio on a secondary y axis.
    ratio_ax = count_ax.twinx()
    shown['Missing Ratio(%)'].plot(
        ax=ratio_ax,
        color=line_color,
        marker='o',
        linewidth=2,
        label='Missing Ratio'
    )
    ratio_ax.set_ylabel('Missing Ratio(%)', color=line_color)
    ratio_ax.tick_params(axis='y', colors=line_color)
    ratio_ax.set_ylim(0, 100)

    plt.title(f'Top 20 Missing Values in {kind} Features (Total Records: {n_records:,})', pad=20)
    count_ax.legend(loc='upper left')
    ratio_ax.legend(loc='upper right')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(out_file)
    plt.close()
    return out_file


# Each snippet stays empty when the corresponding feature type has no missing
# values; otherwise the chart is written to disk and referenced from the HTML.
num_missing_plot = ""
if not num_missing_df.empty:
    plot_path = _save_missing_plot(
        num_missing_df, 'Numerical', '#3498db', '#e74c3c',
        'eda_report_dist/missing_numerical_plot.png', total_records
    )
    num_missing_plot = '<div style="text-align: center;"><img src="./eda_report_dist/missing_numerical_plot.png" width="900"></div>'

cat_missing_plot = ""
if not cat_missing_df.empty:
    plot_path = _save_missing_plot(
        cat_missing_df, 'Categorical', '#9b59b6', '#e67e22',
        'eda_report_dist/missing_categorical_plot.png', total_records
    )
    cat_missing_plot = '<div style="text-align: center;"><img src="./eda_report_dist/missing_categorical_plot.png" width="900"></div>'

In [14]:
# ==================== 4. Numerical Feature Discrimination Analysis ====================
def calc_iv_woe(df, feature, target, bins=10, force_cut=False):
    """
    Calculate Information Value (IV) and Weight of Evidence (WOE) for a numerical feature.

    The feature is discretised, and for every bin the share of "good"
    (target == 0) and "bad" (target == 1) records is compared:
    WOE = ln(good_pct / bad_pct), IV = sum((good_pct - bad_pct) * WOE).

    Rows whose feature value is missing receive no bin and are therefore left
    out of all statistics (groupby drops NaN keys by default).

    Parameters:
    df (pd.DataFrame): Input dataframe containing the feature and target
    feature (str): Name of the numerical feature to analyze
    target (str): Name of the target variable (binary: 0/1)
    bins (int): Number of bins for discretization
    force_cut (bool): If True, use equal-width binning instead of equal-frequency

    Returns:
    float: Calculated IV value for the feature
    pd.DataFrame: One row per bin with columns 'bins', 'total', 'bad',
    'bad_rate', 'good', 'good_pct', 'bad_pct', 'woe' and 'iv'
    """
    work = df[[feature, target]].copy()

    if force_cut:
        # Equal-width binning was requested explicitly: there is nothing to
        # fall back to, so any error simply propagates to the caller.
        work['bins'] = pd.cut(work[feature], bins=bins)
    else:
        # Equal-frequency binning first (duplicate edges are dropped); only
        # if that fails fall back to equal-width binning and say so.
        try:
            work['bins'] = pd.qcut(work[feature], q=bins, duplicates='drop')
        except Exception as e:
            print(f"Using equal-width binning instead of equal-frequency binning for {feature}: {str(e)}")
            work['bins'] = pd.cut(work[feature], bins=bins)

    # Per-bin statistics; observed=True keeps only bins that contain rows.
    grouped = work.groupby('bins', observed=True).agg({
        target: ['count', 'sum', 'mean']
    })
    grouped.columns = ['total', 'bad', 'bad_rate']
    grouped['good'] = grouped['total'] - grouped['bad']

    total_good = grouped['good'].sum()
    total_bad = grouped['bad'].sum()

    # The small epsilon keeps the divisions and the logarithm finite when a
    # class is absent overall or inside a bin.
    grouped['good_pct'] = grouped['good'] / (total_good + 1e-10)
    grouped['bad_pct'] = grouped['bad'] / (total_bad + 1e-10)
    grouped['woe'] = np.log((grouped['good_pct'] + 1e-10) / (grouped['bad_pct'] + 1e-10))
    grouped['iv'] = (grouped['good_pct'] - grouped['bad_pct']) * grouped['woe']

    iv = grouped['iv'].sum()
    return iv, grouped.reset_index()


# Calculate IV values for all features and create IV value table.
# The per-feature binning tables are kept so that the plots below reuse them
# instead of binning every top feature a second time.
iv_values = {}
woe_tables = {}
for col in num_features:
    try:
        iv_values[col], woe_tables[col] = calc_iv_woe(data, col, 'label', bins=10)
    except Exception as e:
        print(f"Error calculating IV for {col}: {str(e)}")
        iv_values[col] = np.nan

# Create and format IV dataframe
iv_df = pd.DataFrame.from_dict(iv_values, orient='index', columns=['IV_Value']).sort_values('IV_Value', ascending=False)

# Generate styled HTML table for top IV features
iv_table = iv_df.head(20).style \
    .background_gradient(subset=['IV_Value'], cmap='YlOrRd') \
    .format({'IV_Value': '{:.4f}'}) \
    .to_html()

# Generate analysis charts for top numerical features
os.makedirs('eda_report_dist/feature_plots', exist_ok=True)
# Top 5 features by IV; features whose IV could not be computed (NaN) are
# skipped because there is nothing to plot for them.
top_features = iv_df['IV_Value'].dropna().head(5).index.tolist()

feature_plots_html = ""
for feature in top_features:
    fig = None
    try:
        iv = iv_values[feature]
        woe_df = woe_tables[feature]

        fig, (dist_ax, woe_ax) = plt.subplots(1, 2, figsize=(12, 6))

        # Left: Distribution difference of feature across different labels
        sns.boxplot(data=data,
                    x='label',
                    y=feature,
                    hue='label',
                    palette=['#3498db', '#e74c3c'],
                    legend=False,
                    showfliers=False,
                    ax=dist_ax)
        dist_ax.set_title(f'"{feature}" Label Distribution IV={iv:.4f}')
        dist_ax.set_xlabel('Label')
        dist_ax.set_ylabel('Feature Value')

        # Right: WOE trend across bins
        woe_ax.plot(woe_df.index, woe_df['woe'],
                    marker='o', color='#2ecc71', linewidth=2)
        woe_ax.axhline(0, color='grey', linestyle='--')
        woe_ax.set_title(f'"{feature}" WOE Trend')
        woe_ax.set_xlabel('Bin')
        woe_ax.set_ylabel('WOE Value')

        # Bin intervals are converted to strings before being used as labels.
        bin_labels = [str(interval) for interval in woe_df['bins']]
        # Ceiling division so that at most 5 tick labels are drawn.
        step = max(1, -(-len(woe_df) // 5))
        tick_positions = list(range(0, len(woe_df), step))
        woe_ax.set_xticks(tick_positions)
        woe_ax.set_xticklabels([bin_labels[pos] for pos in tick_positions],
                               rotation=45, ha='right')  # Rotate labels to avoid overlap

        fig.tight_layout()

        # Save image
        plot_path = f'eda_report_dist/feature_plots/{feature}.png'
        fig.savefig(plot_path, bbox_inches='tight')  # Ensure complete display of labels

        # Add to HTML report
        feature_plots_html += f"""
        <div class="feature-analysis">
            <h4>{feature} (IV={iv:.4f})</h4>
            <img src="{plot_path}" width="900">
        </div>
        """
    except Exception as e:
        print(f"Error generating plot for {feature}: {str(e)}")
    finally:
        # Always release the figure, including when plotting failed half-way.
        if fig is not None:
            plt.close(fig)

In [15]:
# ==================== 5. Categorical Feature Analysis ====================
def analyze_categorical_features(df, cat_features, top_n=10, output_dir='eda_report_dist/cat_dist_plots'):
    """
    Analyzes categorical features by calculating cardinality, top categories,
    and generating distribution visualizations.

    For every feature a bar chart of its most frequent categories is written
    to `<output_dir>/<feature>_top<top_n>.png`. Percentages are relative to
    the total number of rows in `df`, so rows with a missing value count in
    the denominator.

    Parameters:
    df (pd.DataFrame): Input dataframe containing categorical features
    cat_features (list): List of categorical feature names to analyze
    top_n (int): Number of top categories to display
    output_dir (str): Directory the charts are written to (created if absent)

    Returns:
    dict: Per feature, a dict with 'cardinality', 'top_categories',
    'top_counts', 'top_percentages(%)' and 'plot_path' (path of the saved
    chart, or None when the feature has no observed category)
    """
    # Do not rely on the caller having created the directory.
    os.makedirs(output_dir, exist_ok=True)

    analysis_results = {}
    n_rows = len(df)

    for col in cat_features:
        # Calculate cardinality (number of unique categories)
        cardinality = df[col].nunique()

        # Calculate top N frequent categories. Scaling to percent before
        # rounding avoids float artefacts such as 12.350000000000001.
        top_categories = df[col].value_counts().head(top_n)
        top_categories_pct = (top_categories / n_rows * 100).round(2)

        plot_path = None
        if not top_categories.empty:
            plot_path = f'{output_dir}/{col}_top{top_n}.png'
            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                sns.barplot(x=top_categories_pct.index,
                            y=top_categories_pct.values,
                            hue=top_categories_pct.index,
                            palette='viridis',
                            legend=False,
                            ax=ax)

                ax.set_title(f'"{col}" Top {top_n} Categories Distribution\n(Total Cardinality: {cardinality})')
                ax.set_xlabel('Category')
                ax.set_ylabel('Percentage (%)')
                ax.tick_params(axis='x', labelrotation=45)

                # Add value labels on bars
                for p in ax.patches:
                    ax.annotate(f'{p.get_height():.1f}%',
                                (p.get_x() + p.get_width() / 2., p.get_height()),
                                ha='center', va='center',
                                xytext=(0, 5),
                                textcoords='offset points')

                fig.tight_layout()
                fig.savefig(plot_path, bbox_inches='tight')
            finally:
                # Release the figure even when drawing or saving failed.
                plt.close(fig)

        # Store results in dictionary
        analysis_results[col] = {
            'cardinality': cardinality,
            'top_categories': top_categories.index.tolist(),
            'top_counts': top_categories.values.tolist(),
            'top_percentages(%)': top_categories_pct.values.tolist(),
            'plot_path': plot_path
        }

    return analysis_results


# Run the categorical analysis; the chart directory is created first, and an
# empty feature list yields an empty result without calling the analyser.
os.makedirs('eda_report_dist/cat_dist_plots', exist_ok=True)
cat_analysis = (
    analyze_categorical_features(data, cat_features)
    if len(cat_features) > 0
    else {}
)


# Generate HTML report for categorical features
def generate_cat_report(cat_analysis):
    """
    Generates HTML report section for categorical feature analysis results.

    The section starts with an overview table (cardinality and most frequent
    category per feature) followed by one block per feature holding its top
    categories table next to its distribution chart.

    Parameters:
    cat_analysis (dict): Results from analyze_categorical_features function.
        Each entry needs 'cardinality', 'top_categories', 'top_counts' and
        'top_percentages(%)'. An optional 'plot_path' names the chart image;
        when the key is absent the '<feature>_top10.png' file in
        'eda_report_dist/cat_dist_plots' is referenced, and when it is None
        no image is embedded.

    Returns:
    str: HTML string containing the categorical feature report
    """
    def _first_or(values, default):
        # A feature without any observed category has empty top lists.
        return values[0] if values else default

    report_html = ""

    # Create overview table for categorical features
    overview_df = pd.DataFrame({
        'Feature Name': list(cat_analysis.keys()),
        'Cardinality': [x['cardinality'] for x in cat_analysis.values()],
        'Top 1 Category': [_first_or(x['top_categories'], '-') for x in cat_analysis.values()],
        'Top 1 Percentage (%)': [_first_or(x['top_percentages(%)'], float('nan'))
                                 for x in cat_analysis.values()]
    })

    overview_table = overview_df.style \
        .background_gradient(subset=['Cardinality'], cmap='YlOrRd') \
        .background_gradient(subset=['Top 1 Percentage (%)'], cmap='Blues') \
        .format({'Top 1 Percentage (%)': '{:.1f}%'}, na_rep='-') \
        .to_html()

    report_html += f"""
    <div class="report-section">
        <h2 class="section-title">5. Categorical Features Overview</h2>
        {overview_table}
    </div>
    """

    # Add detailed analysis for each categorical feature
    for col, stats in cat_analysis.items():
        # Create top categories table
        top10_df = pd.DataFrame({
            'Category': stats['top_categories'],
            'Count': stats['top_counts'],
            'Percentage (%)': stats['top_percentages(%)']
        })

        top10_table = top10_df.style \
            .format({'Percentage (%)': '{:.2f}%', 'Count': '{:,}'}) \
            .background_gradient(subset=['Percentage (%)'], cmap='Blues') \
            .to_html()

        # Prefer the chart path recorded by the analysis step so that the
        # link stays valid when a non-default top_n was used.
        plot_path = stats.get('plot_path', f'eda_report_dist/cat_dist_plots/{col}_top10.png')
        if plot_path:
            image_html = f'<img src="{plot_path}" style="height: 100%; width: auto; object-fit: contain;">'
        else:
            image_html = ''

        report_html += f"""
        <div class="feature-analysis">
            <h3>{col} (Cardinality: {stats['cardinality']})</h3>
            <div class="row" style="height: 300px;"> <!-- Fixed total row height (adjust as needed) -->
                <!-- Table container -->
                <div class="col-md-6 d-flex align-items-stretch"> 
                    <div style="width: 100%; overflow: auto;"> <!-- Scroll when exceeding height -->
                        {top10_table}
                    </div>
                </div>
                <!-- Image container -->
                <div class="col-md-6 d-flex align-items-stretch">
                    <div style="width: 100%; display: flex; align-items: flex-end; justify-content: flex-end;">
                        {image_html}
                    </div>
                </div>
            </div>
        </div>
        """

    return report_html


# Build the categorical section, or a placeholder paragraph when the analysis
# produced no results.
cat_report = (
    generate_cat_report(cat_analysis)
    if cat_analysis
    else "<p>No valid categorical features detected</p>"
)

In [16]:
# ==================== 6. Numerical Feature Correlation Analysis ====================
# 1. Calculate feature correlation matrix (numerical features plus the target)
corr_matrix = data[num_features + ['label']].corr()

# 2. Generate heatmap
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', center=0, fmt='.2f', ax=ax)
ax.set_title('Numerical Feature Correlation Heatmap', fontsize=14)
fig.tight_layout()

fig.savefig('eda_report_dist/correlation_heatmap.png')
plt.close(fig)

# 3. Generate correlation ranking with target variable (by absolute value)
target_corr = corr_matrix['label'].drop('label').sort_values(key=abs, ascending=False)
corr_table = pd.DataFrame({
    'Feature': target_corr.index,
    'Correlation Coefficient': target_corr.values,
    # include_lowest=True puts |r| == 0 into 'Very Weak'; without it the
    # first interval is open on the left and an exact zero is left unlabelled.
    'Correlation Strength': pd.cut(abs(target_corr),
                                   bins=[0, 0.2, 0.4, 0.6, 0.8, 1],
                                   labels=['Very Weak', 'Weak', 'Moderate', 'Strong', 'Very Strong'],
                                   include_lowest=True)
}).head(20)


# 4. Function to detect highly correlated feature pairs
def get_high_corr_pairs(corr_matrix, threshold=0.7):
    """
    Detect highly correlated feature pairs
    Parameters:
        corr_matrix: Correlation coefficient matrix (square pd.DataFrame)
        threshold: Correlation threshold; a pair is reported when the absolute
            value of its coefficient is strictly greater than this value
    Returns:
        Sorted list of highly correlated feature pairs in format
        [(feature1, feature2, correlation_coefficient), ...], ordered by
        absolute coefficient, largest first. Every unordered pair appears at
        most once, a feature is never paired with itself, and pairs whose
        coefficient is NaN are skipped.
    """
    row_labels = list(corr_matrix.index)
    col_labels = list(corr_matrix.columns)
    values = corr_matrix.to_numpy()

    high_corr_pairs = []
    # Walk the strict upper triangle only (row position < column position):
    # this excludes the diagonal and the mirrored duplicate of each pair.
    for col_pos, col in enumerate(col_labels):
        for row_pos in range(min(col_pos, len(row_labels))):
            coefficient = values[row_pos, col_pos]
            if pd.notna(coefficient) and abs(coefficient) > threshold:
                high_corr_pairs.append((col, row_labels[row_pos], coefficient))

    # Sort by absolute value
    return sorted(high_corr_pairs, key=lambda pair: abs(pair[2]), reverse=True)


# Highly correlated feature pairs, computed on the matrix without the target.
high_corr_pairs = get_high_corr_pairs(corr_matrix.drop(index='label', columns='label'))

# 5. Generate feature statistics table
numeric_block = data[num_features]
missing_mask = numeric_block.isnull()
feature_stats = pd.DataFrame({
    'Mean': numeric_block.mean(),
    'Standard Deviation': numeric_block.std(),
    'Missing Values Count': missing_mask.sum(),
    'Missing Values Ratio': missing_mask.mean(),
    'Minimum': numeric_block.min(),
    '25th Percentile': numeric_block.quantile(0.25),
    'Median': numeric_block.median(),
    '75th Percentile': numeric_block.quantile(0.75),
    'Maximum': numeric_block.max()
}).round(4)

# 6. Generate detailed WOE tables for TOP5 features
# Note: the five features are those most correlated with the target
# (first rows of target_corr), and this rebinds `top_features`.
top_features = target_corr.head(5).index.tolist()
woe_tables_html = ""

woe_display_columns = ['bins', 'Sample Count', 'Sample Percentage(%)', 'bad', 'good', 'Bad Rate(%)', 'woe', 'iv']
woe_column_labels = {
    'bins': 'Bin',
    'bad': 'Bad Count',
    'good': 'Good Count',
    'woe': 'WOE Value',
    'iv': 'IV Contribution'
}
woe_number_formats = {
    'Sample Percentage(%)': '{:.2f}%',
    'Bad Rate(%)': '{:.2f}%',
    'WOE Value': '{:.4f}',
    'IV Contribution': '{:.6f}'
}

for feature in top_features:
    try:
        iv, woe_df = calc_iv_woe(data, feature, 'label')

        # Derived display columns: bin size, its share of all records, and
        # the bad rate in percent.
        woe_df = woe_df.assign(**{
            'Sample Count': woe_df['total'],
            'Sample Percentage(%)': (woe_df['total'] / len(data)) * 100,
            'Bad Rate(%)': woe_df['bad_rate'] * 100,
        })

        woe_styler = woe_df[woe_display_columns].rename(columns=woe_column_labels).style
        woe_styler = woe_styler.background_gradient(subset=['Bad Rate(%)'], cmap='Reds')
        woe_styler = woe_styler.background_gradient(subset=['WOE Value'], cmap='coolwarm')
        woe_styler = woe_styler.format(woe_number_formats)
        woe_styler = woe_styler.set_caption(f'Feature: {feature} (IV={iv:.4f})')
        woe_table = woe_styler.to_html()

        woe_tables_html += f"""
        <div class="woe-analysis">
            <h4>{feature} Binning Analysis (IV={iv:.4f})</h4>
            {woe_table}
        </div>
        """
    except Exception as e:
        print(f"Error generating WOE table for {feature}: {str(e)}")

# 7. Generate correlation analysis HTML content
# The two styled tables are rendered up front and then dropped into the
# template.
corr_table_html = (
    corr_table.style
    .background_gradient(subset=['Correlation Coefficient'], cmap='coolwarm')
    .format({'Correlation Coefficient': '{:.4f}'})
    .to_html()
)
feature_stats_html = (
    feature_stats.style
    .background_gradient(subset=['Missing Values Ratio'], cmap='Reds')
    .background_gradient(subset=['Standard Deviation'], cmap='Blues')
    .format({
        'Mean': '{:.4f}',
        'Standard Deviation': '{:.4f}',
        'Missing Values Count': '{:,}',
        'Missing Values Ratio': '{:.2%}',
        'Minimum': '{:.4f}',
        '25th Percentile': '{:.4f}',
        'Median': '{:.4f}',
        '75th Percentile': '{:.4f}',
        'Maximum': '{:.4f}'
    })
    .to_html()
)

correlation_html = f"""
<div class="correlation-analysis">
    <h3>Feature Correlation Heatmap</h3>
    <div style="text-align: center;">
        <img src="./eda_report_dist/correlation_heatmap.png" width="800">
    </div>
    
    <h3>Top 20 Features with Strongest Correlation to Target Variable</h3>
    {corr_table_html}
     
    <div class="correlation-classification">
        <strong>Correlation Strength Explanation：</strong><br>
        • 0.0-0.2: Very Weak<br>
        • 0.2-0.4: Weak<br>
        • 0.4-0.6: Moderate<br>
        • 0.6-0.8: Strong<br>
        • 0.8-1.0: Very Strong
    </div>
    
    <h3>Feature Statistics Table</h3>
    {feature_stats_html}
     
    <h3>TOP5 Features WOE Analysis</h3>
    {woe_tables_html}
</div>
"""

In [21]:
# `correlation_html` is used exactly as built by the correlation cell: its
# markup is already balanced there. Appending a closing tag here would close
# the surrounding report section early and would add one more stray tag on
# every re-run of this cell, so the variable is intentionally not modified.

# Class shares for the summary sentence; a class that is absent from the
# sample is reported as 0 instead of raising a KeyError.
positive_share = label_dist.get(1, 0.0)
negative_share = label_dist.get(0, 0.0)

# Combine HTML report
html_report = f"""
<html>
<head>
  <style>
    .report-section {{
      margin: 5px 0;
      padding: 10px;
      border-radius: 5px;
      background: #f9f9f9;
    }}
    .highlight {{
      color: #e74c3c;
      font-weight: bold;
    }}
    .summary {{
      margin: 15px 0;
      padding: 10px;
      background: #ebf5fb;
      border-left: 4px solid #3498db;
    }}
    .iv-classification {{
      background: #f2f4f4;
      padding: 10px;
      margin: 10px 0;
      border-radius: 5px;
    }}
    .feature-analysis {{
      margin-bottom: 30px;
      border-bottom: 1px solid #eee;
      padding-bottom: 20px;
    }}
    .section-title {{
      color: #2c3e50;
      border-bottom: 1px solid #3498db;
      padding-bottom: 5px;
    }}
    .cat-analysis {{
        margin-bottom: 30px;
        border: 1px solid #eee;
        padding: 15px;
        border-radius: 5px;
    }}
    .cat-analysis h3 {{
        color: #2c3e50;
        border-bottom: 1px solid #3498db;
        padding-bottom: 5px;
    }}
    .row {{
        display: flex;
        flex-wrap: wrap;
        margin: 0 -15px;
    }}
    .col-md-6 {{
        flex: 0 0 50%;
        max-width: 50%;
        padding: 0 15px;
        box-sizing: border-box;
    }}
    .correlation-analysis {{
        margin-bottom: 30px;
    }}
    .correlation-analysis ul {{
        list-style-type: none;
        padding-left: 0;
    }}
    .correlation-analysis li {{
        padding: 5px 0;
        border-bottom: 1px solid #eee;
    }}
  </style>
</head>
<body>
  <h1 style="color: #2c3e50; text-align: center;">EDA Report for Feature Engineering</h1>
  <p style="text-align: right; color: #7f8c8d;">Generated: {current_date}    Analyst: {analyst_name}</p>
  
  <div class="report-section">
    <h2 class="section-title">1. Label Distribution Analysis</h2>
    <div class="summary">
      The dataset contains <span class="highlight">{len(data):,}</span> records,
      with positive samples accounting for <span class="highlight">{positive_share:.2%}</span>,
      and negative samples accounting for <span class="highlight">{negative_share:.2%}</span>
    </div>
    {label_table}
  </div>
  
  <div class="report-section">
    <h2 class="section-title">2. Data Sample Preview</h2>
    {feature_table}
  </div>
  
  <div class="report-section">
    <h2 class="section-title">3. Missing Value Analysis</h2>
    <div class="summary">
        A total of <span class="highlight">{len(num_missing_df) + len(cat_missing_df)}</span> features have missing values,<br>
        Numerical features with missing values: <span class="highlight">{len(num_missing_df)}</span>,<br>
        Categorical features with missing values: <span class="highlight">{len(cat_missing_df)}</span>
    </div>
    <h3>Numerical Features Missing Details</h3>
    <div style="display: flex; flex-wrap: wrap; gap: 20px; margin: 20px 0; align-items: center; height: 400px;">
      <!-- Table section - ~40% width -->
      <div style="flex: 0 0 40%; max-width: 40%; height: 100%; overflow: auto; display: flex; align-items: center;">
        <div style="width: 100%;">
          {num_missing_table}
        </div>
      </div>
      <!-- Chart section - ~60% width -->
      <div style="flex: 0 0 55%; max-width: 55%; height: 100%; display: flex; align-items: center; justify-content: center;">
        <div style="max-height: 100%; width: 100%;">
          {num_missing_plot}
        </div>
      </div>
    </div>
    <h3>Categorical Features Missing Details</h3>
    <div style="display: flex; flex-wrap: wrap; gap: 20px; margin: 20px 0; align-items: center; height: 400px;">
      <!-- Table section - ~40% width -->
      <div style="flex: 0 0 40%; max-width: 40%; height: 100%; overflow: auto; display: flex; align-items: center;">
        <div style="width: 100%;">
          {cat_missing_table}
        </div>
      </div>
      <!-- Chart section - ~60% width -->
      <div style="flex: 0 0 55%; max-width: 55%; height: 100%; display: flex; align-items: center; justify-content: center;">
        <div style="max-height: 100%; width: 100%;">
          {cat_missing_plot}
        </div>
      </div>
    </div>
  </div>
  
  <div class="report-section">
    <h2 class="section-title">4. Numerical Feature Discrimination Analysis</h2>
    <div class="summary">
      A total of <span class="highlight">{len(num_features)}</span> numerical features were analyzed,
      the feature with the highest IV value is <span class="highlight">{iv_df.index[0]}</span>(IV={iv_df.iloc[0, 0]:.4f}),
      <span class="highlight">{len(iv_df[iv_df['IV_Value'] > 0.3])}</span> features have strong predictive power (IV&gt;0.3)
    </div>
    <div class="iv-classification">
      <strong>IV Value Predictive Power Classification：</strong><br>
      • &lt; 0.02: No predictive power<br>
      • 0.02-0.1: Weak predictive power<br>
      • 0.1-0.3: Moderate predictive power<br>
      • &gt; 0.3: Strong predictive power
    </div>
    
    <h3>Top 20 Features by IV Value</h3>
    {iv_table}
    
    <h3>Key Features Detailed Analysis</h3>
    {feature_plots_html}
    <p style="color: #7f8c8d;">Note: Complete analysis charts are saved in the feature_plots directory</p>
  </div>
  
  <div class="report-section">
    {cat_report}
  </div>
  
  <div class="report-section">
    <h2 class="section-title">6. Numerical Feature Correlation Analysis</h2>
    <div class="summary">
      The feature with the strongest correlation to the target variable is <span class="highlight">{target_corr.index[0]}</span>(r={target_corr.iloc[0]:.2f}),
      a total of <span class="highlight">{len(target_corr[abs(target_corr) > 0.5])}</span> strongly correlated features were found (|r|&gt;0.5)
    </div>
    {correlation_html}
  </div>
</body>
</html>
"""

# Render the assembled report as the cell output.
HTML(html_report)

Unnamed: 0,Class,Count,Percentage(%)
0,Negative(0),7818,78.18%
1,Positive(1),2182,21.82%

Unnamed: 0,label,I1,I2,I3,I4,I5,I6,I7,I8,I9,I10,I11,I12,I13,C1,C2,C3,C4,C5,C6,C7,C8,C9,C10,C11,C12,C13,C14,C15,C16,C17,C18,C19,C20,C21,C22,C23,C24,C25,C26
0,0,1.0,1,5.0,0.0,1382.0,4.0,15.0,2.0,181.0,1.0,2.0,,2.0,68fd1e64,80e26c9b,fb936136,7b4723c4,25c83c98,7e0ccccf,de7995b8,1f89b562,a73ee510,a8cd5504,b2cb9c98,37c9c164,2824a5f6,1adce6ef,8ba8b39a,891b62e7,e5ba7672,f54016b9,21ddcdc9,b1252a9d,07b5194c,,3a171ecb,c5c50484,e8b83407,9727dd16
1,0,2.0,0,44.0,1.0,102.0,8.0,2.0,2.0,4.0,1.0,1.0,,4.0,68fd1e64,f0cf0024,6f67f7e5,41274cd7,25c83c98,fe6b92e5,922afcc0,0b153874,a73ee510,2b53e5fb,4f1b46f3,623049e6,d7020589,b28479f6,e6c5b5cd,c92f3b61,07c540c4,b04e4670,21ddcdc9,5840adea,60f6221e,,3a171ecb,43f13e8b,e8b83407,731c3655
2,0,2.0,0,1.0,14.0,767.0,89.0,4.0,2.0,245.0,1.0,3.0,3.0,45.0,287e684f,0a519c5c,02cf9876,c18be181,25c83c98,7e0ccccf,c78204a1,0b153874,a73ee510,3b08e48b,5f5e6091,8fe001f4,aa655a2f,07d13a8f,6dc710ed,36103458,8efede7f,3412118d,,,e587c466,ad3062eb,3a171ecb,3b183c5c,,
3,0,,893,,,4392.0,,0.0,0.0,0.0,,0.0,,,68fd1e64,2c16a946,a9a87e68,2e17d6f6,25c83c98,fe6b92e5,2e8a689b,0b153874,a73ee510,efea433b,e51ddf94,a30567ca,3516f6e6,07d13a8f,18231224,52b8680f,1e88c74f,74ef3502,,,6b3a5ca6,,3a171ecb,9117a34a,,
4,0,3.0,-1,,0.0,2.0,0.0,3.0,0.0,0.0,1.0,1.0,,0.0,8cf07265,ae46a29d,c81688bb,f922efad,25c83c98,13718bbd,ad9fa255,0b153874,a73ee510,5282c137,e5d8af57,66a76a26,f06c53ac,1adce6ef,8ff4b403,01adbab4,1e88c74f,26b3c7a7,,,21c9516a,,32c7478e,b34f3128,,
5,0,,-1,,,12824.0,,0.0,0.0,6.0,,0.0,,,05db9164,6c9c9cf3,2730ec9c,5400db8b,43b19349,6f6d9be8,53b5f978,0b153874,a73ee510,3b08e48b,91e8fc27,be45b877,9ff13f22,07d13a8f,06969a20,9bc7fff5,776ce399,92555263,,,242bb710,8ec974f4,be7c41b4,72c78f11,,
6,0,,1,2.0,,3168.0,,0.0,1.0,2.0,,0.0,,,439a44a4,ad4527a2,c02372d0,d34ebbaa,43b19349,fe6b92e5,4bc6ffea,0b153874,a73ee510,3b08e48b,a4609aab,14d63538,772a00d7,07d13a8f,f9d1382e,b00d3dc9,776ce399,cdfa8259,,,20062612,,93bad2c0,1b256e61,,
7,1,1.0,4,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,,0.0,68fd1e64,2c16a946,503b9dbc,e4dbea90,f3474129,13718bbd,38eb9cf4,1f89b562,a73ee510,547c0ffe,bc8c9f21,60ab2f07,46f42a63,07d13a8f,18231224,e6b6bdc7,e5ba7672,74ef3502,,,5316a17f,,32c7478e,9117a34a,,
8,0,,44,4.0,8.0,19010.0,249.0,28.0,31.0,141.0,,1.0,,8.0,05db9164,d833535f,d032c263,c18be181,25c83c98,7e0ccccf,d5b6acf2,0b153874,a73ee510,2acdcf4e,086ac2d2,dfbb09fb,41a6ae00,b28479f6,e2502ec9,84898b2a,e5ba7672,42a2edb9,,,0014c32a,,32c7478e,3b183c5c,,
9,0,,35,,1.0,33737.0,21.0,1.0,2.0,3.0,,1.0,,1.0,05db9164,510b40a5,d03e7c24,eb1fd928,25c83c98,,52283d1c,0b153874,a73ee510,015ac893,e51ddf94,951fe4a9,3516f6e6,07d13a8f,2ae4121c,8ec71479,d4bb7bd8,70d0f5f9,,,0e63fca0,,32c7478e,0e8fe315,,

Unnamed: 0,Missing Count,Missing Ratio(%)
I12,7735,77.35%
I1,4481,44.81%
I10,4481,44.81%
I6,2511,25.11%
I3,2037,20.37%
I4,1978,19.78%
I13,1978,19.78%
I5,494,4.94%
I7,492,4.92%
I9,492,4.92%

Unnamed: 0,Missing Count,Missing Ratio(%)
C22,8182,81.82%
C26,4496,44.96%
C25,4496,44.96%
C20,4496,44.96%
C19,4496,44.96%
C6,1332,13.32%
C16,357,3.57%
C3,357,3.57%
C4,357,3.57%
C24,357,3.57%

Unnamed: 0,IV_Value
I6,0.3787
I5,0.2645
I7,0.2435
I11,0.1805
I13,0.1634
I1,0.1353
I12,0.0818
I3,0.0569
I10,0.0568
I2,0.0285

Unnamed: 0,Feature Name,Cardinality,Top 1 Category,Top 1 Percentage (%)
0,C1,175,05db9164,50.0%
1,C2,386,38a947a1,12.3%
2,C3,5520,d032c263,4.7%
3,C4,4032,d16679b9,7.3%
4,C5,56,25c83c98,66.6%
5,C6,7,7e0ccccf,45.9%
6,C7,3184,3f4ec687,1.1%
7,C8,93,0b153874,58.9%
8,C9,3,a73ee510,88.7%
9,C10,2986,3b08e48b,27.6%

Unnamed: 0,Category,Count,Percentage (%)
0,05db9164,5005,50.05%
1,68fd1e64,1717,17.17%
2,5a9ed9b0,840,8.40%
3,8cf07265,480,4.80%
4,be589b51,341,3.41%
5,5bfa8ab5,220,2.20%
6,87552397,171,1.71%
7,f473b8dc,146,1.46%
8,39af2607,115,1.15%
9,ae82ea21,74,0.74%

Unnamed: 0,Category,Count,Percentage (%)
0,38a947a1,1235,12.35%
1,09e68b86,652,6.52%
2,80e26c9b,467,4.67%
3,d833535f,348,3.48%
4,4f25e98b,346,3.46%
5,287130e0,331,3.31%
6,38d50e09,303,3.03%
7,0a519c5c,229,2.29%
8,207b2d81,224,2.24%
9,08d6d899,189,1.89%

Unnamed: 0,Category,Count,Percentage (%)
0,d032c263,470,4.70%
1,b00d1501,331,3.31%
2,77f2f2e5,271,2.71%
3,aa8c1539,169,1.69%
4,02cf9876,149,1.49%
5,ad4b77ff,126,1.26%
6,74e1a23a,105,1.05%
7,9143c832,90,0.90%
8,4470baf4,73,0.73%
9,2cbec47f,68,0.68%

Unnamed: 0,Category,Count,Percentage (%)
0,d16679b9,730,7.30%
1,c18be181,619,6.19%
2,85dd697c,340,3.40%
3,13508380,180,1.80%
4,f922efad,133,1.33%
5,9a6888fb,105,1.05%
6,29998ed1,99,0.99%
7,f56b7dd5,90,0.90%
8,6a14f9b9,81,0.81%
9,8c8a4c47,75,0.75%

Unnamed: 0,Category,Count,Percentage (%)
0,25c83c98,6656,66.56%
1,4cf72387,1538,15.38%
2,43b19349,641,6.41%
3,384874ce,353,3.53%
4,30903e74,226,2.26%
5,0942e0a7,134,1.34%
6,f281d2a7,91,0.91%
7,b0530c50,74,0.74%
8,b2241560,48,0.48%
9,f3474129,41,0.41%

Unnamed: 0,Category,Count,Percentage (%)
0,7e0ccccf,4593,45.93%
1,fbad5c96,1940,19.40%
2,fe6b92e5,1229,12.29%
3,13718bbd,414,4.14%
4,6f6d9be8,343,3.43%
5,3bf701e7,146,1.46%
6,e3520422,3,0.03%

Unnamed: 0,Category,Count,Percentage (%)
0,3f4ec687,105,1.05%
1,9b98e9fc,86,0.86%
2,970f01b2,83,0.83%
3,49b74ebc,82,0.82%
4,38eb9cf4,75,0.75%
5,88002ee1,66,0.66%
6,d0bdaa98,65,0.65%
7,dc7659bd,57,0.57%
8,d2d741ca,56,0.56%
9,81bb0302,52,0.52%

Unnamed: 0,Category,Count,Percentage (%)
0,0b153874,5888,58.88%
1,5b392875,1701,17.01%
2,1f89b562,768,7.68%
3,37e4aa92,414,4.14%
4,062b5529,268,2.68%
5,51d76abe,165,1.65%
6,c8ddd494,131,1.31%
7,64523cfa,99,0.99%
8,6c41e35e,83,0.83%
9,985e3fcb,54,0.54%

Unnamed: 0,Category,Count,Percentage (%)
0,a73ee510,8871,88.71%
1,7cc72ec2,1125,11.25%
2,a18233ea,4,0.04%

Unnamed: 0,Category,Count,Percentage (%)
0,3b08e48b,2757,27.57%
1,fbbf2c95,105,1.05%
2,efea433b,104,1.04%
3,5ba575e7,69,0.69%
4,0e9ead52,67,0.67%
5,fa7d0797,65,0.65%
6,6c47047a,64,0.64%
7,451bd4e4,52,0.52%
8,f6f942d1,51,0.51%
9,5162b19c,45,0.45%

Unnamed: 0,Category,Count,Percentage (%)
0,c4adf918,203,2.03%
1,7f8ffe57,197,1.97%
2,e51ddf94,133,1.33%
3,f25fe7e9,102,1.02%
4,5874c9c9,92,0.92%
5,36bccca0,83,0.83%
6,a7b606c4,78,0.78%
7,755e4a50,78,0.78%
8,b7094596,70,0.70%
9,4ba74619,67,0.67%

Unnamed: 0,Category,Count,Percentage (%)
0,dfbb09fb,470,4.70%
1,e0d76380,331,3.31%
2,9f32b866,271,2.71%
3,d8c29807,169,1.69%
4,8fe001f4,149,1.49%
5,a2f4e8b5,126,1.26%
6,fb8fab62,105,1.05%
7,6aaba33c,99,0.99%
8,ae1bb660,90,0.90%
9,bb669e25,73,0.73%

Unnamed: 0,Category,Count,Percentage (%)
0,85dbe138,221,2.21%
1,46f42a63,204,2.04%
2,3516f6e6,149,1.49%
3,ebd756bd,129,1.29%
4,740c210d,119,1.19%
5,80467802,118,1.18%
6,6e5da64f,110,1.10%
7,dd183b4c,102,1.02%
8,1aa94af3,82,0.82%
9,5978055e,78,0.78%

Unnamed: 0,Category,Count,Percentage (%)
0,07d13a8f,3629,36.29%
1,b28479f6,3456,34.56%
2,1adce6ef,1604,16.04%
3,64c94865,362,3.62%
4,8ceecbc8,245,2.45%
5,cfef1c29,226,2.26%
6,051219e6,140,1.40%
7,f862f261,108,1.08%
8,f7c1b33f,70,0.70%
9,ad1cc976,45,0.45%

Unnamed: 0,Category,Count,Percentage (%)
0,36721ddc,195,1.95%
1,52baadf5,150,1.50%
2,10040656,119,1.19%
3,f3635baf,100,1.00%
4,0f942372,99,0.99%
5,7ac43a46,99,0.99%
6,dbc5e126,91,0.91%
7,a733d362,88,0.88%
8,dfab705f,78,0.78%
9,d2f03b75,77,0.77%

Unnamed: 0,Category,Count,Percentage (%)
0,84898b2a,470,4.70%
1,1203a270,331,3.31%
2,31ca40b6,271,2.71%
3,c64d548f,169,1.69%
4,36103458,149,1.49%
5,89052618,126,1.26%
6,c6b1e1b2,105,1.05%
7,b041b04a,99,0.99%
8,bad5ee18,90,0.90%
9,f8b34416,80,0.80%

Unnamed: 0,Category,Count,Percentage (%)
0,e5ba7672,4304,43.04%
1,07c540c4,1333,13.33%
2,d4bb7bd8,1269,12.69%
3,3486227d,769,7.69%
4,776ce399,602,6.02%
5,2005abd1,494,4.94%
6,1e88c74f,442,4.42%
7,8efede7f,420,4.20%
8,27c07bd6,367,3.67%

Unnamed: 0,Category,Count,Percentage (%)
0,5aed7436,510,5.10%
1,891589e7,290,2.90%
2,7ef5affa,223,2.23%
3,f54016b9,205,2.05%
4,005c6740,189,1.89%
5,281769c2,175,1.75%
6,1f868fdd,164,1.64%
7,582152eb,149,1.49%
8,bd17c3da,146,1.46%
9,c21c3e4c,136,1.36%

Unnamed: 0,Category,Count,Percentage (%)
0,21ddcdc9,3310,33.10%
1,55dd3565,201,2.01%
2,cf99e5de,121,1.21%
3,9437f62f,85,0.85%
4,1d1eb838,48,0.48%
5,9653bb65,47,0.47%
6,712d530c,45,0.45%
7,5b885066,42,0.42%
8,1d04f4a4,38,0.38%
9,2b558521,37,0.37%

Unnamed: 0,Category,Count,Percentage (%)
0,5840adea,1990,19.90%
1,a458ea53,1772,17.72%
2,b1252a9d,1742,17.42%

Unnamed: 0,Category,Count,Percentage (%)
0,0014c32a,470,4.70%
1,73d06dde,331,3.31%
2,dfcfc3fa,271,2.71%
3,5f957280,169,1.69%
4,e587c466,149,1.49%
5,d4703ebd,126,1.26%
6,99c09e97,105,1.05%
7,723b4dfd,99,0.99%
8,0429f84b,90,0.90%
9,f3ddd519,80,0.80%

Unnamed: 0,Category,Count,Percentage (%)
0,ad3062eb,956,9.56%
1,c9d4222a,698,6.98%
2,78e2e389,68,0.68%
3,8ec974f4,58,0.58%
4,c0061c6d,33,0.33%
5,8651fddb,4,0.04%
6,ccfd4002,1,0.01%

Unnamed: 0,Category,Count,Percentage (%)
0,32c7478e,4515,45.15%
1,3a171ecb,2024,20.24%
2,423fab69,1045,10.45%
3,be7c41b4,724,7.24%
4,bcdee96c,605,6.05%
5,c7dc6720,406,4.06%
6,55dd3565,315,3.15%
7,dbb486d7,141,1.41%
8,93bad2c0,107,1.07%
9,85d5a995,49,0.49%

Unnamed: 0,Category,Count,Percentage (%)
0,aee52b6f,755,7.55%
1,1793a828,746,7.46%
2,3b183c5c,688,6.88%
3,3fdb382b,578,5.78%
4,b34f3128,338,3.38%
5,45ab94c8,230,2.30%
6,9117a34a,151,1.51%
7,335a6a1e,106,1.06%
8,df487a73,92,0.92%
9,c0d61a5c,90,0.90%

Unnamed: 0,Category,Count,Percentage (%)
0,e8b83407,1807,18.07%
1,001f3601,1093,10.93%
2,ea9a246c,622,6.22%
3,010f6491,360,3.60%
4,2bf691b1,247,2.47%
5,9b3e8820,241,2.41%
6,445bbe3b,192,1.92%
7,f0f449dd,154,1.54%
8,cb079c2d,152,1.52%
9,9d93af03,125,1.25%

Unnamed: 0,Category,Count,Percentage (%)
0,49d68486,454,4.54%
1,c84c4aec,180,1.80%
2,2fede552,151,1.51%
3,b7d9c3bc,130,1.30%
4,984e0db0,124,1.24%
5,aa5f0a15,110,1.10%
6,9904c656,108,1.08%
7,c27f155b,107,1.07%
8,b9809574,82,0.82%
9,56be3401,57,0.57%

Unnamed: 0,Feature,Correlation Coefficient,Correlation Strength
I10,I10,0.1637,Very Weak
I11,I11,0.1346,Very Weak
I6,I6,-0.11,Very Weak
I1,I1,0.0984,Very Weak
I7,I7,0.0931,Very Weak
I13,I13,-0.093,Very Weak
I5,I5,-0.0833,Very Weak
I12,I12,0.0685,Very Weak
I2,I2,0.0485,Very Weak
I8,I8,-0.0462,Very Weak

Unnamed: 0,Mean,Standard Deviation,Missing Values Count,Missing Values Ratio,Minimum,25th Percentile,Median,75th Percentile,Maximum
I1,3.3796,9.4198,4481,44.81%,0.0,0.0,1.0,3.0,214.0
I2,110.8643,443.486,0,0.00%,-2.0,0.0,3.0,38.0,18522.0
I3,46.9496,893.9815,2037,20.37%,0.0,2.0,7.0,21.0,65535.0
I4,8.0136,11.0273,1978,19.78%,0.0,2.0,4.0,10.0,280.0
I5,17082.2644,62568.6571,494,4.94%,0.0,272.0,2480.0,10491.25,1618112.0
I6,143.395,365.5873,2511,25.11%,0.0,9.0,40.0,129.0,12167.0
I7,14.5258,52.5801,492,4.92%,0.0,0.0,3.0,10.0,1658.0
I8,13.0819,17.568,10,0.10%,0.0,2.0,7.0,20.0,682.0
I9,124.675,277.3918,492,4.92%,0.0,10.0,41.0,123.0,7335.0
I10,0.5771,0.6563,4481,44.81%,0.0,0.0,0.0,1.0,6.0

Unnamed: 0,Bin,Sample Count,Sample Percentage(%),Bad Count,Good Count,Bad Rate(%),WOE Value,IV Contribution
0,"(-0.001, 1.0]",5153,51.53%,1315,3838,25.52%,0.069,0.004374
1,"(1.0, 6.0]",366,3.66%,167,199,45.63%,-0.8268,0.052412

Unnamed: 0,Bin,Sample Count,Sample Percentage(%),Bad Count,Good Count,Bad Rate(%),WOE Value,IV Contribution
0,"(-0.001, 1.0]",5957,59.57%,1004,4953,16.85%,0.3524,0.070247
1,"(1.0, 2.0]",1279,12.79%,358,921,27.99%,-0.2987,0.012973
2,"(2.0, 3.0]",700,7.00%,214,486,30.57%,-0.4234,0.014695
3,"(3.0, 5.0]",659,6.59%,200,459,30.35%,-0.4129,0.013125
4,"(5.0, 79.0]",913,9.13%,352,561,38.55%,-0.7775,0.069506

Unnamed: 0,Bin,Sample Count,Sample Percentage(%),Bad Count,Good Count,Bad Rate(%),WOE Value,IV Contribution
0,"(-0.001, 2.0]",910,9.10%,441,469,48.46%,-1.0026,0.145719
1,"(2.0, 6.0]",621,6.21%,243,378,39.13%,-0.6224,0.036476
2,"(6.0, 13.0]",740,7.40%,254,486,34.32%,-0.4153,0.018664
3,"(13.0, 24.0]",731,7.31%,205,526,28.04%,-0.1219,0.001493
4,"(24.0, 40.0]",749,7.49%,174,575,23.23%,0.1311,0.001664
5,"(40.0, 64.8]",742,7.42%,152,590,20.49%,0.2921,0.007838
6,"(64.8, 100.6]",749,7.49%,139,610,18.56%,0.4148,0.015429
7,"(100.6, 172.4]",749,7.49%,128,621,17.09%,0.5151,0.023128
8,"(172.4, 334.2]",749,7.49%,104,645,13.89%,0.7607,0.046935
9,"(334.2, 12167.0]",749,7.49%,81,668,10.81%,1.0456,0.081357

Unnamed: 0,Bin,Sample Count,Sample Percentage(%),Bad Count,Good Count,Bad Rate(%),WOE Value,IV Contribution
0,"(-0.001, 1.0]",3515,35.15%,759,2756,21.59%,0.2874,0.049017
1,"(1.0, 2.0]",493,4.93%,153,340,31.03%,-0.2036,0.003872
2,"(2.0, 4.0]",551,5.51%,187,364,33.94%,-0.3361,0.012103
3,"(4.0, 8.0]",439,4.39%,164,275,37.36%,-0.4852,0.020641
4,"(8.0, 214.0]",521,5.21%,219,302,42.03%,-0.6808,0.049671

Unnamed: 0,Bin,Sample Count,Sample Percentage(%),Bad Count,Good Count,Bad Rate(%),WOE Value,IV Contribution
0,"(-0.001, 1.0]",3730,37.30%,492,3238,13.19%,0.6406,0.132965
1,"(1.0, 2.0]",848,8.48%,178,670,20.99%,0.0819,0.000585
2,"(2.0, 3.0]",611,6.11%,146,465,23.90%,-0.0852,0.000477
3,"(3.0, 5.0]",861,8.61%,205,656,23.81%,-0.0804,0.000599
4,"(5.0, 8.0]",778,7.78%,227,551,29.18%,-0.3568,0.011422
5,"(8.0, 14.0]",818,8.18%,235,583,28.73%,-0.335,0.01053
6,"(14.0, 30.0]",924,9.24%,297,627,32.14%,-0.4964,0.027106
7,"(30.0, 1658.0]",938,9.38%,348,590,37.10%,-0.7157,0.059822
