In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(root_dir, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**IMPORTS AND SETUP**

In [1]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import xgboost as xgb
import joblib
import warnings
warnings.filterwarnings('ignore')

# Set random seed for reproducibility
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Plotting configuration
# The bundled seaborn styles are named 'seaborn-v0_8-*' on newer Matplotlib
# releases and 'seaborn-*' on older ones; pick whichever this installation
# provides and fall back to the default style instead of raising.
_style_candidates = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')
_plot_style = next(
    (name for name in _style_candidates if name in plt.style.available),
    'default',
)
plt.style.use(_plot_style)
sns.set_palette("husl")



**DATA LOADING**

In [2]:

# Load the dataset
import os  # repeated here so this cell also runs when the boilerplate cell was skipped

DATA_FILENAME = 'car_prices.csv'
DATA_PATH = '/kaggle/input/vehicle-sales-data/car_prices.csv'

if not os.path.exists(DATA_PATH):
    # The dataset may be attached under a different folder name; look for the
    # CSV anywhere below the Kaggle input directory before giving up.
    _candidate_paths = sorted(
        os.path.join(dirname, filename)
        for dirname, _dirs, filenames in os.walk('/kaggle/input')
        for filename in filenames
        if filename == DATA_FILENAME
    )
    if not _candidate_paths:
        raise FileNotFoundError(
            f"Could not find '{DATA_FILENAME}' at '{DATA_PATH}' or anywhere under "
            "'/kaggle/input'. Attach the vehicle sales dataset to the notebook "
            "or update DATA_PATH."
        )
    DATA_PATH = _candidate_paths[0]
    print(f"Dataset located at: {DATA_PATH}")

df = pd.read_csv(DATA_PATH)

print("\n" + "="*70)
print("DATASET OVERVIEW")
print("="*70)
print(f"Dataset shape: {df.shape}")
print(f"\nFirst few rows:")
print(df.head())
print(f"\nData types:")
print(df.dtypes)
print(f"\nMissing values:")
print(df.isnull().sum())
print(f"\nBasic statistics:")
print(df.describe())

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/vehicle-sales-data/car_prices.csv'

**OUTLIER DETECTION AND FILTERING**

In [None]:

print("\n" + "="*70)
print("OUTLIER FILTERING")
print("="*70)

# Keep the starting row count so the removal rate can be reported at the end
original_size = len(df)

# Domain-knowledge range checks, one named mask per column.
# Comparisons against NaN evaluate to False, so rows missing any of these
# values are dropped as well.
year_in_range = (df['year'] >= 1990) & (df['year'] <= 2026)
odometer_in_range = (df['odometer'] > 0) & (df['odometer'] <= 500000)
price_in_range = (df['sellingprice'] > 500) & (df['sellingprice'] <= 150000)
condition_in_range = (df['condition'] >= 1) & (df['condition'] <= 49)

df_filtered = df[
    year_in_range & odometer_in_range & price_in_range & condition_in_range
].copy()

# Statistical fence on sellingprice.
# NOTE: Q1 and Q3 are the 1st and 99th percentiles here, not the quartiles,
# so "IQR" is the 1%-99% spread and the 1.5x fences are far wider than a
# classic inter-quartile rule.
price_column = df_filtered['sellingprice']
Q1 = price_column.quantile(0.01)
Q3 = price_column.quantile(0.99)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
upper_bound = Q3 + 1.5 * IQR

within_fences = (price_column >= lower_bound) & (price_column <= upper_bound)
df_filtered = df_filtered[within_fences]

# Report how much data the two filtering passes removed
removed_rows = original_size - len(df_filtered)
print(f"Original dataset: {original_size:,} rows")
print(f"After filtering: {len(df_filtered):,} rows")
print(f"Removed: {removed_rows:,} rows ({100*removed_rows/original_size:.2f}%)")


**STRATIFIED SAMPLING**

In [None]:

print("\n" + "="*70)
print("STRATIFIED SAMPLING")
print("="*70)

# Everything below divides by len(df_filtered); fail with a clear message
# instead of a ZeroDivisionError when the filters removed every row.
if len(df_filtered) == 0:
    raise ValueError("No rows left after outlier filtering; cannot draw a stratified sample.")

# Target sample size: 20% of the filtered data, capped at 100,000 rows
target_size = min(100000, int(len(df_filtered) * 0.2))

# Build the stratification bins on a temporary frame so df_filtered itself is
# not mutated (re-running the cell then starts from the same input).
strata_frame = df_filtered.assign(
    year_bin=pd.cut(df_filtered['year'], bins=10, labels=False),
    price_bin=pd.qcut(df_filtered['sellingprice'], q=10, labels=False, duplicates='drop'),
)

# Perform stratified sampling.
# Iterating over the groups (rather than groupby.apply) always yields the
# grouping columns, so the result does not depend on how the installed pandas
# version treats grouping columns inside apply.
sampled_strata = []
for _stratum_key, stratum in strata_frame.groupby(['year_bin', 'price_bin']):
    # Proportional allocation, but at least one row and never more than the stratum holds
    rows_to_draw = min(
        len(stratum),
        max(1, int(len(stratum) * target_size / len(df_filtered))),
    )
    sampled_strata.append(stratum.sample(rows_to_draw, random_state=RANDOM_STATE))

# Remove temporary binning columns and renumber the rows
df_sample = (
    pd.concat(sampled_strata)
    .drop(columns=['year_bin', 'price_bin'])
    .reset_index(drop=True)
)

print(f"Target sample size: {target_size:,}")
print(f"Actual sample size: {len(df_sample):,}")
print(f"Sampling ratio: {100*len(df_sample)/len(df_filtered):.2f}%")

# Verify distribution preservation
print("\nYear distribution comparison:")
print("Original:")
print(df_filtered['year'].value_counts(bins=5, sort=False).head())
print("\nSampled:")
print(df_sample['year'].value_counts(bins=5, sort=False).head())

**DATA CLEANING AND STANDARDIZATION**

In [None]:

print("\n" + "="*70)
print("DATA CLEANING AND STANDARDIZATION")
print("="*70)

# 1. Fill gaps in the categorical columns before any string normalisation
for column_name, placeholder in (
    ('make', 'Unknown'),
    ('model', 'Unknown'),
    ('body', 'Unknown'),
    ('transmission', 'unknown'),
):
    df_sample[column_name] = df_sample[column_name].fillna(placeholder)

# 2. Brand names: trim surrounding whitespace, then title-case
make_as_text = df_sample['make'].astype(str)
df_sample['make'] = make_as_text.str.strip().str.title()
print(f"Unique makes after cleaning: {df_sample['make'].nunique()}")

# 3. Model names: same treatment as the brand names
model_as_text = df_sample['model'].astype(str)
df_sample['model'] = model_as_text.str.strip().str.title()
print(f"Unique models after cleaning: {df_sample['model'].nunique()}")
# 4. Standardize and classify body types
def standardize_body_type(body):
    """Map a raw body-type label onto one of a few standard categories.

    The label is lower-cased and stripped, then checked against keyword
    groups in a fixed priority order; the first group with a keyword that
    occurs as a substring decides the category.

    Args:
        body: Raw body-type value; anything `pd.isna` treats as missing is accepted.

    Returns:
        One of 'Coupe', 'Sedan', 'SUV', 'Truck', 'Van', 'Wagon',
        'Convertible', 'Hatchback', or 'Other' when the value is missing or
        matches no keyword.
    """
    if pd.isna(body):
        return 'Other'

    normalized = str(body).lower().strip()

    # Order matters: earlier entries win when a label matches several groups
    keyword_groups = (
        ('Coupe', ('coupe', 'cpe')),
        ('Sedan', ('sedan', 'sdn')),
        ('SUV', ('suv', 'sport utility')),
        ('Truck', ('truck', 'pickup')),
        ('Van', ('van', 'minivan')),
        ('Wagon', ('wagon', 'wgn')),
        ('Convertible', ('convertible', 'conv', 'cabriolet')),
        ('Hatchback', ('hatchback', 'hatch')),
    )
    for category, keywords in keyword_groups:
        if any(keyword in normalized for keyword in keywords):
            return category

    # Nothing matched
    return 'Other'

# Collapse the raw body labels into the standard categories
df_sample['body'] = df_sample['body'].map(standardize_body_type)
print(f"\nBody types after standardization:")
print(df_sample['body'].value_counts())

# 5. Transmission labels: trim surrounding whitespace, then lower-case
transmission_as_text = df_sample['transmission'].astype(str)
df_sample['transmission'] = transmission_as_text.str.strip().str.lower()

# Confirm what is still missing after the cleaning steps
print(f"\nMissing values after cleaning:")
print(df_sample.isnull().sum())

# Short recap of the cleaning cell
unique_make_count = df_sample['make'].nunique()
unique_model_count = df_sample['model'].nunique()
print(f"\nData cleaning summary:")
print(f"  - Makes standardized: {unique_make_count} unique brands")
print(f"  - Models standardized: {unique_model_count} unique models")
print(f"  - Body types reduced to standard categories")
print(f"  - Transmission values normalized")


**ENCODING CATEGORICAL VARIABLES**

In [None]:

print("\n" + "="*70)
print("ENCODING CATEGORICAL VARIABLES")
print("="*70)

# One fitted LabelEncoder per categorical column, kept so the same mapping
# can be reused at prediction time
categorical_cols = ['make', 'model', 'body', 'transmission']
label_encoders = {}

for column_name in categorical_cols:
    encoder = LabelEncoder()
    column_as_text = df_sample[column_name].astype(str)
    df_sample[f'{column_name}_encoded'] = encoder.fit_transform(column_as_text)
    label_encoders[column_name] = encoder
    print(f"{column_name}: {len(encoder.classes_)} unique values")

**PREPARE FEATURES AND TARGET**

In [None]:

print("\n" + "="*70)
print("PREPARING FEATURES AND TARGET")
print("="*70)

# Model inputs: three numeric columns plus the four label-encoded
# categoricals (mmr is deliberately left out)
numeric_feature_cols = ['year', 'condition', 'odometer']
encoded_feature_cols = [
    'make_encoded', 'model_encoded', 'body_encoded', 'transmission_encoded'
]
feature_cols = numeric_feature_cols + encoded_feature_cols

X = df_sample[feature_cols]
y = df_sample['sellingprice']

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")
print(f"\nFeatures used:")
for position, feature_name in enumerate(feature_cols, start=1):
    print(f"  {position}. {feature_name}")

**TRAIN-TEST SPLIT**

In [None]:

print("\n" + "="*70)
print("TRAIN-TEST SPLIT")
print("="*70)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_options = {'test_size': 0.2, 'random_state': RANDOM_STATE}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

train_rows = X_train.shape[0]
test_rows = X_test.shape[0]
print(f"Training set: {train_rows:,} samples")
print(f"Test set: {test_rows:,} samples")
print(f"Split ratio: 80/20")

**MODEL TRAINING - XGBOOST**

In [None]:

print("\n" + "="*70)
print("XGBOOST MODEL TRAINING")
print("="*70)

# Hyper-parameters passed straight to the XGBRegressor constructor
params = dict(
    objective='reg:squarederror',
    max_depth=6,
    learning_rate=0.1,
    n_estimators=200,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=RANDOM_STATE,
    n_jobs=-1,
    eval_metric='rmse',
)

print("Model parameters:")
for param_name, param_value in params.items():
    print(f"  {param_name}: {param_value}")

# Fit the regressor; any failure is reported and then re-raised so the
# notebook stops here instead of continuing without a model
print("\nTraining XGBoost model...")
try:
    model = xgb.XGBRegressor(**params)
    model.fit(X_train, y_train, verbose=False)
    print("Training completed successfully!")
except Exception as training_error:
    print(f"Error during training: {training_error}")
    raise

**MODEL EVALUATION**

In [None]:

print("\n" + "="*70)
print("MODEL EVALUATION")
print("="*70)

# Stop early when the training cell has not produced a model
if 'model' not in locals():
    raise NameError("Model not found!")


def _regression_metrics(actual, predicted):
    """Return (R², MAE, RMSE, MAPE in percent) for one set of predictions."""
    r2 = r2_score(actual, predicted)
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mape = np.mean(np.abs((actual - predicted) / actual)) * 100
    return r2, mae, rmse, mape


# Predictions on both splits, so the gap between training and test scores is visible
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

train_r2, train_mae, train_rmse, train_mape = _regression_metrics(y_train, y_train_pred)
test_r2, test_mae, test_rmse, test_mape = _regression_metrics(y_test, y_test_pred)

# Side-by-side table of training vs. test metrics
table_rule = "-" * 50
print("\nPERFORMANCE METRICS:")
print(table_rule)
print(f"{'Metric':<20} {'Training':<15} {'Test':<15}")
print(table_rule)
print(f"{'R² Score':<20} {train_r2:<15.4f} {test_r2:<15.4f}")
print(f"{'MAE ($)':<20} {train_mae:<15.2f} {test_mae:<15.2f}")
print(f"{'RMSE ($)':<20} {train_rmse:<15.2f} {test_rmse:<15.2f}")
print(f"{'MAPE (%)':<20} {train_mape:<15.2f} {test_mape:<15.2f}")
print(table_rule)

# 5-fold cross-validated R² on the training split
print("\nPerforming 5-fold cross-validation...")
cv_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='r2')
print(f"CV R² scores: {cv_scores}")
print(f"Mean CV R²: {cv_scores.mean():.4f} (+/- {cv_scores.std() * 2:.4f})")

**FEATURE IMPORTANCE ANALYSIS**

In [None]:

print("\n" + "="*70)
print("FEATURE IMPORTANCE ANALYSIS")
print("="*70)

# Pair every feature with the importance reported by the fitted model,
# most important first
importance_df = pd.DataFrame(
    {'feature': feature_cols, 'importance': model.feature_importances_}
)
importance_df = importance_df.sort_values('importance', ascending=False)

print("\nFeature Importance Ranking:")
print("-" * 40)
for feature_name, importance_score in zip(importance_df['feature'], importance_df['importance']):
    print(f"{feature_name:<25} {importance_score:.4f}")

# Horizontal bar chart; the y-axis is inverted so the top-ranked feature is drawn first
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(importance_df['feature'], importance_df['importance'])
ax.set_xlabel('Importance Score')
ax.set_title('Feature Importances - XGBoost Model')
ax.invert_yaxis()
fig.tight_layout()
fig.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
plt.show()


**SAVE MODEL AND ENCODERS**

In [None]:

print("\n" + "="*70)
print("SAVING MODEL AND ENCODERS")
print("="*70)


def _save_artifact(artifact, filename, label):
    """Dump one object with joblib and confirm the file name on stdout."""
    joblib.dump(artifact, filename)
    print(f"{label} saved as '{filename}'")


# Model, encoders and feature order are all needed to reproduce predictions
_save_artifact(model, 'vehicle_price_model.pkl', "Model")
_save_artifact(label_encoders, 'label_encoders.pkl', "Label encoders")
_save_artifact(feature_cols, 'feature_columns.pkl', "Feature columns")

# Make -> sorted list of model names, for UI dropdowns.
# NaN model values are dropped first, and makes left without any model are omitted.
models_by_make = {
    make_name: sorted(
        str(model_name)
        for model_name in df_sample.loc[df_sample['make'] == make_name, 'model'].dropna().unique()
    )
    for make_name in df_sample['make'].unique()
}
make_model_mapping = {
    make_name: model_names
    for make_name, model_names in models_by_make.items()
    if model_names
}
_save_artifact(make_model_mapping, 'make_model_mapping.pkl', "Make-model mapping")

# Split sizes and test-set metrics for later use.
# Only the ML-only metrics are stored in this dict; it holds no hybrid metrics.
statistics = dict(
    train_size=len(X_train),
    test_size=len(X_test),
    ml_r2_score=test_r2,
    ml_mae=test_mae,
    ml_rmse=test_rmse,
    ml_mape=test_mape,
)
_save_artifact(statistics, 'model_statistics.pkl', "Model statistics")

**RULE-BASED SYSTEM INTEGRATION**

In [None]:

print("\n" + "="*70)
print("RULE-BASED SYSTEM INTEGRATION")
print("="*70)

# Define the rule-based system class
class VehiclePricingRules:
    """Expert rule-based system that post-adjusts an ML price prediction.

    Each rule inspects the vehicle attributes and adds or subtracts an amount
    from the model's price. Text attributes (make, model, body) are compared
    case-insensitively and ignoring surrounding whitespace.

    Args:
        reference_year: Year used as "now" when computing vehicle age.
            Defaults to 2026.
    """

    def __init__(self, reference_year=2026):
        self.reference_year = reference_year
        self.rules_applied = []
        self.warnings = []

    def reset(self):
        """Clear the `rules_applied` and `warnings` lists."""
        self.rules_applied = []
        self.warnings = []

    @staticmethod
    def _normalize_text(value):
        """Return `value` as a stripped, lower-cased string ('' for None/NaN)."""
        # `value != value` is only true for NaN
        if value is None or (isinstance(value, float) and value != value):
            return ''
        return str(value).strip().lower()

    def adjust_prediction(self, base_prediction, vehicle_data):
        """Apply expert rules to adjust ML model prediction.

        Args:
            base_prediction: Price predicted by the ML model.
            vehicle_data: Mapping with the optional keys 'year', 'odometer',
                'condition', 'make', 'model' and 'body'. Other keys (for
                example 'transmission') are accepted and ignored.

        Returns:
            dict with:
                'adjusted_price': price after all rules, floored at 180.
                'base_price': the unmodified `base_prediction`.
                'adjustments': list of (rule name, signed amount) tuples in
                    the order the rules fired.
                'total_adjustment': `adjusted_price - base_prediction`.
        """
        adjusted_price = base_prediction
        adjustments = []

        year = vehicle_data.get('year')
        odometer = vehicle_data.get('odometer')
        condition = vehicle_data.get('condition')
        make = self._normalize_text(vehicle_data.get('make', ''))
        model = self._normalize_text(vehicle_data.get('model', ''))
        body = self._normalize_text(vehicle_data.get('body', ''))

        # Rule 1: Luxury Depreciation Curve
        # 1% off per year of age beyond 9 years, applied multiplicatively
        luxury_brands = ['Bmw', 'Mercedes-Benz', 'Audi', 'Lexus', 'Porsche', 'Tesla']
        if make in {brand.lower() for brand in luxury_brands} and year:
            vehicle_age = self.reference_year - year
            if vehicle_age > 9:
                depreciation_factor = 1 - (vehicle_age - 9) * 0.01
                adjustment = adjusted_price * (depreciation_factor - 1)
                adjusted_price *= depreciation_factor
                adjustments.append(('Luxury depreciation', adjustment))

        # Rule 2: Popular Truck Premium
        # +1% of the base price when the model name contains a popular truck name
        popular_trucks = ['F-150', 'Silverado', 'Ram', 'Tundra', 'Tacoma']
        if body == 'truck' and any(truck.lower() in model for truck in popular_trucks):
            premium = base_prediction * 0.01
            adjusted_price += premium
            adjustments.append(('Popular truck premium', premium))

        # Rule 3: High Mileage Penalty
        # 2.9% of the base price per 100,000 miles above 190,000, capped at 10%
        if odometer and odometer > 190000:
            penalty_rate = min((odometer - 190000) / 100000 * 0.029, 0.1)
            penalty = base_prediction * penalty_rate
            adjusted_price -= penalty
            adjustments.append(('High mileage penalty', -penalty))

        # Rule 4: Excellent Condition Bonus
        if condition and condition >= 47:
            bonus = base_prediction * 0.035
            adjusted_price += bonus
            adjustments.append(('Excellent condition bonus', bonus))

        # Rule 5: Poor Condition Penalty (mutually exclusive with Rule 4)
        elif condition and condition <= 10:
            penalty = base_prediction * 0.08
            adjusted_price -= penalty
            adjustments.append(('Poor condition penalty', -penalty))

        # Rule 6: Low Mileage Premium
        # Vehicles older than 2 years with under 15% of the expected
        # 14,000 miles per year
        if odometer and year:
            vehicle_age = self.reference_year - year
            expected_mileage = vehicle_age * 14000
            if odometer < expected_mileage * 0.15 and vehicle_age > 2:
                premium = base_prediction * 0.028
                adjusted_price += premium
                adjustments.append(('Low mileage premium', premium))

        # Ensure price doesn't go below minimum
        adjusted_price = max(adjusted_price, 180)

        return {
            'adjusted_price': adjusted_price,
            'base_price': base_prediction,
            'adjustments': adjustments,
            'total_adjustment': adjusted_price - base_prediction
        }

# Initialize rule-based system
rules_engine = VehiclePricingRules()

**HYBRID SYSTEM EVALUATION**

In [None]:

print("\n" + "="*70)
print("HYBRID SYSTEM EVALUATION (ML + Rules)")
print("="*70)

# The comparison needs the ML-only predictions and metrics produced by the
# model-evaluation cell. Without them nothing below can run, so stop with a
# clear message instead of failing later on an undefined name.
_required_names = ('y_test_pred', 'test_r2', 'test_mae', 'test_rmse', 'test_mape')
_missing_names = [name for name in _required_names if name not in globals()]
if _missing_names:
    raise NameError(
        "Missing results from the Model Evaluation cell: "
        f"{', '.join(_missing_names)}. Run that cell before the hybrid evaluation."
    )

# Apply rules to test set predictions
print("\nApplying rule-based adjustments to test set...")

# Collect every rule input for the test rows in one frame aligned on the
# test index, instead of doing several per-row lookups inside the loop.
rule_inputs = pd.concat(
    [
        X_test[['year', 'condition', 'odometer']],
        df_sample.loc[X_test.index, ['make', 'model', 'body', 'transmission']],
    ],
    axis=1,
)

# float(...) hands the rules a plain Python float for each prediction
rule_results = [
    rules_engine.adjust_prediction(float(ml_pred), vehicle_data)
    for ml_pred, vehicle_data in zip(y_test_pred, rule_inputs.to_dict('records'))
]

hybrid_predictions = np.array([result['adjusted_price'] for result in rule_results])
rule_adjustments_list = np.array([result['total_adjustment'] for result in rule_results])

# Calculate hybrid system metrics
hybrid_r2 = r2_score(y_test, hybrid_predictions)
hybrid_mae = mean_absolute_error(y_test, hybrid_predictions)
hybrid_rmse = np.sqrt(mean_squared_error(y_test, hybrid_predictions))
hybrid_mape = np.mean(np.abs((y_test - hybrid_predictions) / y_test)) * 100

print("\nHYBRID SYSTEM PERFORMANCE:")
print("=" * 70)
print(f"{'Metric':<20} {'ML Only':<15} {'Hybrid (ML+Rules)':<20} {'Improvement':<15}")
print("-" * 70)
print(f"{'R² Score':<20} {test_r2:<15.4f} {hybrid_r2:<20.4f} {(hybrid_r2-test_r2)*100:+.2f}%")
print(f"{'MAE ($)':<20} {test_mae:<15.2f} {hybrid_mae:<20.2f} {test_mae-hybrid_mae:+.2f}")
print(f"{'RMSE ($)':<20} {test_rmse:<15.2f} {hybrid_rmse:<20.2f} {test_rmse-hybrid_rmse:+.2f}")
print(f"{'MAPE (%)':<20} {test_mape:<15.2f} {hybrid_mape:<20.2f} {test_mape-hybrid_mape:+.2f}%")
print("=" * 70)

# Statistics on rule adjustments
print("\nRULE ADJUSTMENT STATISTICS:")
print("-" * 50)
print(f"Average adjustment: ${np.mean(rule_adjustments_list):+,.2f}")
print(f"Median adjustment: ${np.median(rule_adjustments_list):+,.2f}")
print(f"Max positive adjustment: ${np.max(rule_adjustments_list):+,.2f}")
print(f"Max negative adjustment: ${np.min(rule_adjustments_list):+,.2f}")
print(f"% predictions adjusted: {np.sum(rule_adjustments_list != 0) / len(rule_adjustments_list) * 100:.1f}%")

# Store the ML-only and hybrid metrics together so that
# 'model_statistics.pkl' really contains the hybrid results.
statistics = {
    'train_size': len(X_train),
    'test_size': len(X_test),
    # ML-only metrics
    'ml_r2_score': test_r2,
    'ml_mae': test_mae,
    'ml_rmse': test_rmse,
    'ml_mape': test_mape,
    # Hybrid (ML + rules) metrics
    'hybrid_r2_score': hybrid_r2,
    'hybrid_mae': hybrid_mae,
    'hybrid_rmse': hybrid_rmse,
    'hybrid_mape': hybrid_mape,
}
joblib.dump(statistics, 'model_statistics.pkl')
print("\nModel statistics (ML-only + hybrid) saved as 'model_statistics.pkl'")

# Visualize hybrid system improvements.
# Plotting is best-effort: a failure is reported but does not stop the notebook.
try:
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Plot 1: ML vs Hybrid comparison
    axes[0].scatter(y_test, y_test_pred, alpha=0.3, s=10, label='ML Only', color='blue')
    axes[0].scatter(y_test, hybrid_predictions, alpha=0.3, s=10, label='Hybrid', color='green')
    axes[0].plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
    axes[0].set_xlabel('Actual Price ($)')
    axes[0].set_ylabel('Predicted Price ($)')
    axes[0].set_title(f'ML vs Hybrid Predictions\nHybrid R²={hybrid_r2:.4f} vs ML R²={test_r2:.4f}')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Plot 2: Rule adjustments distribution
    axes[1].hist(rule_adjustments_list, bins=50, edgecolor='black', alpha=0.7)
    axes[1].axvline(x=0, color='r', linestyle='--', lw=2, label='No adjustment')
    axes[1].set_xlabel('Adjustment Amount ($)')
    axes[1].set_ylabel('Frequency')
    axes[1].set_title('Distribution of Rule-Based Adjustments')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('hybrid_system_analysis.png', dpi=300, bbox_inches='tight')
    print("\nHybrid system analysis plot saved as 'hybrid_system_analysis.png'")
    plt.show()
except Exception as e:
    print(f"\nWarning: Could not generate hybrid system plots: {e}")


**EXAMPLE PREDICTIONS WITH HYBRID SYSTEM**

In [None]:

print("\n" + "="*70)
print("EXAMPLE PREDICTIONS - HYBRID SYSTEM")
print("="*70)

# Create example predictions
examples = [
    {'year': 2015, 'make': 'Ford', 'model': 'F-150', 'body': 'Truck', 
     'transmission': 'automatic', 'condition': 35, 'odometer': 50000},
    {'year': 2020, 'make': 'Toyota', 'model': 'Camry', 'body': 'Sedan',
     'transmission': 'automatic', 'condition': 40, 'odometer': 25000},
    {'year': 2015, 'make': 'Bmw', 'model': '3 Series', 'body': 'Sedan',
     'transmission': 'automatic', 'condition': 35, 'odometer': 85000}
]

print("\nComparing ML-only vs Hybrid predictions:")
print("=" * 90)

for i, ex in enumerate(examples, 1):
    # Prepare features
    ex_df = pd.DataFrame([ex])

    # Encode categoricals with the encoders fitted on the training sample.
    # Labels the encoder has never seen fall back to code 0; they are
    # collected so the fallback is reported instead of happening silently.
    unseen_labels = []
    for col in categorical_cols:
        encoder = label_encoders[col]
        if ex[col] in encoder.classes_:
            ex_df[f'{col}_encoded'] = encoder.transform([ex[col]])[0]
        else:
            ex_df[f'{col}_encoded'] = 0
            unseen_labels.append(f"{col}={ex[col]!r}")

    # Make ML prediction
    ml_pred = model.predict(ex_df[feature_cols])[0]

    # Apply rules
    hybrid_result = rules_engine.adjust_prediction(ml_pred, ex)

    print(f"\nExample {i}: {ex['year']} {ex['make']} {ex['model']}")
    print(f"  Details: {ex['body']}, {ex['odometer']:,} miles, Condition {ex['condition']}/49")
    if unseen_labels:
        print(f"  Warning: not seen in training, encoded as 0: {', '.join(unseen_labels)}")
    print(f"  ML Prediction:     ${ml_pred:>10,.2f}")

    if hybrid_result['adjustments']:
        print(f"  Rule Adjustments:")
        for rule_name, adjustment in hybrid_result['adjustments']:
            print(f"    - {rule_name}: ${adjustment:>+10,.2f}")
    else:
        print(f"  Rule Adjustments:  (none applied)")

    print(f"  Hybrid Price:      ${hybrid_result['adjusted_price']:>10,.2f}")
    print(f"  Total Adjustment:  ${hybrid_result['total_adjustment']:>+10,.2f}")
    print("-" * 90)

# Closing summary of the artifacts written and the headline metrics
print("\n" + "="*70)
print("MODEL TRAINING COMPLETE!")
print("="*70)
print("\nFiles created:")
print("  - vehicle_price_model.pkl")
print("  - label_encoders.pkl")
print("  - feature_columns.pkl")
print("  - model_statistics.pkl (includes hybrid metrics)")
print("  - make_model_mapping.pkl (for UI dropdowns)")
print("  - feature_importance.png")
print("  - hybrid_system_analysis.png")
print("\nAI Techniques Implemented:")
print("  1. Statistical Learning (XGBoost) - Gradient Boosting")
print("  2. Rule-Based Expert System - Knowledge-Based AI")
print("\nHybrid System Performance:")
print(f"  ML-only R²: {test_r2:.4f}")
print(f"  Hybrid R²:  {hybrid_r2:.4f} (improvement: {(hybrid_r2-test_r2)*100:+.2f}%)")
print(f"  Rules applied to {np.sum(rule_adjustments_list != 0) / len(rule_adjustments_list) * 100:.1f}% of predictions")