# Ballistic Performance Mechanism Analysis

Deep dive into the physical mechanisms driving ballistic performance in ceramic armor materials.

## Analysis Goals
- Identify key property drivers for ballistic performance
- Understand thermal conductivity impact
- Analyze property correlations
- Test mechanistic hypotheses

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from pathlib import Path

from ceramic_discovery.ballistics.mechanism_analyzer import MechanismAnalyzer
from ceramic_discovery.analysis.property_analyzer import PropertyAnalyzer
from ceramic_discovery.ml.feature_engineering import FeatureEngineer

# Notebook-wide plot defaults: seaborn grid theme plus a wide default figure.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

print("✓ Environment initialized")

## 1. Load Experimental and Predicted Data

In [None]:
# Read the ranked candidate table produced by the earlier screening step
# into the DataFrame that every later cell works on.
data_path = Path('./results/dopant_screening/ranked_candidates.csv')
df = pd.read_csv(data_path)

# Quick sanity report: row count and the column names that were loaded.
print(f"Loaded {len(df)} materials")
print(f"\nAvailable properties: {df.columns.tolist()}")

## 2. Feature Importance Analysis

In [None]:
analyzer = MechanismAnalyzer()

# Calculate feature importance for the 'v50' target using the analyzer's
# 'permutation' method.
# NOTE(review): the code below assumes the result is a DataFrame with
# 'feature' and 'importance' columns — confirm against MechanismAnalyzer.
importance = analyzer.calculate_feature_importance(
    data=df,
    target='v50',
    method='permutation'
)

# Visualize
fig, ax = plt.subplots(figsize=(10, 6))

# Ascending sort so the largest importance ends up at the highest bar
# position of the horizontal bar chart.
importance_sorted = importance.sort_values('importance', ascending=True)
bar_positions = range(len(importance_sorted))
ax.barh(bar_positions, importance_sorted['importance'])
ax.set_yticks(bar_positions)
ax.set_yticklabels(importance_sorted['feature'])
ax.set_xlabel('Importance Score')
ax.set_title('Feature Importance for Ballistic Performance')
ax.grid(True, alpha=0.3, axis='x')

plt.tight_layout()
plt.show()

print("\nTop 5 Performance Drivers:")
# importance_sorted is ascending, so the five largest rows sit at the tail;
# reverse them so the strongest driver is listed first.
top_drivers = importance_sorted.tail(5).iloc[::-1]
for feature, score in zip(top_drivers['feature'], top_drivers['importance']):
    print(f"  {feature}: {score:.3f}")

## 3. Thermal Conductivity Impact Analysis

Investigate the role of thermal conductivity in ballistic performance.

In [None]:
# Ask the analyzer for the thermal-conductivity statistics at both
# temperature conditions; the result is indexed by string keys below.
thermal_analysis = analyzer.analyze_thermal_conductivity_impact(
    data=df,
    temp_conditions=['25C', '1000C']
)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# One entry per panel:
# (axes, conductivity column, x-axis label, panel title, extra scatter kwargs)
panel_specs = [
    (
        axes[0],
        'thermal_conductivity_25C',
        'Thermal Conductivity at 25°C (W/m·K)',
        f'Room Temperature (R² = {thermal_analysis["r2_25C"]:.3f})',
        {},
    ),
    (
        axes[1],
        'thermal_conductivity_1000C',
        'Thermal Conductivity at 1000°C (W/m·K)',
        f'High Temperature (R² = {thermal_analysis["r2_1000C"]:.3f})',
        {'color': 'orange'},
    ),
]

for panel, column, x_label, panel_title, scatter_kwargs in panel_specs:
    conductivity = df[column]

    panel.scatter(conductivity, df['v50'], alpha=0.6, **scatter_kwargs)
    panel.set_xlabel(x_label)
    panel.set_ylabel('V50 (m/s)')
    panel.set_title(panel_title)
    panel.grid(True, alpha=0.3)

    # Overlay a first-degree (straight line) least-squares fit of V50
    # against conductivity.
    trend = np.poly1d(np.polyfit(conductivity, df['v50'], 1))
    panel.plot(conductivity, trend(conductivity),
               "r--", alpha=0.8, linewidth=2)

plt.tight_layout()
plt.show()

# Report the numeric summary values returned by the analyzer.
print("\nThermal Conductivity Impact:")
print(f"  Correlation at 25°C: {thermal_analysis['correlation_25C']:.3f}")
print(f"  Correlation at 1000°C: {thermal_analysis['correlation_1000C']:.3f}")
print(f"  Temperature sensitivity: {thermal_analysis['temp_sensitivity']:.3f}")

## 4. Property Correlation Network

In [None]:
# Columns included in the correlation matrix (V50 is the performance target).
properties = [
    'hardness', 'fracture_toughness', 'density',
    'youngs_modulus', 'thermal_conductivity_1000C',
    'v50'
]

# Pairwise correlation between the selected columns (pandas default method).
corr_matrix = df[properties].corr()

# Draw the matrix as an annotated heatmap with a diverging palette centred
# on zero, so positive and negative correlations are visually distinct.
fig, ax = plt.subplots(figsize=(10, 8))

heatmap_options = dict(
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=1,
    cbar_kws={'label': 'Correlation Coefficient'},
)
sns.heatmap(corr_matrix, ax=ax, **heatmap_options)

ax.set_title('Property Correlation Matrix', fontsize=14, pad=20)
plt.tight_layout()
plt.show()

# List every other property's correlation with V50, largest value first
# (the V50 self-correlation entry is removed).
v50_corr = corr_matrix['v50'].drop('v50').sort_values(ascending=False)
print("\nStrongest Correlations with V50:")
for prop_name, coefficient in v50_corr.items():
    print(f"  {prop_name}: {coefficient:.3f}")

## 5. Mechanistic Hypothesis Testing

In [None]:
# Hypothesis 1: Higher hardness improves ballistic performance
# Bin materials into three quantile-based hardness groups and store the
# label on df so the boxplot below can group by it.
hardness_groups = pd.qcut(df['hardness'], q=3, labels=['Low', 'Medium', 'High'])
df['hardness_group'] = hardness_groups

# Statistical test: compare V50 between the lowest and highest hardness
# groups. Missing V50 values are dropped explicitly so the t-test sees the
# same observations that the means below are computed from (Series.mean
# skips NaN, whereas ttest_ind would propagate it into a NaN p-value).
low_v50 = df.loc[df['hardness_group'] == 'Low', 'v50'].dropna()
high_v50 = df.loc[df['hardness_group'] == 'High', 'v50'].dropna()

# Independent two-sample t-test with scipy's default settings.
t_stat, p_value = stats.ttest_ind(high_v50, low_v50)

print("Hypothesis 1: Higher hardness improves ballistic performance")
print(f"  Mean V50 (Low hardness): {low_v50.mean():.1f} m/s")
print(f"  Mean V50 (High hardness): {high_v50.mean():.1f} m/s")
print(f"  Difference: {high_v50.mean() - low_v50.mean():.1f} m/s")
print(f"  t-statistic: {t_stat:.3f}")
print(f"  p-value: {p_value:.4f}")
print(f"  Significant: {'Yes' if p_value < 0.05 else 'No'}")

# Visualize
fig, ax = plt.subplots(figsize=(10, 6))
df.boxplot(column='v50', by='hardness_group', ax=ax)
# pandas adds its own "Boxplot grouped by ..." figure suptitle when `by` is
# used; clear it so only the explicit axes title below is shown.
fig.suptitle('')
ax.set_xlabel('Hardness Group')
ax.set_ylabel('V50 (m/s)')
ax.set_title('Ballistic Performance by Hardness Group')
ax.tick_params(axis='x', labelrotation=0)
plt.tight_layout()
plt.show()

In [None]:
# Hypothesis 2: Fracture toughness moderates hardness effect
# Split materials into two quantile-based fracture-toughness groups.
df['toughness_group'] = pd.qcut(df['fracture_toughness'], q=2, labels=['Low', 'High'])

fig, ax = plt.subplots(figsize=(10, 6))

# Hardness-V50 correlation per toughness group, filled in while plotting so
# the printed conclusion reflects the data instead of a fixed statement.
group_corr = {}

for toughness in ['Low', 'High']:
    # Keep only complete (hardness, v50) pairs; NaN would break the line fit.
    subset = df.loc[df['toughness_group'] == toughness, ['hardness', 'v50']].dropna()
    ax.scatter(subset['hardness'], subset['v50'],
               label=f'{toughness} Toughness', alpha=0.6, s=100)

    group_corr[toughness] = subset['hardness'].corr(subset['v50'])

    # A straight-line fit needs at least two points.
    if len(subset) < 2:
        continue

    # Fit line
    z = np.polyfit(subset['hardness'], subset['v50'], 1)
    p = np.poly1d(z)
    x_line = np.linspace(subset['hardness'].min(), subset['hardness'].max(), 100)
    ax.plot(x_line, p(x_line), '--', linewidth=2)

ax.set_xlabel('Hardness (GPa)')
ax.set_ylabel('V50 (m/s)')
ax.set_title('Hardness-Performance Relationship Moderated by Fracture Toughness')
ax.legend()
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nHypothesis 2: Fracture toughness moderates hardness effect")
for toughness, corr_value in group_corr.items():
    print(f"  Hardness-V50 correlation ({toughness} toughness): {corr_value:.3f}")

if any(np.isnan(corr_value) for corr_value in group_corr.values()):
    print("  Not enough data to compare the two toughness groups")
else:
    # Report whichever group actually has the larger absolute correlation.
    stronger = max(group_corr, key=lambda name: abs(group_corr[name]))
    print(f"  {stronger} toughness materials show stronger hardness-performance correlation")

## 6. Dopant Effect Analysis

In [None]:
# Analyze dopant-specific effects: per-dopant V50 mean/std/count plus the
# mean of three other properties, rounded to two decimals.
dopant_effects = df.groupby('dopant').agg({
    'v50': ['mean', 'std', 'count'],
    'hardness': 'mean',
    'thermal_conductivity_1000C': 'mean',
    'energy_above_hull': 'mean'
}).round(2)

print("Dopant-Specific Effects:")
print(dopant_effects.sort_values(('v50', 'mean'), ascending=False))

# Visualize: one boxplot per property, each grouped by dopant.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# (axes, column, y-axis label, panel title)
dopant_panels = [
    (axes[0, 0], 'v50', 'V50 (m/s)', 'Ballistic Performance by Dopant'),
    (axes[0, 1], 'hardness', 'Hardness (GPa)', 'Hardness by Dopant'),
    (axes[1, 0], 'thermal_conductivity_1000C', 'Thermal Conductivity (W/m·K)',
     'Thermal Conductivity by Dopant'),
    (axes[1, 1], 'energy_above_hull', 'Energy Above Hull (eV/atom)',
     'Stability by Dopant'),
]

for panel, column, y_label, panel_title in dopant_panels:
    df.boxplot(column=column, by='dopant', ax=panel)
    panel.set_xlabel('Dopant')
    panel.set_ylabel(y_label)
    panel.set_title(panel_title)
    panel.tick_params(axis='x', labelrotation=45)

# Reference line on the stability panel.
# NOTE(review): 0.1 is presumably the energy-above-hull stability cutoff
# used in screening — confirm the intended threshold.
axes[1, 1].axhline(0.1, color='red', linestyle='--', linewidth=2)

# Each boxplot(by=...) call sets a "Boxplot grouped by dopant" figure
# suptitle; clear it so it does not sit on top of the panel titles.
fig.suptitle('')

plt.tight_layout()
plt.show()

## 7. Concentration-Dependent Effects

In [None]:
# Plot how each property's mean changes with dopant concentration,
# one panel per property and one line per dopant.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

# (column name, axis label) for each of the four panels.
properties_to_plot = [
    ('v50', 'V50 (m/s)'),
    ('hardness', 'Hardness (GPa)'),
    ('thermal_conductivity_1000C', 'Thermal Conductivity (W/m·K)'),
    ('energy_above_hull', 'Energy Above Hull (eV/atom)')
]

# unique() keeps first-appearance order, so these are the first five
# distinct dopants encountered in df.
dopants_shown = df['dopant'].unique()[:5]

for ax, (prop, label) in zip(axes.flat, properties_to_plot):
    for dopant in dopants_shown:
        dopant_rows = df[df['dopant'] == dopant]
        conc_trend = dopant_rows.groupby('concentration')[prop].mean()
        # Concentration values are scaled by 100 for the percent axis.
        ax.plot(conc_trend.index * 100, conc_trend.values, 'o-', label=dopant, linewidth=2)

    ax.set(
        xlabel='Dopant Concentration (%)',
        ylabel=label,
        title=f'{label} vs Concentration',
    )
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Summary and Mechanistic Insights

In [None]:
# Final text summary assembled from results computed in earlier cells
# (importance_sorted, thermal_analysis, corr_matrix, dopant_effects).
print("=" * 60)
print("MECHANISTIC INSIGHTS SUMMARY")
print("=" * 60)

print("\n1. Key Performance Drivers:")
# importance_sorted is ascending; reverse the tail so the strongest driver
# is listed first.
key_drivers = importance_sorted.tail(3).iloc[::-1]
for feature, score in zip(key_drivers['feature'], key_drivers['importance']):
    print(f"   - {feature}: {score:.3f}")

print("\n2. Thermal Conductivity Role:")
print(f"   - Room temp correlation: {thermal_analysis['correlation_25C']:.3f}")
print(f"   - High temp correlation: {thermal_analysis['correlation_1000C']:.3f}")
# Classify by absolute correlation. A separate 'Weak' band keeps near-zero
# correlations from being reported as a moderate effect.
high_temp_strength = abs(thermal_analysis['correlation_1000C'])
if high_temp_strength > 0.5:
    impact_label = 'Strong'
elif high_temp_strength > 0.3:
    impact_label = 'Moderate'
else:
    impact_label = 'Weak'
print(f"   - Interpretation: {impact_label} impact")

print("\n3. Property Interactions:")
print(f"   - Hardness-V50 correlation: {corr_matrix.loc['hardness', 'v50']:.3f}")
print(f"   - Toughness-V50 correlation: {corr_matrix.loc['fracture_toughness', 'v50']:.3f}")
# NOTE(review): the next line is a fixed statement, not derived from any
# value computed in this notebook — confirm it against the results.
print("   - Synergistic effects observed between hardness and toughness")

print("\n4. Dopant Recommendations:")
# Three dopants with the highest mean V50.
best_dopants = dopant_effects.sort_values(('v50', 'mean'), ascending=False).head(3)
for dopant in best_dopants.index:
    v50_mean = best_dopants.loc[dopant, ('v50', 'mean')]
    print(f"   - {dopant}: {v50_mean:.1f} m/s average V50")

print("\n" + "=" * 60)