# Global Foundries Wafer Manufacturing Optimization Dashboard

## 🎯 Project Overview
This notebook creates an interactive optimization dashboard for analyzing the **WM-811K wafer dataset** to improve semiconductor manufacturing processes for Global Foundries.

### Key Objectives:
- Analyze defect patterns and yield optimization opportunities
- Create interactive visualizations for manufacturing process monitoring
- Implement real-time tracking of optimization algorithms
- Provide actionable insights for process engineers

### Dataset Information:
- **WM-811K Dataset**: 811,457 wafer map images
- **8 Defect Classes**: Center, Donut, Edge-Loc, Edge-Ring, Loc, Random, Scratch, Near-full
- **Real Production Data**: 46,293 manufacturing lots from actual semiconductor fabs

## 📚 Section 1: Import Required Libraries
Import necessary libraries for data analysis, visualization, and optimization algorithms.

In [None]:
# Core Data Science Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

# Interactive Visualization
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.offline as pyo
pyo.init_notebook_mode(connected=True)

# Optimization Libraries
from scipy.optimize import minimize, differential_evolution, basinhopping
from scipy import stats
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Interactive Widgets
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
import threading
import time

# Image Processing (for wafer maps)
import cv2
from PIL import Image

# Set up plotting style
plt.style.use('default')
sns.set_palette("husl")

# The version string is defined on the top-level plotly package; the
# plotly.express submodule is not guaranteed to carry one, which made the
# previous hasattr() fallback print 'Latest' instead of a real version.
import plotly

print("✅ All libraries imported successfully!")
print(f"📊 Plotly version: {plotly.__version__}")
print(f"🐼 Pandas version: {pd.__version__}")
print(f"🔢 NumPy version: {np.__version__}")

## 🏭 Section 2: Generate Sample Wafer Manufacturing Data
Create synthetic sample datasets — randomly generated in this notebook rather than loaded from the WM-811K files — that mimic wafer manufacturing processes, defect patterns, and optimization runs for Global Foundries analysis.

In [None]:
# Define Global Foundries manufacturing parameters
# Defect-pattern labels. generate_wafer_manufacturing_data pairs this list
# positionally with its sampling weights, so the order here is significant.
DEFECT_CLASSES = ["Center", "Donut", "Edge-Loc", "Edge-Ring", "Loc", "Random", "Scratch", "Near-full"]
# Names of the process-parameter columns; they match the process columns written
# by generate_wafer_manufacturing_data. Not referenced elsewhere in the cells
# visible here.
PROCESS_PARAMETERS = ["temperature", "pressure", "etch_time", "deposition_rate", "chamber_flow"]

def generate_wafer_manufacturing_data(n_wafers=5000, n_lots=100, seed=42):
    """Generate synthetic wafer manufacturing records for Global Foundries analysis.

    Every record gets a randomly chosen lot, five normally distributed process
    parameters, a weighted-random defect class, a yield derived from those
    values plus noise, a grid position, and a timestamp up to 364 days before
    the moment the function is called.

    Args:
        n_wafers: Number of wafer records to generate.
        n_lots: Number of distinct lot IDs the wafers are drawn from.
        seed: Value passed to ``np.random.seed`` before generation so results
            are reproducible. This reseeds NumPy's *global* generator, exactly
            as the function always did. Pass ``None`` to leave the global RNG
            state untouched.

    Returns:
        pandas.DataFrame with one row per wafer and the columns ``wafer_id``,
        ``lot_id``, ``defect_class``, ``yield_rate``, ``temperature``,
        ``pressure``, ``etch_time``, ``deposition_rate``, ``chamber_flow``,
        ``wafer_x``, ``wafer_y`` and ``timestamp``. The columns are present
        even when ``n_wafers`` is 0.

    Raises:
        ValueError: If wafers are requested but ``n_lots`` is less than 1.
    """
    if n_wafers > 0 and n_lots < 1:
        raise ValueError("n_lots must be at least 1 when n_wafers > 0")

    if seed is not None:
        np.random.seed(seed)  # For reproducible results

    # Fixed schema so an empty result still exposes every column.
    columns = [
        'wafer_id', 'lot_id', 'defect_class', 'yield_rate',
        'temperature', 'pressure', 'etch_time', 'deposition_rate',
        'chamber_flow', 'wafer_x', 'wafer_y', 'timestamp',
    ]

    # Generate lot information
    lot_ids = [f"GF_LOT_{i:04d}" for i in range(n_lots)]

    # Loop invariants, built once instead of once per wafer.
    # Sampling weights, positionally aligned with DEFECT_CLASSES.
    defect_weights = [0.15, 0.12, 0.18, 0.15, 0.20, 0.10, 0.05, 0.05]
    # Additive yield penalty per defect class.
    defect_effects = {
        "Center": -0.15, "Donut": -0.20, "Edge-Loc": -0.10,
        "Edge-Ring": -0.12, "Loc": -0.08, "Random": -0.25,
        "Scratch": -0.30, "Near-full": -0.40,
    }
    base_yield = 0.85
    # One reference instant so every timestamp is offset from the same "now".
    reference_time = datetime.now()

    data = []

    # NOTE: the order of the random draws below is kept as-is so that a given
    # seed keeps producing the same process values, defects and yields.
    for i in range(n_wafers):
        lot_id = np.random.choice(lot_ids)

        # Process parameters (realistic semiconductor values)
        temperature = np.random.normal(1050, 25)  # °C
        pressure = np.random.normal(10, 1.5)      # Torr
        etch_time = np.random.normal(120, 15)     # seconds
        deposition_rate = np.random.normal(50, 5) # Å/min
        chamber_flow = np.random.normal(200, 20)  # sccm

        # Defect pattern (weighted distribution)
        defect_class = np.random.choice(DEFECT_CLASSES, p=defect_weights)

        # Yield: base value minus penalties for deviating from the nominal
        # temperature/pressure and for the defect class, plus Gaussian noise.
        temp_effect = -0.001 * abs(temperature - 1050)
        pressure_effect = -0.01 * abs(pressure - 10)
        defect_effect = defect_effects[defect_class]

        yield_rate = base_yield + temp_effect + pressure_effect + defect_effect + np.random.normal(0, 0.05)
        yield_rate = max(0.0, min(1.0, yield_rate))  # Clamp between 0 and 1

        # Wafer position (for spatial analysis), each coordinate in [0, 24]
        wafer_x = np.random.randint(0, 25)
        wafer_y = np.random.randint(0, 25)

        # Timestamp: 0-364 whole days before the reference instant
        timestamp = reference_time - timedelta(days=np.random.randint(0, 365))

        data.append({
            'wafer_id': f"W_{i:06d}",
            'lot_id': lot_id,
            'defect_class': defect_class,
            'yield_rate': yield_rate,
            'temperature': temperature,
            'pressure': pressure,
            'etch_time': etch_time,
            'deposition_rate': deposition_rate,
            'chamber_flow': chamber_flow,
            'wafer_x': wafer_x,
            'wafer_y': wafer_y,
            'timestamp': timestamp
        })

    return pd.DataFrame(data, columns=columns)

def generate_optimization_history(objective_func, bounds, algorithm_name, max_iterations=100):
    """Record a simulated optimization trace for ``objective_func``.

    On every iteration one value per entry of ``bounds`` is drawn uniformly
    between that entry's first and second element, the objective is evaluated
    on the resulting list, and the running minimum is updated. The
    ``algorithm_name`` is only used to label the rows.

    Returns a DataFrame with the columns ``iteration``, ``best_value``,
    ``current_value``, ``parameters`` and ``algorithm``.
    """
    steps = []
    running_minima = []
    evaluations = []
    sampled_points = []

    # Lowest objective value observed so far
    incumbent = float('inf')

    for step in range(max_iterations):
        # Sample a candidate point inside the box defined by bounds
        candidate = [np.random.uniform(limits[0], limits[1]) for limits in bounds]
        score = objective_func(candidate)

        # min() keeps the incumbent unless the new score is strictly lower
        incumbent = min(incumbent, score)

        steps.append(step)
        running_minima.append(incumbent)
        evaluations.append(score)
        sampled_points.append(list(candidate))

    trace = {
        'iteration': steps,
        'best_value': running_minima,
        'current_value': evaluations,
        'parameters': sampled_points,
        'algorithm': algorithm_name,
    }
    return pd.DataFrame(trace)

# Generate main dataset
# wafer_data is the module-level DataFrame that the dashboard and KPI cells
# further down read from.
print("🏭 Generating Global Foundries wafer manufacturing data...")
wafer_data = generate_wafer_manufacturing_data(n_wafers=5000, n_lots=100)

# Quick sanity summary: row count, number of distinct lots, mean yield
print(f"✅ Generated {len(wafer_data)} wafer records")
print(f"📊 Covering {wafer_data['lot_id'].nunique()} manufacturing lots")
print(f"🎯 Average yield: {wafer_data['yield_rate'].mean():.3f}")

# Display sample data (first 10 rows)
display(wafer_data.head(10))

## 📊 Section 3: Create Interactive Performance Metrics Dashboard
Build interactive widgets and plots to display key performance indicators for Global Foundries manufacturing optimization.

In [None]:
def create_performance_metrics_dashboard(data, max_lots=20):
    """Create interactive dashboard for Global Foundries manufacturing KPIs.

    Builds a 2x3 Plotly figure from the wafer records:

    1. histogram of ``yield_rate``
    2. bar chart of ``defect_class`` frequencies
    3. scatter of ``temperature`` against ``yield_rate``
    4. one box plot of ``yield_rate`` per lot, for the first ``max_lots``
       lots in sorted ``lot_id`` order
    5. line chart of the mean ``yield_rate`` per calendar day
    6. correlation heatmap of the process parameters and ``yield_rate``

    The figure title reports the mean yield, the share of wafers with a yield
    below 0.8 ("Defect Rate") and the share above 0.9 ("High Yield Rate").

    Args:
        data: DataFrame with the columns ``yield_rate``, ``defect_class``,
            ``temperature``, ``pressure``, ``etch_time``, ``deposition_rate``,
            ``chamber_flow``, ``lot_id`` and a datetime ``timestamp``.
        max_lots: Maximum number of lots shown in the "Yield by Lot" panel.

    Returns:
        The assembled ``plotly.graph_objects.Figure``; the caller is
        responsible for showing it.
    """

    # Headline metrics shown in the figure title
    overall_yield = data['yield_rate'].mean()
    defect_rate = (data['yield_rate'] < 0.8).mean()
    high_yield_rate = (data['yield_rate'] > 0.9).mean()

    # Create subplots
    fig = make_subplots(
        rows=2, cols=3,
        subplot_titles=('Yield Distribution', 'Defect Class Analysis', 'Process Temperature vs Yield',
                       'Yield by Lot', 'Temporal Trends', 'Process Optimization Opportunities'),
        specs=[[{"type": "histogram"}, {"type": "bar"}, {"type": "scatter"}],
               [{"type": "box"}, {"type": "scatter"}, {"type": "heatmap"}]]
    )

    # 1. Yield Distribution
    fig.add_trace(
        go.Histogram(x=data['yield_rate'], nbinsx=30, name='Yield Distribution',
                    marker_color='lightblue', opacity=0.7),
        row=1, col=1
    )

    # 2. Defect Class Analysis
    defect_counts = data['defect_class'].value_counts()
    fig.add_trace(
        go.Bar(x=defect_counts.index, y=defect_counts.values, name='Defect Frequency',
               marker_color='coral'),
        row=1, col=2
    )

    # 3. Process Temperature vs Yield
    fig.add_trace(
        go.Scatter(x=data['temperature'], y=data['yield_rate'], mode='markers',
                  name='Temp vs Yield', marker=dict(color='green', size=4, opacity=0.6)),
        row=1, col=3
    )

    # 4. Yield by Lot (sample of lots)
    # Draw one box per lot from the wafer-level yields. Collapsing the lots to
    # their means and plotting a single box hides the lot-to-lot comparison
    # this panel is titled for.
    sampled_lots = data.groupby('lot_id').size().index[:max_lots]
    lot_sample = data[data['lot_id'].isin(sampled_lots)].sort_values('lot_id')
    fig.add_trace(
        go.Box(x=lot_sample['lot_id'], y=lot_sample['yield_rate'], name='Lot Yields'),
        row=2, col=1
    )

    # 5. Temporal Trends
    # The grouping key keeps the name 'timestamp', so reset_index() yields a
    # 'timestamp' column holding the calendar dates.
    daily_yield = data.groupby(data['timestamp'].dt.date)['yield_rate'].mean().reset_index()
    fig.add_trace(
        go.Scatter(x=daily_yield['timestamp'], y=daily_yield['yield_rate'],
                  mode='lines+markers', name='Daily Yield Trend',
                  line=dict(color='purple', width=2)),
        row=2, col=2
    )

    # 6. Correlation Heatmap Data Preparation
    process_cols = ['temperature', 'pressure', 'etch_time', 'deposition_rate', 'chamber_flow', 'yield_rate']
    corr_matrix = data[process_cols].corr()

    fig.add_trace(
        go.Heatmap(z=corr_matrix.values, x=corr_matrix.columns, y=corr_matrix.columns,
                  colorscale='RdBu', zmid=0, name='Process Correlation'),
        row=2, col=3
    )

    # Update layout
    fig.update_layout(
        height=800,
        showlegend=False,
        title_text=f"🏭 Global Foundries Manufacturing Performance Dashboard<br>" +
                  f"Overall Yield: {overall_yield:.1%} | Defect Rate: {defect_rate:.1%} | " +
                  f"High Yield Rate: {high_yield_rate:.1%}",
        title_x=0.5
    )

    return fig

# Create and display the dashboard
# Builds the figure from the module-level wafer_data frame and renders it.
print("📊 Creating Global Foundries Performance Metrics Dashboard...")
performance_fig = create_performance_metrics_dashboard(wafer_data)
performance_fig.show()

# Create KPI summary widgets
def create_kpi_widgets(data):
    """Build six styled KPI cards and return them as two rows of three.

    The cards show, in order: overall mean yield, wafer count, share of wafers
    with yield below 0.8, the highest per-lot mean yield, mean temperature and
    the yield standard deviation. Returns a pair of ``widgets.HBox`` objects
    (first three cards, remaining three cards).
    """
    yields = data['yield_rate']
    lot_means = data.groupby('lot_id')['yield_rate'].mean()

    # (label, preformatted text) pairs; list order fixes the card order
    kpis = [
        ('Overall Yield', f"{yields.mean():.1%}"),
        ('Total Wafers', f"{len(data):,}"),
        ('Defect Rate', f"{(yields < 0.8).mean():.1%}"),
        ('Best Lot Yield', f"{lot_means.max():.1%}"),
        ('Avg Temperature', f"{data['temperature'].mean():.1f}°C"),
        ('Yield Std Dev', f"{yields.std():.3f}"),
    ]

    def render_card(metric, value):
        # One gradient card: metric name as heading, value in bold below it
        return widgets.HTML(
            value=f"""
            <div style="
                background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
                color: white;
                padding: 15px;
                margin: 5px;
                border-radius: 10px;
                text-align: center;
                box-shadow: 0 4px 6px rgba(0,0,0,0.1);
                min-width: 150px;
            ">
                <h3 style="margin: 0; font-size: 18px;">{metric}</h3>
                <p style="margin: 5px 0 0 0; font-size: 24px; font-weight: bold;">{value}</p>
            </div>
            """
        )

    cards = [render_card(label, text) for label, text in kpis]
    top_row = widgets.HBox(cards[:3])
    bottom_row = widgets.HBox(cards[3:])
    return top_row, bottom_row

# Display KPI widgets
# create_kpi_widgets returns two HBox rows (three cards each); show them
# one under the other.
print("🎯 Creating KPI Widgets...")
kpi_row1, kpi_row2 = create_kpi_widgets(wafer_data)
display(kpi_row1)
display(kpi_row2)