# LKM 2 - Perbandingan Fungsi Aktivasi

## Tujuan Pembelajaran
- Memahami berbagai jenis fungsi aktivasi
- Membandingkan karakteristik ReLU, Sigmoid, dan Tanh
- Menganalisis kelebihan dan kekurangan masing-masing fungsi
- Memvisualisasikan perbedaan dalam konteks neural networks

In [None]:
# Import library
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# Global plot styling; these calls change matplotlib/seaborn defaults for
# every figure created afterwards.
# NOTE(review): the 'seaborn-v0_8' style name is presumably only available in
# newer matplotlib releases — confirm against the installed version.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
# Default figure size and base font size for figures that do not override them.
plt.rcParams['figure.figsize'] = (12, 8)
plt.rcParams['font.size'] = 12

# Confirmation message printed once the imports and styling have run.
print("✅ Library berhasil diimport!")

## 1. Definisi Fungsi Aktivasi

Mari kita definisikan berbagai fungsi aktivasi yang umum digunakan:

In [None]:
# Implementasi sesuai LKM
def relu(x):
    """Rectified Linear Unit: element-wise maximum of 0 and x."""
    zero_floor = 0
    return np.maximum(zero_floor, x)

def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated element-wise.

    The input is clipped to [-500, 500] before exponentiation for numerical
    stability.
    """
    clipped = np.clip(x, -500, 500)
    denominator = 1 + np.exp(-clipped)
    return 1 / denominator

def tanh(x):
    """Hyperbolic tangent, evaluated element-wise with NumPy."""
    squashed = np.tanh(x)
    return squashed

def leaky_relu(x, alpha=0.01):
    """Leaky ReLU: x for positive inputs, alpha * x otherwise.

    Args:
        x: Scalar, sequence, or NumPy array of inputs.
        alpha: Slope applied to non-positive inputs.

    Returns:
        NumPy array with the activation applied element-wise.
    """
    # Coerce first so plain Python lists/tuples are accepted; a bare list
    # supports neither `x > 0` nor `alpha * x` with a float alpha.
    x = np.asarray(x)
    return np.where(x > 0, x, alpha * x)

def elu(x, alpha=1.0):
    """Exponential Linear Unit: x for x > 0, alpha * (exp(x) - 1) otherwise.

    Args:
        x: Scalar, sequence, or NumPy array of inputs.
        alpha: Scale of the negative branch.

    Returns:
        NumPy array with the activation applied element-wise.
    """
    x = np.asarray(x)
    # np.where evaluates both branches for every element. Clamping the
    # exponent input to <= 0 keeps large positive x (which never uses this
    # branch) from overflowing the exponential. expm1 keeps precision for
    # tiny negative inputs, where exp(x) - 1 would round to 0.
    negative_branch = alpha * np.expm1(np.minimum(x, 0))
    return np.where(x > 0, x, negative_branch)

def swish(x):
    """Swish activation: the input multiplied by its own sigmoid."""
    gate = sigmoid(x)
    return x * gate

# Smoke test: apply every activation defined above to the same small sample
# (negative, zero and positive values) and print the results side by side.
test_input = np.array([-2, -1, 0, 1, 2])
print("=== TEST FUNGSI AKTIVASI ===")
print(f"Input: {test_input}")
print(f"ReLU: {relu(test_input)}")
print(f"Sigmoid: {sigmoid(test_input)}")
print(f"Tanh: {tanh(test_input)}")
# leaky_relu and elu are called with their default alpha values.
print(f"Leaky ReLU: {leaky_relu(test_input)}")
print(f"ELU: {elu(test_input)}")
print(f"Swish: {swish(test_input)}")

## 2. Visualisasi Perbandingan Fungsi Aktivasi

Sesuai dengan kode dalam LKM, mari kita buat visualisasi perbandingan:

In [None]:
# As in the LKM code
import matplotlib.pyplot as plt

# Shared input grid for the full-range panels: 100 evenly spaced points on [-5, 5]
z = np.linspace(-5, 5, 100)

# Activation function definitions (LKM version); note that this sigmoid does
# not clip its input, unlike sigmoid() defined earlier
relu_vals = lambda x: np.maximum(0, x)
sigmoid_vals = lambda x: 1 / (1 + np.exp(-x))

# One figure holding a 2x3 grid of panels; the panels are filled in order below
plt.figure(figsize=(15, 10))

# Panel 1: the plot as given in the LKM (ReLU vs. Sigmoid)
plt.subplot(2, 3, 1)
plt.plot(z, relu_vals(z), label="ReLU", linewidth=3, color='red')
plt.plot(z, sigmoid_vals(z), label="Sigmoid", linewidth=3, color='blue')
plt.legend(fontsize=12)
plt.title("Perbandingan Fungsi Aktivasi (LKM)", fontsize=14, fontweight='bold')
plt.xlabel('Input (z)')
plt.ylabel('Output')
plt.grid(True, alpha=0.3)

# Panel 2 (addition): ReLU, Sigmoid, Tanh and Leaky ReLU on the same axes
plt.subplot(2, 3, 2)
plt.plot(z, relu(z), label="ReLU", linewidth=2.5)
plt.plot(z, sigmoid(z), label="Sigmoid", linewidth=2.5)
plt.plot(z, tanh(z), label="Tanh", linewidth=2.5)
plt.plot(z, leaky_relu(z), label="Leaky ReLU", linewidth=2.5)
plt.legend()
plt.title("Fungsi Aktivasi Utama")
plt.xlabel('Input (z)')
plt.ylabel('Output')
plt.grid(True, alpha=0.3)

# Panel 3: zoom on the critical area (-2 to 2), using its own denser grid
plt.subplot(2, 3, 3)
z_zoom = np.linspace(-2, 2, 100)
plt.plot(z_zoom, relu(z_zoom), label="ReLU", linewidth=3)
plt.plot(z_zoom, sigmoid(z_zoom), label="Sigmoid", linewidth=3)
plt.plot(z_zoom, tanh(z_zoom), label="Tanh", linewidth=3)
plt.legend()
plt.title("Zoom Area Kritis (-2 to 2)")
plt.xlabel('Input (z)')
plt.ylabel('Output')
plt.grid(True, alpha=0.3)

# Turunan fungsi aktivasi
def sigmoid_derivative(x):
    """Derivative of the sigmoid, computed as s * (1 - s) with s = sigmoid(x)."""
    activation = sigmoid(x)
    complement = 1 - activation
    return activation * complement

def tanh_derivative(x):
    """Derivative of tanh: 1 - tanh(x)**2, evaluated element-wise."""
    t = np.tanh(x)
    squared = t ** 2
    return 1 - squared

def relu_derivative(x):
    """Derivative of ReLU: 1.0 where x > 0, 0.0 elsewhere (including x == 0).

    Args:
        x: Scalar, sequence, or NumPy array of inputs.

    Returns:
        Float NumPy array (or NumPy float scalar for scalar input).
    """
    # np.asarray lets Python scalars and lists through: `x > 0` on a plain
    # float yields a Python bool, which has no .astype().
    return (np.asarray(x) > 0).astype(float)

# Panel 4: derivatives of Sigmoid, Tanh and ReLU over the same grid z
plt.subplot(2, 3, 4)
plt.plot(z, sigmoid_derivative(z), label="Sigmoid'", linewidth=2.5)
plt.plot(z, tanh_derivative(z), label="Tanh'", linewidth=2.5)
plt.plot(z, relu_derivative(z), label="ReLU'", linewidth=2.5)
plt.legend()
plt.title("Turunan Fungsi Aktivasi")
plt.xlabel('Input (z)')
plt.ylabel('Derivative')
plt.grid(True, alpha=0.3)

# Panel 5: output range of each activation, shown as text on equal-width bars
plt.subplot(2, 3, 5)
activation_ranges = {
    'ReLU': '[0, ∞)',
    'Sigmoid': '(0, 1)',
    'Tanh': '(-1, 1)',
    'Leaky ReLU': '(-∞, ∞)',
    'ELU': '(-α, ∞)'
}

y_pos = np.arange(len(activation_ranges))
colors = ['red', 'blue', 'green', 'orange', 'purple']

# Every bar has width 1; the bars only serve as coloured backgrounds for the
# "<name>: <range>" label centred on them.
for i, (name, range_val) in enumerate(activation_ranges.items()):
    plt.barh(i, 1, color=colors[i], alpha=0.7)
    plt.text(0.5, i, f"{name}: {range_val}", ha='center', va='center', fontweight='bold')

plt.yticks(y_pos, list(activation_ranges.keys()))
plt.xlabel('Range Comparison')
plt.title('Output Ranges')
plt.xlim(0, 1)

# Panel 6: key characteristics, rendered as text boxes on an axis-free panel
plt.subplot(2, 3, 6)
characteristics = [
    "ReLU: Fast, No saturation (+), Dead neurons (-)",
    "Sigmoid: Smooth, Probabilistic (+), Vanishing gradient (-)", 
    "Tanh: Zero-centered (+), Still saturates (-)",
    "Leaky ReLU: No dead neurons (+), Small negative slope",
    "ELU: Smooth, Negative values (+), Computational cost (-)"
]

# Text is positioned in axes coordinates, stacked top-down in steps of 0.18;
# box colours reuse the `colors` list from the range panel.
for i, char in enumerate(characteristics):
    plt.text(0.05, 0.9 - i*0.18, char, fontsize=10, transform=plt.gca().transAxes,
            bbox=dict(boxstyle="round,pad=0.3", facecolor=colors[i], alpha=0.3))

plt.xlim(0, 1)
plt.ylim(0, 1)
plt.axis('off')
plt.title('Karakteristik Utama')

plt.tight_layout()
plt.show()

## 3. Analisis Mendalam Masing-masing Fungsi

Mari kita analisis secara detail karakteristik masing-masing fungsi aktivasi:

In [None]:
# Analisis detail setiap fungsi aktivasi

def analyze_activation_function(func, name, x_range=(-5, 5), n_points=1000,
                                saturation_threshold=0.01):
    """Print and return summary statistics of an activation function.

    The function is sampled on an evenly spaced grid and its derivative is
    estimated numerically with np.gradient.

    Args:
        func: Callable applied to a 1-D NumPy array of inputs.
        name: Label used in the printed report and in the returned dict.
        x_range: (start, stop) of the sampled interval.
        n_points: Number of grid points; at least 2, since the numerical
            gradient needs neighbouring samples.
        saturation_threshold: A grid point counts as saturated when the
            absolute numerical gradient is below this value.

    Returns:
        dict with keys 'name', 'min', 'max', 'mean', 'std', 'max_grad' and
        'saturation' (fraction of grid points that are saturated).

    Raises:
        ValueError: If n_points is smaller than 2.
    """
    if n_points < 2:
        raise ValueError("n_points must be at least 2")

    x = np.linspace(x_range[0], x_range[1], n_points)
    y = func(x)

    # Basic statistics of the sampled outputs
    min_val = np.min(y)
    max_val = np.max(y)
    mean_val = np.mean(y)
    std_val = np.std(y)

    # Numerical derivative on the sampling grid
    gradient = np.gradient(y, x)
    max_gradient = np.max(gradient)
    min_gradient = np.min(gradient)

    # Saturation: share of grid points whose |gradient| is below the threshold
    saturation_points = np.sum(np.abs(gradient) < saturation_threshold)
    saturation_ratio = saturation_points / len(x)

    print(f"\n=== ANALISIS {name.upper()} ===")
    print(f"Range nilai: [{min_val:.3f}, {max_val:.3f}]")
    print(f"Mean: {mean_val:.3f}, Std: {std_val:.3f}")
    print(f"Gradient range: [{min_gradient:.3f}, {max_gradient:.3f}]")
    print(f"Rasio saturasi: {saturation_ratio:.1%}")

    # min_gradient is reported above but intentionally not part of the
    # returned record, so the downstream table keeps its existing columns.
    return {
        'name': name,
        'min': min_val,
        'max': max_val,
        'mean': mean_val,
        'std': std_val,
        'max_grad': max_gradient,
        'saturation': saturation_ratio
    }

# Run the analysis for every activation under comparison
functions = [
    (relu, "ReLU"),
    (sigmoid, "Sigmoid"),
    (tanh, "Tanh"),
    (leaky_relu, "Leaky ReLU"),
    (elu, "ELU"),
]

analysis_results = [
    analyze_activation_function(activation, label)
    for activation, label in functions
]

# Comparison table built from the per-function result dicts
df = pd.DataFrame(analysis_results)
separator = "="*80
print("\n" + separator)
print("                      TABEL PERBANDINGAN")
print(separator)
rounded_table = df.round(3)
print(rounded_table.to_string(index=False))

## 4. Eksperimen: Pengaruh Fungsi Aktivasi pada Learning

Mari kita lihat bagaimana berbagai fungsi aktivasi memengaruhi proses pembelajaran:

In [None]:
# Simulasi simple learning dengan berbagai aktivasi

class SimpleNeuron:
    """Single-input neuron used to simulate learning with a given activation.

    The neuron computes a = activation(weight * x + bias) and keeps a
    per-step trace of its weight, output and activation gradient.
    """

    def __init__(self, activation_func, activation_name):
        """Store the activation and draw random initial parameters.

        Args:
            activation_func: Callable applied to the pre-activation z.
            activation_name: Human-readable label for the activation.
        """
        self.activation_func = activation_func
        self.activation_name = activation_name
        # Initial parameters drawn from a normal distribution (mean 0, std 0.1);
        # uses NumPy's global random state, so results vary unless it is seeded.
        self.weight = np.random.normal(0, 0.1)
        self.bias = np.random.normal(0, 0.1)
        # Trace for later analysis: 'weights' and 'outputs' are appended by
        # forward(), 'gradients' by simulate_learning().
        self.history = {'weights': [], 'outputs': [], 'gradients': []}

    def forward(self, x):
        """Compute the pre-activation z and activation a for input x.

        Also records the current weight and the output in self.history.

        Returns:
            Tuple (z, a).
        """
        z = self.weight * x + self.bias
        a = self.activation_func(z)

        # Keep for analysis
        self.history['weights'].append(self.weight)
        self.history['outputs'].append(a)

        return z, a

    def simulate_learning(self, x_values, learning_rate=0.01, eps=1e-7):
        """Simulate a learning run over a sequence of inputs.

        For each input the activation gradient at z is estimated with a
        central finite difference, and the weight is nudged by
        learning_rate * gradient * 0.1.

        Args:
            x_values: Iterable of scalar inputs, processed in order.
            learning_rate: Step size of the simulated weight update.
            eps: Half-width of the finite-difference stencil.

        Returns:
            Tuple (outputs, gradients) of per-input lists.
        """
        outputs = []
        gradients = []

        for x in x_values:
            z, a = self.forward(x)
            outputs.append(a)

            # Central difference around the z that forward() just computed
            grad = (self.activation_func(z + eps) - self.activation_func(z - eps)) / (2 * eps)
            gradients.append(grad)
            # Fill the 'gradients' trace declared in __init__, which was
            # previously left empty.
            self.history['gradients'].append(grad)

            # Simulated update: a deliberately small step, not a real loss gradient
            self.weight += learning_rate * grad * 0.1

        return outputs, gradients

# Inputs fed to every neuron: 50 evenly spaced points on [-3, 3]
x_test = np.linspace(-3, 3, 50)

# One neuron per activation; each draws its own random initial weight and bias
neurons = {
    'ReLU': SimpleNeuron(relu, 'ReLU'),
    'Sigmoid': SimpleNeuron(sigmoid, 'Sigmoid'),
    'Tanh': SimpleNeuron(tanh, 'Tanh')
}

# Learning simulation: a 2x3 grid with outputs on the top row and gradients
# on the bottom row, one column per activation
plt.figure(figsize=(15, 10))

for i, (name, neuron) in enumerate(neurons.items()):
    outputs, gradients = neuron.simulate_learning(x_test)

    # Plot outputs (top row: subplot index i+1)
    plt.subplot(2, 3, i+1)
    plt.plot(x_test, outputs, label=f'{name} Output', linewidth=2)
    plt.title(f'{name} - Outputs')
    plt.xlabel('Input')
    plt.ylabel('Output')
    plt.grid(True, alpha=0.3)
    plt.legend()

    # Plot gradients (bottom row: subplot index i+4, directly below the outputs)
    plt.subplot(2, 3, i+4)
    plt.plot(x_test, gradients, label=f'{name} Gradient', linewidth=2, color='red')
    plt.title(f'{name} - Gradients')
    plt.xlabel('Input')
    plt.ylabel('Gradient')
    plt.grid(True, alpha=0.3)
    plt.legend()

    # Gradient summary printed per activation
    avg_grad = np.mean(np.abs(gradients))
    min_grad = np.min(gradients)
    max_grad = np.max(gradients)

    print(f"\n{name} - Analisis Gradient:")
    print(f"  Average |gradient|: {avg_grad:.4f}")
    print(f"  Min gradient: {min_grad:.4f}")
    print(f"  Max gradient: {max_grad:.4f}")
    print(f"  Gradient variance: {np.var(gradients):.4f}")

plt.tight_layout()
plt.show()

## 5. Kapan Menggunakan Fungsi Aktivasi Tertentu?

Berikut adalah panduan pemilihan fungsi aktivasi berdasarkan konteks:

In [None]:
# Panduan pemilihan fungsi aktivasi

def create_activation_guide():
    """Build the activation-function selection guide as a DataFrame.

    Returns:
        DataFrame with one row per activation and the columns 'Fungsi',
        'Best Use Case', 'Pros', 'Cons' and 'Gradient Issues'.
    """
    # Each list below is one column; entries are aligned by position.
    names = ['ReLU', 'Sigmoid', 'Tanh', 'Leaky ReLU', 'ELU', 'Swish']

    use_cases = [
        'Hidden layers, CNN, general purpose',
        'Binary classification output, probability',
        'Hidden layers when zero-centered needed',
        'When dead neurons are problem',
        'When smooth negatives needed',
        'Deep networks, recent architectures',
    ]

    pros = [
        'Fast, no saturation, sparse activation',
        'Smooth, probabilistic output',
        'Zero-centered, stronger gradients than sigmoid',
        'No dead neurons, allows negative values',
        'Smooth, no sharp changes at zero',
        'Self-gating, works well in practice',
    ]

    cons = [
        'Dead neurons, not zero-centered',
        'Vanishing gradient, saturated outputs',
        'Still saturates, vanishing gradient',
        'Small gradient for negatives',
        'Computational cost, exponential',
        'More complex, computational overhead',
    ]

    gradient_issues = [
        'Zero gradient for negative inputs',
        'Very small gradients at extremes',
        'Small gradients at extremes',
        'Small but non-zero negative gradients',
        'Better gradient flow than ReLU',
        'Generally good gradient properties',
    ]

    return pd.DataFrame({
        'Fungsi': names,
        'Best Use Case': use_cases,
        'Pros': pros,
        'Cons': cons,
        'Gradient Issues': gradient_issues,
    })

guide = create_activation_guide()

# Header
banner = "="*100
print(banner)
print("                           PANDUAN PEMILIHAN FUNGSI AKTIVASI")
print(banner)

# One entry per activation; the row label from iterrows() is not needed
for _row_label, row in guide.iterrows():
    print(f"\n🔹 {row['Fungsi'].upper()}:")
    print(f"   📋 Use Case: {row['Best Use Case']}")
    print(f"   ✅ Pros: {row['Pros']}")
    print(f"   ❌ Cons: {row['Cons']}")
    print(f"   📈 Gradient: {row['Gradient Issues']}")

# General recommendations
print("\n" + banner)
print("💡 REKOMENDASI UMUM:")
recommendations = (
    "   • Mulai dengan ReLU untuk hidden layers",
    "   • Gunakan Sigmoid untuk binary classification output",
    "   • Pertimbangkan Tanh jika perlu zero-centered",
    "   • Coba Leaky ReLU jika mengalami dead neurons",
    "   • Eksperimen dengan ELU/Swish untuk model kompleks",
)
for recommendation in recommendations:
    print(recommendation)
print(banner)

## 6. Visualisasi Interaktif Decision Boundaries

Mari kita lihat bagaimana berbagai fungsi aktivasi memengaruhi decision boundary:

In [None]:
# Visualisasi decision boundaries dengan berbagai aktivasi

def compare_decision_boundaries():
    """Plot and compare decision boundaries of one neuron under several activations.

    A fixed two-input neuron (w = [1, -0.5], b = 0.2) is evaluated on a grid
    over [-3, 3] x [-3, 3]. For each activation the output is drawn as a
    filled contour with the thresholded boundary on top, four sample points
    are classified against the threshold, and a short textual analysis is
    printed at the end.
    """
    # Evaluation grid, flattened into an (n_points, 2) input matrix
    x1, x2 = np.meshgrid(np.linspace(-3, 3, 100), np.linspace(-3, 3, 100))
    grid_inputs = np.stack([x1.flatten(), x2.flatten()], axis=1)

    # Neuron parameters and pre-activation for every grid point
    w = np.array([1, -0.5])
    b = 0.2
    z = grid_inputs @ w + b

    # Dispatch table: panel title -> activation function
    activation_fns = {
        'ReLU': relu,
        'Sigmoid': sigmoid,
        'Tanh': tanh,
        'Leaky ReLU': leaky_relu,
    }
    # Activations with a natural midpoint; the others use 50% of their max output
    fixed_thresholds = {'Sigmoid': 0.5, 'Tanh': 0.0}
    sample_points = np.array([[-2, -1], [-1, 2], [1, -1], [2, 2]])

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))

    for ax, (name, activate) in zip(axes.flatten(), activation_fns.items()):
        output = activate(z)
        output_2d = output.reshape(x1.shape)

        # Filled contour of the activation output
        contour = ax.contourf(x1, x2, output_2d, levels=20, alpha=0.7, cmap='RdYlBu')

        # Decision boundary: the contour line where output == threshold
        if name in fixed_thresholds:
            threshold = fixed_thresholds[name]
        else:
            threshold = 0.5 * np.max(output)
        ax.contour(x1, x2, output_2d, levels=[threshold], colors='black', linewidths=3)

        # Sample points, coloured by which side of the threshold they fall on
        for point in sample_points:
            output_point = activate(point @ w + b)
            if output_point > threshold:
                color = 'red'
            else:
                color = 'blue'
            ax.scatter(point[0], point[1], c=color, s=100, edgecolor='black', zorder=5)
            label_box = dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8)
            ax.annotate(f'{output_point:.2f}', point, xytext=(5, 5),
                        textcoords='offset points', fontsize=8,
                        bbox=label_box)

        ax.set_title(f'{name} Activation', fontsize=14, fontweight='bold')
        ax.set_xlabel('x1')
        ax.set_ylabel('x2')
        ax.grid(True, alpha=0.3)

        plt.colorbar(contour, ax=ax, label='Output Value')

    plt.tight_layout()
    plt.show()

    # Textual analysis of the differences
    print("\n📊 ANALISIS DECISION BOUNDARIES:")
    notes = (
        ("\n🔹 ReLU:", (
            "   • Boundary tajam (step function)",
            "   • Hanya region positif yang aktif",
            "   • Suitable untuk sparse representations",
        )),
        ("\n🔹 Sigmoid:", (
            "   • Boundary halus dan gradual",
            "   • Output probabilistic (0-1)",
            "   • Good untuk binary classification",
        )),
        ("\n🔹 Tanh:", (
            "   • Boundary halus, zero-centered",
            "   • Output range (-1, 1)",
            "   • Better gradient flow than sigmoid",
        )),
        ("\n🔹 Leaky ReLU:", (
            "   • Boundary tajam dengan small negative slope",
            "   • Prevents dead neurons",
            "   • Maintains some information in negative region",
        )),
    )
    for heading, bullets in notes:
        print(heading)
        for bullet in bullets:
            print(bullet)

# Render the four-panel comparison figure and print the accompanying analysis.
compare_decision_boundaries()

## 7. Kesimpulan dan Rekomendasi

Berdasarkan semua analisis yang telah dilakukan:

In [None]:
# Final summary and conclusions

rule = "="*80
print(rule)
print("              KESIMPULAN PERBANDINGAN FUNGSI AKTIVASI")
print(rule)

# Section headings carry their own leading newline; bullets follow each heading
conclusions = [
    "\n🎯 TEMUAN UTAMA:",
    "   1. ReLU adalah pilihan default yang baik untuk kebanyakan kasus",
    "   2. Sigmoid ideal untuk output layer classification (probabilitas)",
    "   3. Tanh memberikan gradients yang lebih kuat dibanding sigmoid",
    "   4. Leaky ReLU mengatasi masalah dead neurons pada ReLU",
    "   5. Pemilihan aktivasi bergantung pada arsitektur dan data",

    "\n🔍 INSIGHT PENTING:",
    "   • Vanishing gradient: Sigmoid & Tanh bermasalah di deep networks",
    "   • Dead neurons: ReLU bisa 'mati' jika input selalu negatif",
    "   • Zero-centering: Tanh lebih baik untuk hidden layers",
    "   • Computational efficiency: ReLU paling cepat",
    "   • Gradient flow: ReLU variants umumnya lebih baik",

    "\n📋 REKOMENDASI PRAKTIS:",
    "   🚀 START: Gunakan ReLU untuk hidden layers",
    "   🎯 OUTPUT: Sigmoid (binary), Softmax (multiclass)",
    "   🔧 TROUBLESHOOT: Leaky ReLU jika ada dead neurons",
    "   ⚡ EXPERIMENT: ELU/Swish untuk fine-tuning performance",
    "   📊 MONITOR: Selalu pantau gradient flow dan dead neurons",

    "\n🎓 PEMBELAJARAN DARI LKM:",
    "   • Fungsi aktivasi menentukan karakteristik output neuron",
    "   • Pemilihan yang tepat crucial untuk performance model",
    "   • Trade-off antara simplicity, speed, dan effectiveness",
    "   • Visualisasi membantu memahami behavior function",
    "   • Eksperimen empiris sering lebih baik dari teori",
]

# One entry per output line
print(*conclusions, sep="\n")

print("\n" + rule)
print("💡 NEXT STEPS: Implementasi dalam neural networks dan evaluasi empiris!")
print(rule)

# Build the summary table that is printed and then persisted
from pathlib import Path

summary_data = {
    'Activation': ['ReLU', 'Sigmoid', 'Tanh', 'Leaky ReLU'],
    'Range': ['[0, ∞)', '(0, 1)', '(-1, 1)', '(-∞, ∞)'],
    'Best_Use': ['Hidden layers', 'Binary output', 'Hidden layers', 'Replace ReLU'],
    'Pros': ['Fast, sparse', 'Probabilistic', 'Zero-centered', 'No dead neurons'],
    'Cons': ['Dead neurons', 'Vanishing grad', 'Still saturates', 'Small neg grad']
}

summary_df = pd.DataFrame(summary_data)
print("\n📊 SUMMARY TABLE:")
print(summary_df.to_string(index=False))

# Save the results to CSV for documentation.
# NOTE(review): hard-coded, machine-specific absolute path — consider making
# it relative to the notebook or configurable.
output_path = Path('/home/juni/Praktikum/deep-learning/dl-lkm-1/results/activation_comparison.csv')
# to_csv does not create missing directories, so make sure the target folder
# exists before writing.
output_path.parent.mkdir(parents=True, exist_ok=True)
summary_df.to_csv(output_path, index=False)
# Report the path that was actually written rather than a shortened one.
print(f"\n✅ Summary saved to: {output_path}")