# PCR Template Generator - Jupyter Integration Example

This notebook demonstrates how to use the PCR Template Generator library in a Jupyter environment with interactive widgets and visualizations.

## Requirements

```bash
pip install pcr-template-generator biopython numpy matplotlib ipywidgets pandas seaborn
```

In [None]:
# Import required libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from pcr_template_generator import run_experiment, generate_multiple_templates, analyze_sequence_statistics
from Bio.SeqUtils import GC, MeltingTemp

# Configure matplotlib for inline plots
%matplotlib inline
plt.style.use('seaborn-v0_8')

print("✅ All libraries imported successfully!")

## 1. Interactive Template Generation

Generate a single PCR template with custom parameters:

In [None]:
# Build a single template using explicitly chosen design parameters
print("🧬 Generating PCR template...")

template = run_experiment(
    seq_length=75, primer_length=22, probe_length=25,
    primer_melt=54.6, probe_gap=3,
    debug=True, max_iterations=5000,
)

if not template:
    print("❌ Failed to generate template")
else:
    print("\n✅ Template generated successfully!")
    print("\n📊 Template Layout:")
    print(template.display())

    # Nearest-neighbour melting temperatures, evaluated in the order
    # forward primer -> reverse primer -> probe.
    fwd_tm, rev_tm, probe_tm = (
        MeltingTemp.Tm_NN(str(region()))
        for region in (template.fwd_primer, template.rev_primer, template.probe)
    )
    # GC percentage of the full forward strand
    gc_content = GC(str(template.fwd()))

    # One print call, one line per property
    print(
        "\n📈 Properties:",
        f"  Optimization cost: {template.cost():.2f}",
        f"  GC content: {gc_content:.1f}%",
        f"  Forward primer Tm: {fwd_tm:.1f}°C",
        f"  Reverse primer Tm: {rev_tm:.1f}°C",
        f"  Probe Tm: {probe_tm:.1f}°C",
        sep="\n",
    )

## 2. Batch Generation and Analysis

Generate multiple templates and analyze their properties:

In [None]:
# Generate multiple templates
print("🔬 Generating batch of templates...")

templates = generate_multiple_templates(
    count=10,
    debug=False,
    max_iterations=3000
)

print(f"✅ Generated {len(templates)} templates")

# Column order of the analysis table. Passing it explicitly keeps the
# DataFrame schema stable even when no templates were produced.
ANALYSIS_COLUMNS = [
    'Template', 'Cost', 'GC_Content',
    'Fwd_Tm', 'Rev_Tm', 'Probe_Tm',
    'Tm_Diff', 'Probe_Delta',
]


def summarize_template(template_id, template):
    """Return one analysis record (a dict keyed by ANALYSIS_COLUMNS) for a template.

    Args:
        template_id: 1-based identifier stored in the 'Template' column.
        template: a generated template exposing fwd_primer(), rev_primer(),
            probe(), fwd() and cost().

    Returns:
        dict with the cost, GC percentage, the three melting temperatures,
        the absolute primer Tm difference, and the probe Tm minus the mean
        primer Tm.
    """
    fwd_tm = MeltingTemp.Tm_NN(str(template.fwd_primer()))
    rev_tm = MeltingTemp.Tm_NN(str(template.rev_primer()))
    probe_tm = MeltingTemp.Tm_NN(str(template.probe()))

    return {
        'Template': template_id,
        'Cost': template.cost(),
        'GC_Content': GC(str(template.fwd())),
        'Fwd_Tm': fwd_tm,
        'Rev_Tm': rev_tm,
        'Probe_Tm': probe_tm,
        'Tm_Diff': abs(fwd_tm - rev_tm),
        'Probe_Delta': probe_tm - (fwd_tm + rev_tm) / 2,
    }


# Analyze template properties. The comprehension keeps the per-template
# temporaries out of the notebook namespace, so the single `template` (and its
# Tm values) from section 1 is not silently overwritten by this cell.
template_data = [
    summarize_template(template_id, candidate)
    for template_id, candidate in enumerate(templates, start=1)
]

# Create DataFrame for analysis
df = pd.DataFrame(template_data, columns=ANALYSIS_COLUMNS)
print("\n📊 Template Analysis Summary:")
if df.empty:
    # Nothing to summarize when the generator returned no templates
    print("⚠️ No templates were generated - try increasing max_iterations.")
else:
    print(df.describe().round(2))

## 3. Visualization Dashboard

Create comprehensive visualizations of the generated templates:

In [None]:
# Create visualization dashboard
if df.empty:
    # Without this guard the cell would draw six empty panels and then fail
    # in idxmin() with an opaque "empty sequence" ValueError.
    raise ValueError(
        "No templates to visualize: `df` is empty. "
        "Re-run the batch generation cell first."
    )


def plot_histogram(ax, values, color, xlabel, title):
    """Draw an 8-bin frequency histogram of `values` on `ax` with the dashboard styling."""
    ax.hist(values, bins=8, alpha=0.7, color=color, edgecolor='black')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)


fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('PCR Template Analysis Dashboard', fontsize=16, fontweight='bold')

# Cost distribution
plot_histogram(axes[0, 0], df['Cost'], 'lightcoral',
               'Optimization Cost', 'Template Quality (Cost Distribution)')

# GC content distribution
plot_histogram(axes[0, 1], df['GC_Content'], 'lightgreen',
               'GC Content (%)', 'GC Content Distribution')

# Primer Tm comparison
axes[0, 2].scatter(df['Fwd_Tm'], df['Rev_Tm'], alpha=0.7, s=60, color='blue')
# Span the y = x reference line over the observed Tm range (padded by 1 °C)
# instead of a fixed 50-60 °C window, so it stays meaningful whatever primer
# Tm the generator targeted.
primer_tms = df[['Fwd_Tm', 'Rev_Tm']].to_numpy()
tm_low, tm_high = primer_tms.min() - 1, primer_tms.max() + 1
axes[0, 2].plot([tm_low, tm_high], [tm_low, tm_high], 'r--', alpha=0.5, label='Perfect Match')
axes[0, 2].set_xlabel('Forward Primer Tm (°C)')
axes[0, 2].set_ylabel('Reverse Primer Tm (°C)')
axes[0, 2].set_title('Primer Tm Matching')
axes[0, 2].legend()
axes[0, 2].grid(True, alpha=0.3)

# Tm difference distribution
plot_histogram(axes[1, 0], df['Tm_Diff'], 'orange',
               'Primer Tm Difference (°C)', 'Primer Tm Balance')

# Probe-Primer relationship
axes[1, 1].scatter(df['GC_Content'], df['Probe_Delta'], alpha=0.7, s=60, color='purple')
axes[1, 1].set_xlabel('GC Content (%)')
axes[1, 1].set_ylabel('Probe-Primer ΔTm (°C)')
axes[1, 1].set_title('Probe Temperature Optimization')
axes[1, 1].grid(True, alpha=0.3)

# Template ranking: bars sorted by ascending cost, with the three cheapest
# templates highlighted in gold / silver / chocolate.
df_sorted = df.sort_values('Cost')
podium_colors = ['gold', 'silver', 'chocolate']
colors = [podium_colors[rank] if rank < len(podium_colors) else 'lightblue'
          for rank in range(len(df_sorted))]
axes[1, 2].bar(range(len(df_sorted)), df_sorted['Cost'], color=colors, alpha=0.7, edgecolor='black')
axes[1, 2].set_xlabel('Template Rank')
axes[1, 2].set_ylabel('Optimization Cost')
axes[1, 2].set_title('Template Ranking (Lower is Better)')
axes[1, 2].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Show best template.
# NOTE: idxmin() returns an index *label*; using it to index `templates`
# assumes `df` still has its default 0..n-1 index in the same order as the list.
best_template_idx = df['Cost'].idxmin()
best_template = templates[best_template_idx]

print(f"\n🏆 Best Template (#{best_template_idx + 1}):")
print(f"Cost: {df.loc[best_template_idx, 'Cost']:.2f}")
print(f"GC Content: {df.loc[best_template_idx, 'GC_Content']:.1f}%")
print(f"Primer ΔTm: {df.loc[best_template_idx, 'Tm_Diff']:.2f}°C")
print("\nLayout:")
print(best_template.display())

## 4. Statistical Analysis

Perform statistical analysis of sequence properties:

In [None]:
# Analyze sequence statistics
print("📊 Performing statistical analysis...")

# Single source of truth for the sample size: it feeds both the analysis call
# and the figure title, so changing it here keeps the two in sync.
SAMPLE_COUNT = 5000

temperatures, gc_contents = analyze_sequence_statistics(
    sequence_length=22,
    sample_count=SAMPLE_COUNT,
    debug=False
)

# Summary statistics are computed once and reused by the plots and the report
mean_temperature = np.mean(temperatures)
mean_gc = np.mean(gc_contents)
correlation = np.corrcoef(gc_contents, temperatures)[0, 1]


def plot_distribution_with_mean(ax, values, mean_value, unit, color, xlabel, title):
    """Draw a 30-bin histogram of `values` on `ax` with a dashed red line at `mean_value`.

    `unit` is appended to the mean in the legend label (e.g. '°C' or '%').
    """
    ax.hist(values, bins=30, alpha=0.7, color=color, edgecolor='black')
    ax.axvline(mean_value, color='red', linestyle='--',
               label=f'Mean: {mean_value:.1f}{unit}')
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Frequency')
    ax.set_title(title)
    ax.legend()
    ax.grid(True, alpha=0.3)


# Create statistical plots
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle(f'Sequence Statistics Analysis (n={SAMPLE_COUNT:,})', fontsize=14, fontweight='bold')

# Temperature distribution
plot_distribution_with_mean(axes[0], temperatures, mean_temperature, '°C',
                            'skyblue', 'Temperature (°C)', 'Temperature Distribution')

# GC content distribution
plot_distribution_with_mean(axes[1], gc_contents, mean_gc, '%',
                            'lightgreen', 'GC Content (%)', 'GC Content Distribution')

# Correlation plot
axes[2].scatter(gc_contents, temperatures, alpha=0.5, s=1, color='coral')
axes[2].set_xlabel('GC Content (%)')
axes[2].set_ylabel('Temperature (°C)')
axes[2].set_title('GC vs Temperature Correlation')
axes[2].grid(True, alpha=0.3)

# Annotate the scatter with the Pearson correlation coefficient
# (position given in axes-fraction coordinates: upper-left corner)
axes[2].text(0.05, 0.95, f'r = {correlation:.3f}', transform=axes[2].transAxes,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

plt.tight_layout()
plt.show()

print("\n📈 Statistical Summary:")
print(f"Temperature: {mean_temperature:.2f} ± {np.std(temperatures):.2f}°C")
print(f"GC Content: {mean_gc:.2f} ± {np.std(gc_contents):.2f}%")
print(f"Correlation: {correlation:.3f}")

## 5. Export Results

Save the analysis results for further use:

In [None]:
# Export template data
df.to_csv('pcr_templates_analysis.csv', index=False)
print("💾 Template analysis saved to: pcr_templates_analysis.csv")

# Metrics of the best template, looked up once and reused by both reports
best_row = df.loc[best_template_idx]
best_cost = best_row['Cost']
best_gc = best_row['GC_Content']
best_tm_diff = best_row['Tm_Diff']

# Export best template sequences.
# The report contains non-ASCII text ("°C"), so the encoding is pinned to UTF-8
# instead of relying on the platform default.
with open('best_pcr_template.txt', 'w', encoding='utf-8') as f:
    f.write("PCR Template Generator - Best Template\n")
    f.write("=" * 40 + "\n\n")
    f.write(f"Template ID: {best_template_idx + 1}\n")
    f.write(f"Optimization Cost: {best_cost:.2f}\n")
    f.write(f"GC Content: {best_gc:.1f}%\n")
    f.write(f"Primer Tm Difference: {best_tm_diff:.2f}°C\n\n")
    f.write("Sequences:\n")
    f.write("-" * 20 + "\n")
    f.write(f"Forward Primer: {best_template.fwd_primer()}\n")
    f.write(f"Reverse Primer: {best_template.rev_primer()}\n")
    f.write(f"Probe:          {best_template.probe()}\n")
    f.write(f"Full Template:  {best_template.fwd()}\n\n")
    f.write("Layout:\n")
    f.write("-" * 10 + "\n")
    f.write(best_template.display())

print("💾 Best template saved to: best_pcr_template.txt")

# Report the number of samples actually analyzed rather than a hard-coded
# figure, so the summary stays correct when the sample size is changed.
# (assumes `temperatures` holds one value per sampled sequence - TODO confirm)
statistical_samples = np.size(temperatures)

# Create summary report
summary = f"""
PCR Template Generator - Jupyter Analysis Summary
================================================

Analysis Date: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')}

Templates Generated: {len(templates)}
Statistical Samples: {statistical_samples:,}

Best Template Performance:
- Optimization Cost: {best_cost:.2f}
- GC Content: {best_gc:.1f}%
- Primer Balance: {best_tm_diff:.2f}°C difference

Statistical Insights:
- Mean Temperature: {np.mean(temperatures):.2f}°C
- Mean GC Content: {np.mean(gc_contents):.2f}%
- GC-Temperature Correlation: {correlation:.3f}

Files Generated:
- pcr_templates_analysis.csv: Detailed template data
- best_pcr_template.txt: Best template sequences

Ready for PCR experiments! 🧬
"""

print(summary)

print("\n🎉 Jupyter analysis completed!")
print("\nNext steps:")
print("- Review the generated CSV file for detailed analysis")
print("- Use the best template for your PCR experiments")
print("- Modify parameters above to explore different conditions")