In [1]:
pip install biopython numpy

Collecting biopython
  Downloading biopython-1.84-cp312-cp312-win_amd64.whl.metadata (13 kB)
Downloading biopython-1.84-cp312-cp312-win_amd64.whl (2.8 MB)
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   ---------------------------------------- 0.0/2.8 MB ? eta -:--:--
   ------- -------------------------------- 0.5/2.8 MB 3.4 MB/s eta 0:00:01
   --------------- ------------------------ 1.0/2.8 MB 2.5 MB/s eta 0:00:01
   ------------------ --------------------- 1.3/2.8 MB 2.1 MB/s eta 0:00:01
   -------------------------- ------------- 1.8/2.8 MB 2.2 MB/s eta 0:00:01
   ------------------------------ --------- 2.1/2.8 MB 2.3 MB/s eta 0:00:01
   --------------------------------- ------ 2.4/2.8 MB 2.0 MB/s eta 0:00:01
   ------------------------------------- -- 2.6/2.8 MB 1.8 MB/s eta 0:00:01
   ---------------------------------------- 2.8/2.8 MB 1.7 MB/s eta 0:00:00
Installing collected packages: biopython
Successfully installed biopython-1.84
Note: you may need t

In [11]:
import numpy as np
from Bio import SeqIO, Align, SwissProt
from Bio.PDB import *
from Bio.PDB.DSSP import dssp_dict_from_pdb_file
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import warnings
import requests
from typing import Dict, List, Tuple, Optional

In [45]:
class ProteinStructurePredictor:
    def __init__(self):
        self.pdb_list = []
        self.template_structures = {}
        self.amino_acid_properties = {
            'A': {'hydrophobicity': 1.8, 'volume': 88.6, 'charge': 0},
            'R': {'hydrophobicity': -4.5, 'volume': 173.4, 'charge': 1},
            'N': {'hydrophobicity': -3.5, 'volume': 114.1, 'charge': 0},
            'D': {'hydrophobicity': -3.5, 'volume': 111.1, 'charge': -1},
            # ... (other amino acids)
        }
    def search_templates(self, sequence: str) -> List[Dict]:
        """
        Search for template structures in PDB database
        """
        templates = []
        try:
            # Simulate PDB REST API call
            # In production, would use actual PDB API
            mock_templates = [
                {
                    'pdb_id': '1ABC',
                    'identity': 75.5,
                    'resolution': 2.1,
                    'method': 'X-RAY DIFFRACTION',
                    'sequence': sequence
                },
                {
                    'pdb_id': '2XYZ',
                    'identity': 68.3,
                    'resolution': 2.8,
                    'method': 'X-RAY DIFFRACTION',
                    'sequence': sequence
                }
            ]
            templates.extend(mock_templates)
        except Exception as e:
            print(f"Error searching templates: {e}")
        return templates
    def align_sequences(self, query: str, template: str) -> Tuple[str, str, float]:
        """
        Perform sequence alignment and return alignment score
        """
        aligner = Align.PairwiseAligner()
        aligner.open_gap_score = -10
        aligner.extend_gap_score = -0.5
        alignment = aligner.align(query, template)[0]
        score = alignment.score
        return str(alignment[0]), str(alignment[1]), score
            def predict_secondary_structure(self, sequence: str) -> Dict[str, List[float]]:
            """
            Predict secondary structure propensities
            """
            # Initialize probabilities for each position
            length = len(sequence)
            ss_pred = {
            'helix': np.zeros(length),
            'sheet': np.zeros(length),
            'coil': np.zeros(length)
            }
                
            # Simple sliding window analysis
            window = 5
            for i in range(length):
                start = max(0, i - window//2)
                end = min(length, i + window//2 + 1)
                window_seq = sequence[start:end]
                # Calculate propensities based on amino acid properties
                h_prop = 0
                e_prop = 0
                c_prop = 0
                for aa in window_seq:
                    if aa in self.amino_acid_properties:
                        props = self.amino_acid_properties[aa]
                        # Simple propensity rules
                        h_prop += (props['hydrophobicity'] > 0) * 0.5
                        e_prop += (props['volume'] > 120) * 0.5
                        c_prop += (abs(props['charge']) > 0) * 0.5
                total = h_prop + e_prop + c_prop
                if total > 0:
                    ss_pred['helix'][i] = h_prop / total
                    ss_pred['sheet'][i] = e_prop / total
                    ss_pred['coil'][i] = c_prop / total
            return ss_pred
        def build_3d_model(self, sequence: str, template: Dict) -> np.ndarray:
            """
            Build 3D model based on template and secondary structure
            """
            # Simplified model building - in reality would use more sophisticated methods
            ss_pred = self.predict_secondary_structure(sequence)
            coords = np.zeros((len(sequence), 3))
            current_pos = np.array([0., 0., 0.])
            for i in range(len(sequence)):
                 if ss_pred['helix'][i] > max(ss_pred['sheet'][i], ss_pred['coil'][i]):
                    # Helix geometry
                    current_pos += np.array([1.5, 0.5, 0.5])
                    elif ss_pred['sheet'][i] > ss_pred['coil'][i]:
                    # Sheet geometry
                        current_pos += np.array([1.5, 0., 0.])
                    else:
                    # Coil geometry
                        current_pos += np.array([1.0, np.random.rand(), np.random.rand()])
                 coords[i] = current_pos
        return coords
    
    

IndentationError: unexpected indent (1945029108.py, line 50)

In [None]:
class ProteinAnalyzer:
        def __init__(self):
            self.structure = None
            self.sequence = None
        def analyze_structure(self, coords: np.ndarray, sequence: str) -> Dict:
            """
            Analyze the predicted structure
            """
            analysis = {}
            # Calculate basic structural properties
            analysis['radius_of_gyration'] = self._calculate_radius_of_gyration(coords)
            analysis['surface_accessibility'] = self._estimate_surface_accessibility(coords)
            analysis['potential_binding_sites'] = self._predict_binding_sites(coords, sequence)
            analysis['domains'] = self._predict_domains(coords, sequence)
                    return analysis
        def _calculate_radius_of_gyration(self, coords: np.ndarray) -> float:
            """
            Calculate radius of gyration
            """
            center = np.mean(coords, axis=0)
            rg = np.sqrt(np.mean(np.sum((coords - center) ** 2, axis=1)))
            return float(rg)
        def _estimate_surface_accessibility(self, coords: np.ndarray) -> List[float]:
            """
            Estimate solvent accessibility
            """
            accessibility = []
            for i, coord in enumerate(coords):
                # Simple distance-based accessibility estimation
                distances = np.linalg.norm(coords - coord, axis=1)
                nearby_atoms = np.sum(distances < 10.0) # 10Å cutoff
                accessibility.append(1.0 / nearby_atoms)
            return accessibility
        def _predict_binding_sites(self, coords: np.ndarray, sequence: str) -> List[Dict]:
            """
            Predict potential binding sites
            """
            binding_sites = []
            # Simple pocket detection
            for i in range(len(coords)):
                # Calculate local atomic density
                local_density = np.sum(np.linalg.norm(coords - coords[i], axis=1) < 8.0)
                if local_density < 10: # Arbitrary threshold
                    binding_sites.append({
                        'position': i,
                        'residue': sequence[i],
                        'score': 1.0 / local_density
                    })
            return binding_sites
        def _predict_domains(self, coords: np.ndarray, sequence: str) -> List[Dict]:
            """
            Predict protein domains
            """
            domains = []
            # Simple domain prediction based on spatial clustering
            from sklearn.cluster import DBSCAN
            clustering = DBSCAN(eps=8.0, min_samples=5).fit(coords)
            
            unique_labels = set(clustering.labels_)
            for label in unique_labels:
                if label != -1: # Skip noise points
                    domain_positions = np.where(clustering.labels_ == label)[0]
                    domains.append({
                        'start': int(domain_positions[0]),
                        'end': int(domain_positions[-1]),
                        'size': len(domain_positions),
                        'sequence': sequence[domain_positions[0]:domain_positions[-1]+1]
                    })
            return domains
    class ReportGenerator:
        def __init__(self):
            self.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        def generate_report(self,
                            sequence: str,
                            structure_analysis: Dict,
                            templates: List[Dict],
                            filename: str = "protein_analysis_report.md") -> None:
                """
                Generate a detailed analysis report
                """
                        report = f"""
    # Protein Structure Analysis Report
    Generated on: {self.timestamp}
    
    ## 1. Sequence Information
    Length: {len(sequence)} amino acids
    ```
    {sequence}
    ```
    ## 2. Template Information
    """
            # Add template information
            for i, template in enumerate(templates, 1):
                report += f"""
    ### Template {i}: {template['pdb_id']}
    - Sequence Identity: {template['identity']}%
    - Resolution: {template['resolution']}Å
    - Method: {template['method']}
    """
            # Add structural analysis
            report += """
    ## 3. Structural Analysis
    """
            report += f"""
    ### Basic Properties
    - Radius of Gyration: {structure_analysis['radius_of_gyration']:.2f}Å
    ### Predicted Binding Sites
    """
            for site in structure_analysis['potential_binding_sites']:
                report += f"- Position {site['position']}: {site['residue']} (Score: {site['score']:.2f})\n"
            report += """
    ### Predicted Domains
    """
            for i, domain in enumerate(structure_analysis['domains'], 1):
                report += f"""
    Domain {i}:
    - Region: {domain['start']}-{domain['end']}
    - Size: {domain['size']} residues
    - Sequence: {domain['sequence']}
    """
            # Save report
            with open(filename, 'w') as f:
                f.write(report)
            print(f"Report saved as {filename}")
    def main():
    # Example sequence
        sequence = "MVKVGVNGFGRIGRLVTRAAFNSGKVDIVAINDPFIDLNYMVYMFQYDSTHGKFHGTVKAENGKLVINGNPITIFQERDPSKIKWGDAGAEYVVESTGVFTTMEKAGAHLQGGAKRVIISAPSADA
    # Initialize classes
        predictor = ProteinStructurePredictor()
        analyzer = ProteinAnalyzer()
        report_gen = ReportGenerator()
        try:
            # 1. Search for templates
            print("Searching for templates...")
            templates = predictor.search_templates(sequence)
            
            # 2. Build 3D model
            print("Building 3D model...")
            coords = predictor.build_3d_model(sequence, templates[0])
            
            # 3. Analyze structure
            print("Analyzing structure...")
            analysis_results = analyzer.analyze_structure(coords, sequence)
            
            # 4. Generate report
            print("Generating report...")
            report_gen.generate_report(sequence, analysis_results, templates)
            print("Analysis complete! Check the generated report for details.")
        except Exception as e:
            print(f"Error during analysis: {e}")
    if __name__ == "__main__":
        main()
