In [45]:
import re 
class Spacer:
    def __init__(self, begin, length, sequence):
        "CRISPR spacer"
        self._begin = begin
        self._length = length
        self._sequence = sequence 
    @property 
    def begin(self):
        ":return: begin position of spacer"
        return self._begin
    @property
    def length(self):
        ":return: length of spacer"
        return self._length 
    @property
    def sequence(self):
        ":return: spacer sequence"
        return self._sequence
    
class Bacteria_CRISPR:
    """CRISPR information for bacteria with natural CRISPRs.
    :author: Karl Diedrich, PhD, <ktdiedrich@gmail.com>"""
    def __init__(self):
        """Set up variables"""
        self._description = None 
        self._directed_repeats = list()
        self._spacers = list()
        self._spacer_positions = list()
        self._begin = 0
        self._end = 0 
        self._consensus_DR = None 
        self._id = None 
    def add_spacer(self, begin, spacer):
        """:begin: begin position
        :spacer: sequence """
        spacer = Spacer(begin, len(spacer), spacer)
        self._spacers.append(spacer)
        
    def __str__(self):
        this_str = """Description: {}
    Begin: {} End: {}""".format(self._description, self._begin, self._end)
        return this_str
    
    @property 
    def begin(self):
        "Begin position"
        return self._begin
    
    @begin.setter
    def begin(self, value):
        "set begin position"
        self._begin = value 
        
    @property
    def id(self):
        return self._id
    
    @id.setter
    def id(self, value):
        self._id = value 
        
    @property 
    def description(self):
        ""
        return self._description 
        
    @description.setter
    def description(self, value):
        ""
        self._description = value 
        
    def  add_DR(self, dr):
        """Add Directed Repeat 
        """
        self._directed_repeats.add(dr)
        
    @property 
    def consensus_DR(self):
        ":return: consensus Directed Repeat "
        return self._consensus_DR
    
    @consensus_DR.setter
    def consensus_DR(self, value):
        "set consensus Directed Repeat "
        self._consensus_DR = value 
        

        

    

In [46]:
# from http://crispr.i2bc.paris-saclay.fr/crispr/databases/Output/413999/NC_009495/NC_009495_Crispr_10 
# index http://crispr.i2bc.paris-saclay.fr/crispr/ 
crispr_str = """########################################
# Program: Crispr Finder Program
# Author: Ibtissem GRISSA
# Rundate (GMT): 5/6/2007 7:51:58
# Report_file: /var/www/Server/.tmp/Output/129.175.104.227_Jun_05_2007_07_51_01/tmp_1/tmp_1_Crispr_9
########################################
#=======================================
# 
# Sequence: tmp_1 
# Description: Clostridium botulinum A str. ATCC 3502, complete genome
# Length: 3886916
# Id: gi|148378011|ref|NC_009495.1|
#
#=========================================================================
# Crispr Rank in the sequence: 10
# Crispr_begin_position: 2324983	 Crispr_end_position:  2325211
# DR: ATTTAAATACATCTCATGTTAATGTTCAAC	 DR_length: 30	 Number_of_spacers: 2
#=========================================================================
Spacer_begin_position	 Spacer_length	 Spacer_sequence
              2325013	            36	 AATAGAGTATTCAGATGAATATAAATTCTTGGAAGA

              2325079	            37	 TAGAGGTGATTTTATATGAAAAAGGAAAACATAACAA

              2325146	            36	 CGACCCTATAACAGTTTCAGAAGTAGAACAAAATAT

#=========================================================================
########################################
"""

In [47]:
def parse_Crispr_finder(crispr_str):
    """Parse Crispr finder website information string on bacteria CRISPRs
    :return: Bacteria_CRISPR object """
    crispr = Bacteria_CRISPR()
    crispr.description = re.search("Description:\s+(.+)", crispr_str).group(1)
    # TODO find all spacers 
    spacer_match = re.search("\s+(\d+)\s+(\d+)\[ATCG]+", crispr_str)
    spacer_match = re.search("\s+(\d+)\s+(\d+)(.+)", crispr_str)
    spacer_begin = int(spacer_match.group(1))
    spacer_seq = spacer_match.group(3)
    print("spacer begin={} sequence={}".format(spacer_begin, spacer_seq))
    spacer = Spacer(begin=spacer_begin, length=len(spacer_seq), sequence=spacer_seq)
    
    return crispr 

clostridum = parse_Crispr_finder(crispr_str=crispr_str)
print(clostridum)


spacer begin=2325013 sequence=	 AATAGAGTATTCAGATGAATATAAATTCTTGGAAGA
Description: Clostridium botulinum A str. ATCC 3502, complete genome
    Begin: 0 End: 0
