# Approximate Pattern Matching Problem: Find all approximate occurrences of a pattern in a string.

Input: Strings Pattern and Text along with an integer d.
Output: All starting positions where Pattern appears as a substring of Text with at most d mismatches.
Code Challenge: Solve the Approximate Pattern Matching Problem.

# Sample Input:
ATTCTGGA
CGCCCGAATCCAGAACGCATTCCCATATTTCGGGACCACTGGCCTCCACGGTACGGACGTCAATCAAAT
3
# Sample Output:
6 7 26 27

In [2]:
def HammingDistance(string_1, string_2):
    """Return the number of positions at which two equal-length strings differ.

    Args:
        string_1: First string.
        string_2: Second string; must have the same length as ``string_1``.

    Returns:
        The count of indices ``i`` for which ``string_1[i] != string_2[i]``.

    Raises:
        ValueError: If the two strings have different lengths.
    """
    # Fail loudly on a length mismatch: merely printing a message and carrying
    # on would either crash later with an unrelated IndexError or silently
    # return a distance computed over a prefix only.
    if len(string_1) != len(string_2):
        raise ValueError("Error: The strings must have the same length.")

    # Each comparison yields a bool; summing the bools counts the mismatches.
    return sum(char_1 != char_2 for char_1, char_2 in zip(string_1, string_2))
    
def ApproximatePatternMatch(pattern, text, d):
    """Find where ``pattern`` occurs in ``text`` with at most ``d`` mismatches.

    Every window of ``text`` that has the same length as ``pattern`` is
    compared with ``pattern`` position by position.

    Args:
        pattern: The string to search for.
        text: The string to search in.
        d: Maximum number of mismatching positions allowed in a window.

    Returns:
        The 0-based starting positions of all matching windows, in ascending
        order, as a single space-separated string with no leading or trailing
        space. The string is empty when there is no match (including when
        ``pattern`` is longer than ``text``).
    """
    len_pattern = len(pattern)
    positions = []
    for i in range(len(text) - len_pattern + 1):
        window = text[i:i + len_pattern]
        # The window always has the same length as the pattern, so zip pairs
        # up every position and the sum is the Hamming distance.
        mismatches = sum(a != b for a, b in zip(window, pattern))
        if mismatches <= d:
            positions.append(str(i))
    # Join once at the end: avoids the leading space produced by joining onto
    # an initially empty string, and avoids rebuilding the string per match.
    return " ".join(positions)

In [3]:
# Run ApproximatePatternMatch on the sample input from the problem statement
# (the listed sample output is: 6 7 26 27).
pattern, text, d = (
    "ATTCTGGA",
    "CGCCCGAATCCAGAACGCATTCCCATATTTCGGGACCACTGGCCTCCACGGTACGGACGTCAATCAAAT",
    3,
)

print(ApproximatePatternMatch(pattern, text, d))

 6 7 26 27


In [4]:
# Run ApproximatePatternMatch on the test data file.
# The file is read as three lines: the pattern, the text, and the integer d.
# The path is a raw string because it contains a backslash followed by "d",
# which is not a valid escape sequence in an ordinary string literal; the raw
# form yields exactly the same path without the invalid-escape warning.
test_data_file = r"ApproximatePatternMatching Test Data\dataset_30278_4.txt"
with open(test_data_file, "r") as file:
    pattern = file.readline().strip()
    text = file.readline().strip()
    d = int(file.readline().strip())
    print(ApproximatePatternMatch(pattern, text, d))

 2 3 6 11 14 19 20 21 29 31 32 33 35 36 38 41 44 46 49 50 52 53 56 57 58 60 63 64 65 66 67 68 76 77 78 79 80 84 85 90 91 95 98 99 102 103 105 106 111 112 115 117 120 128 132 138 141 142 146 149 153 156 157 159 160 164 166 167 170 171 173 179 182 183 187 188 191 193 194 195 202 203 205 208 217 218 221 224 226 227 230 235 236 245 246 249 250 253 258 260 263 264 266 268 269 271 272 277 278 279 280 281 284 288 289 291 292 294 298 300 303 308 312 313 319 320 323 333 339 340 341 342 343 349 351 352 357 360 361 365 368 370 373 378 381 383 386 388 389 391 396 399 401 402 403 407 416 418 421 422 424 425 428 430 432 433 437 438 439 441 444 448 454 457 458 461 464 471 472 473 476 479 480 485 491 492 496 497 498 501 502 503 506 509 510 514 515 516 519 528 529 531 532 534 537 538 540 543 548 549 552 557 559 560 561 566 569 574 575 576 578 581 584 585 586 587 589 590 595 597 600 601 603 609 611 612 613 616 621 627 628 630 631 635 636 638 639 641 645 650 651 652 658 659 660 662 664 665 666 669 673 67