# Workflow for pattern-matching algorithm

The code blocks below show the steps followed in deciding how to structure the pattern-matching function in Python.

In [1]:
import sys
import os
import re
from math import sqrt

## Extract tabular blocks that fall below the string "Q U A D 4"

In [2]:
# Two-line sample: a SUBCASE line followed by a header line in which the element
# type is spelled with a space between every character.
element_example = "SUBCASE  7 \n STRESSES IN ( Q U A D 4 )" 
print(element_example)

SUBCASE  7 
 STRESSES IN ( Q U A D 4 )


## Count number of matches of "QUAD4"

In [3]:
# Tally of element-type header matches; starts at zero.
n_match = 0

In [4]:
element_type = "quad4"

# The sample header spells the element name in upper case with a space between
# every character, so build the same spaced-out form from the plain name.
setname = ' '.join(element_type.upper())

# Wrap the spaced name in a capturing group.
element_pattern = '({})'.format(setname)

In [5]:
# Count every non-overlapping occurrence of the element pattern in the sample text.
# Assigning the total (instead of incrementing n_match when a single search succeeds)
# gives a true count and keeps the cell idempotent: re-running it cannot inflate the tally.
n_match = len(re.findall(element_pattern, element_example))
print("Matched {} occurrences of element type {} in example.".format(n_match, element_type))

Matched 1 occurrences of element type quad4 in example.


## Match subcase number, and count number of matches as well

In [6]:
# The number of spaces around SUBCASE is arbitrary in the actual output and cannot be
# pre-determined, so the two samples below use different spacing and digit counts.
sub_example_1 = "    SUBCASE   7"   
sub_example_2 = "  SUBCASE  12"

In [7]:
# Regex for a subcase line: the literal word SUBCASE, any amount of whitespace
# (possibly none) around an optional "=", then the subcase number captured as group 1.
# The whole pattern is a raw string so that "\s" reaches the regex engine untouched
# instead of being parsed as an (invalid) Python string escape.
sub_pattern = r'SUBCASE\s*=?\s*([0-9]+)'

### Note that we capture only the subcase number (for future use), regardless of how many digits it has

In [8]:
# Apply the subcase pattern to both sample lines, then show what each match captured.
find_1, find_2 = (
    re.search(sub_pattern, sample) for sample in (sub_example_1, sub_example_2)
)

for found in (find_1, find_2):
    print(found.groups())

('7',)
('12',)


## If subcase number repeats in the next block, how to catch it?

In [9]:
# Sample in which the same SUBCASE line appears twice, separated by a line of
# other text and an empty line.
sub_example_3 = "    SUBCASE   7 \n SOME TEXT \n\n     SUBCASE   7 "
print(sub_example_3)

    SUBCASE   7 
 SOME TEXT 

     SUBCASE   7 


### We can continually append the matched subcase numbers to a dummy list

In [10]:
split_data = sub_example_3.split("\n")
print(split_data)

# Start from two placeholder entries, then add the subcase number of every
# line that matches the subcase pattern, in order of appearance.
sub_repeat = [-1, -1]
subcase_hits = (re.search(sub_pattern, text) for text in split_data)
sub_repeat.extend(int(hit.group(1)) for hit in subcase_hits if hit)

print(sub_repeat)
        

['    SUBCASE   7 ', ' SOME TEXT ', '', '     SUBCASE   7 ']
[-1, -1, 7, 7]


### From this point, we can easily count the number of repeats with the .count() method

In [11]:
# Number of times subcase 7 was recorded in sub_repeat
print(sub_repeat.count(7))

2


## Stop collecting tabular data when a specific string appears
### In the actual data output, the tabular data of interest is separated by a start pattern and a stop pattern

In [12]:
# Sample block: a SUBCASE line, an element header, four rows of numbers and a final
# line containing "MD NASTRAN". The explicit \n escapes inside the literal add an
# extra line break after each of those rows, on top of the literal's own newlines.
real_example = """ 
                 SUBCASE   7 
    E L E M E N T S   ( Q U A D 4 )
    
00001    3.810295E-01   8.661047E-01\n  
        -1.196695E+00  -6.235256E-01\n  
00002    1.662949E-01   5.598139E-01\n  
        -1.336528E+00  -2.009380E-01\n  
SEPTEMBER  20, 2017  MD NASTRAN  12/15/10 \n
"""

print(real_example)

 
                 SUBCASE   7 
    E L E M E N T S   ( Q U A D 4 )
    
00001    3.810295E-01   8.661047E-01
  
        -1.196695E+00  -6.235256E-01
  
00002    1.662949E-01   5.598139E-01
  
        -1.336528E+00  -2.009380E-01
  
SEPTEMBER  20, 2017  MD NASTRAN  12/15/10 




### In the real data, the stop pattern string could be "MSC NASTRAN", "MD NASTRAN" or "NX NASTRAN" - we need to catch this using a regular expression to account for variability in the input

In [13]:
# One or more upper-case letters, at least one whitespace character, then the word
# "NASTRAN" with each letter accepted in either case (covers e.g. "MD NASTRAN").
stop_pattern = r'[A-Z]+\s+[Nn][Aa][Ss][Tt][Rr][Aa][Nn]'

In [14]:
# Scan the sample one line at a time. `flag` is True while we are inside a data block:
# a line containing the element header switches it on, a stop-pattern line switches it off.
split_real_example = real_example.split("\n")

n_match, flag = 0, False

print('Printing relevant numerical data extracted from sample:\n')
for text in split_real_example:
    if flag and re.search(stop_pattern, text):
        flag = False        # stop pattern reached: leave the data block
    elif flag:
        print(text)         # still inside the block: emit the line as-is

    # Record the subcase number whenever the line carries one
    subcase_hit = re.search(sub_pattern, text)
    if subcase_hit:
        f = int(subcase_hit.group(1))

    # Element header found: count it; printing starts with the next line
    if re.search(element_pattern, text):
        n_match, flag = n_match + 1, True

print("Value of flag = "+ str(flag))
print("Matched {} subcase in example.".format(n_match))

Printing relevant numerical data extracted from sample:

    
00001    3.810295E-01   8.661047E-01
  
        -1.196695E+00  -6.235256E-01
  
00002    1.662949E-01   5.598139E-01
  
        -1.336528E+00  -2.009380E-01
  
Value of flag = False
Matched 1 subcase in example.


## Summary

Once we have tested each pattern-matching step as described above, we can construct a more detailed function (as shown in `pattern_matching_nastran.py`) that loops through the lines of a file, allowing us to parse a large text file and extract blocks of numerical information.