In [298]:
import pandas as pd
from collections import OrderedDict as od
import string

In [299]:
def reserved_words_and_counts(csv_df):
    """
    Extract the reserved words held in a DataFrame, along with their character counts.

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    list: One dictionary per reserved word, in row-major cell order:
          [
            {"word": str, "size": int},
            {"word": str, "size": int},
            ...
          ]

    Note:
    - A cell is a reserved word when it is a string, `str.islower()` is true for it
      (at least one cased character and no uppercase one) and it contains no '<'.
    - Non-string cells (for example missing values) are skipped instead of raising.
    - "word" is the cell text itself and "size" is `len(word)`.
    """
    json_data = []
    for row in csv_df.values:
        for cell in row:
            # Missing values are not strings and have no .islower().
            if not isinstance(cell, str):
                continue
            # NOTE(review): '<' is presumably what marks grammar-rule cells
            # ('<X> ::= ...'); the earlier `cell != "<"` comparison could never
            # reject anything because "<".islower() is already False.
            if cell.islower() and "<" not in cell:
                json_data.append({"word": cell, "size": len(cell)})

    return json_data


In [300]:
def extract_terminals(csv_df):
    """
    Flattens the reserved words of a DataFrame into a list of single characters.

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    list: The characters of every reserved word, in order of appearance
          (duplicates are kept).

    Note:
    - The reserved words come from 'reserved_words_and_counts'; only the "word"
      value of each entry is used.
    - Each "word" value is iterated, and every element obtained that way is
      iterated again, so the result is always flattened by two levels.
    """
    letters = []
    for entry in reserved_words_and_counts(csv_df):
        for piece in entry["word"]:
            # extend() walks `piece` and appends its elements one by one.
            letters.extend(piece)
    return letters

In [301]:
def unique_terminal_letters(csv_df):
    """
    Extracts unique terminal letters from a DataFrame containing strings.

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    list: The distinct lowercase characters found in the string cells, in order
          of first appearance.

    Note:
    - A character is kept when `str.islower()` is true for it and it is not
      'ε' (epsilon); uppercase letters and non-letter characters are ignored.
    - Non-string cells (for example missing values) are skipped instead of raising.
    - A plain dict is used as an insertion-ordered set to drop duplicates.
    """
    seen = {}
    for row in csv_df.values:
        for cell in row:
            # Missing values are not strings and cannot be iterated.
            if not isinstance(cell, str):
                continue
            for c in cell:
                # 'ε' is itself a lowercase character, so it needs its own test.
                if c.islower() and c != 'ε':
                    seen.setdefault(c, None)
    return list(seen)

In [302]:
def create_afnd_skeleton(csv_df):
    """
    Builds the empty transition table of an AFND (Nondeterministic Finite Automaton).

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    pandas.DataFrame: A table whose first column is 'sigma' (the state label),
    followed by one column per letter returned by 'unique_terminal_letters'.

    Note:
    - Row 0 holds the start state 'S'.
    - Rows 1..n hold the labels 'A', 'B', ... where n is the number of characters
      returned by 'extract_terminals'; the labels are sliced from the 26
      uppercase ASCII letters, so at most 26 such rows are created.
    - Every cell that was not assigned is filled with the empty string.
    """
    headers = ['sigma']
    headers.extend(str(letter) for letter in unique_terminal_letters(csv_df))
    skeleton = pd.DataFrame(columns=headers)

    # The start state always occupies the first row.
    skeleton.at[0, 'sigma'] = 'S'

    # One labelled row per reserved-word character, numbered right after 'S'.
    state_count = len(extract_terminals(csv_df))
    for position, label in enumerate(string.ascii_uppercase[:state_count], start=1):
        skeleton.at[position, "sigma"] = label

    return skeleton.fillna('')

In [303]:

def create_afnd(csv_df):
    """
    Fill the AFND skeleton with the transitions spelled by the reserved words.

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    pandas.DataFrame: The table from 'create_afnd_skeleton' where, for every
    character of every reserved word, the cell (current state row, character
    column) holds the label of the next state.

    Note:
    - While the current state is 'S', the next state is the uppercase letter at
      the character's position inside its word; otherwise it is the letter that
      follows the current state, wrapping around after 'Z'.
    - The state reached at the end of each word is printed.
    - NOTE(review): the current state is initialised once and is not reset to
      'S' between words, so each word after the first continues from the state
      where the previous word ended -- confirm this is intended.
    """
    table = create_afnd_skeleton(csv_df)
    entries = reserved_words_and_counts(csv_df)

    labels = list(string.ascii_uppercase)
    start = 'S'
    current = start

    for entry in entries:
        symbols = [c for piece in entry["word"] for c in piece]

        for position, symbol in enumerate(symbols):
            if current == start:
                target = labels[position]
            else:
                target = labels[(labels.index(current) + 1) % len(labels)]

            # Record the transition on the row of the state being left.
            table.loc[table['sigma'] == current, symbol] = target
            current = target

        print(current)

    return table

# Load the input file; header=None keeps its first line as data instead of
# using it for column names.
csv_df = pd.read_csv('./entrada.csv',  header=None)
csv_df

# Build the automaton table from the loaded data. The bare name below is only
# rendered by the notebook when it is the last expression of the cell.
afnd_df = create_afnd(csv_df)
afnd_df


B
G
L


Unnamed: 0,sigma,s,e,n,t,a,o,i,u
0,S,A,,,,,,,
1,A,,B,,,,,,
2,B,,C,,,,,,
3,C,,,D,,,,,
4,D,,,,E,,,,
5,E,,,,,F,,,
6,F,,,,,,G,,
7,G,H,,,,,,,
8,H,,I,,,,,,
9,I,,,J,,,,,


In [304]:
# Reload the input file; header=None keeps its first line as data instead of
# using it for column names.
csv_df = pd.read_csv('./entrada.csv',  header=None)
csv_df

# Build the automaton table again, this time bound to the name `afnd`. The bare
# name below is only rendered by the notebook when it is the cell's last expression.
afnd = create_afnd(csv_df)
afnd

B
G
L


Unnamed: 0,sigma,s,e,n,t,a,o,i,u
0,S,A,,,,,,,
1,A,,B,,,,,,
2,B,,C,,,,,,
3,C,,,D,,,,,
4,D,,,,E,,,,
5,E,,,,,F,,,
6,F,,,,,,G,,
7,G,H,,,,,,,
8,H,,I,,,,,,
9,I,,,J,,,,,


In [305]:
def extract_variables(csv_df):
    """
    Extracts the grammar rules found in a DataFrame containing strings.

    A row is treated as a grammar rule when its text contains a rule head of
    the form '<X> ::=', X being a single uppercase ASCII letter.

    Parameters:
    - csv_df (pandas.DataFrame): The DataFrame containing strings to be analyzed.

    Returns:
    list: One dictionary per rule row, in row order:
          [
            {"symbol": [str, ...], "terminals": [str, ...], "variables": [str, ...]},
            ...
          ]
          - "symbol": the letter of every rule head in the row; left empty when
            the row contains no terminal/variable pair.
          - "terminals": the lowercase letter of every 'a<X>' pair in the row.
          - "variables": the uppercase letter of those same pairs, same order.

    Note:
    - Each row yields at most one entry, whatever its number of columns.
    - Non-string cells (for example missing values) are ignored.
    """
    import re

    symbol_pattern = re.compile(r'<([A-Z])> ::=')
    variable_pattern = re.compile(r'([a-z])<([A-Z])>')

    rg = []
    for row in csv_df.values:
        # Neither pattern can match a newline, so joining the cells with one
        # guarantees that no match spans two cells.
        line = "\n".join(cell for cell in row if isinstance(cell, str))

        heads = symbol_pattern.findall(line)
        if not heads:
            continue

        pairs = variable_pattern.findall(line)
        rg.append({
            "symbol": heads if pairs else [],
            "terminals": [terminal for terminal, _ in pairs],
            "variables": [variable for _, variable in pairs],
        })
    return rg

# Load the input file; header=None keeps its first line as data instead of
# using it for column names.
teste = pd.read_csv('./entrada.csv',  header=None)

# Rebind `teste` to the list of rule dictionaries extracted from the frame. The
# bare name below is only rendered by the notebook when it is the cell's last expression.
teste = extract_variables(teste)
teste

[{'symbol': ['S'],
  'terminals': ['a', 'e', 'i', 'o', 'u'],
  'variables': ['A', 'A', 'A', 'A', 'A']},
 {'symbol': ['A'],
  'terminals': ['a', 'e', 'i', 'o', 'u'],
  'variables': ['A', 'A', 'A', 'A', 'A']}]