# NLP to Systems Architecture

### Imports

In [None]:
# Standard-library Imports
import os
import string

# Spacy Imports
import spacy 
from spacy.lang.en import English

# TextBlob Imports 
from textblob import TextBlob

# nltk Imports 
import nltk
from nltk.corpus import stopwords 

### Downloads

In [None]:
# Fetch every NLTK resource this notebook uses, so that a fresh kernel can run
# all cells top to bottom without a LookupError:
#   - averaged_perceptron_tagger : needed by nltk.pos_tag
#   - stopwords                  : needed by stopwords.words('english')
#   - punkt                      : needed by nltk.word_tokenize
# nltk.download skips packages that are already up to date.
for nltk_resource in ('averaged_perceptron_tagger', 'stopwords', 'punkt'):
    nltk.download(nltk_resource)

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

### Creating the text file that stores the info to be run in PlantUML

In [None]:
# Natural-language specification of the FGS system.
# Each requirement is stored as " <number>.<TAB><sentence>"; the pieces are
# concatenated with no separator, so the tabs are spelled out as \t escapes.
text = "".join([
    " 1.\tThe FGS_System consists of four components: the Left_Side FGS, the Right_Side FGS, an LR_Bus, and an RL_Bus.",
    " 2.\tLR_Bus establishes connection between Left_Side FGS and Right_Side FGS.",
    " 3.\tRL_Bus establishes connection between Left_Side FGS and Right_Side FGS.",
    " 4.\tThe Left_Side FGS accepts as input a boolean value of Left_Transfer_Switch and Left_Primary_Side.",
    " 5.\tThe Right_Side FGS accepts as input a boolean value of Right_Transfer_Switch and Right_Primary_Side.",
    " 6.\tThe Left_Side FGS takes input from a synchronous clock CLK1.",
    " 7.\tThe LR_Bus takes input from CLK2.",
    " 8.\tThe Right_Side FGS takes input from a synchronous clock CLK3.",
    " 9.\tThe RL_Bus takes input from CLK4.",
])

# Run the small English spaCy pipeline over the whole specification.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)


In [None]:
# Extract the noun phrases of the whole specification with TextBlob
# (TextBlob returns them lower-cased, as a WordList).
spec_blob = TextBlob(text)
tb_noun_phrases = spec_blob.noun_phrases
tb_noun_phrases

WordList(['fgs_system', 'left_side fgs', 'right_side fgs', 'lr_bus', 'rl_bus', 'lr_bus', 'establishes connection', 'left_side fgs', 'right_side fgs', 'rl_bus', 'establishes connection', 'left_side fgs', 'right_side fgs', 'left_side fgs', 'boolean value', 'left_transfer_switch', 'left_primary_side', 'right_side fgs', 'boolean value', 'right_transfer_switch', 'right_primary_side', 'left_side fgs', 'synchronous clock', 'clk1', 'lr_bus', 'clk2', 'right_side fgs', 'synchronous clock', 'clk3', 'rl_bus', 'clk4'])

In [None]:
# Extract the noun chunks using spaCy.
# `doc` has to be the document produced by the "en_core_web_sm" pipeline.
# Cells further down rebind `nlp`/`doc` to a sentencizer-only pipeline, so when
# running this cell out of order, first re-run:
#     nlp = spacy.load("en_core_web_sm")
#     doc = nlp(text)
sp_noun_phrases = list(map(lambda chunk: chunk.text, doc.noun_chunks))
print(sp_noun_phrases)

['The FGS_System', 'four components', 'the Left_Side FGS', 'the Right_Side FGS', 'an LR_Bus', 'an RL_Bus.', 'establishes connection', 'Left_Side FGS', 'Right_Side FGS', 'RL_Bus', 'establishes connection', 'Left_Side FGS', 'Right_Side FGS', 'The Left_Side FGS', 'input', 'a boolean value', 'Left_Transfer_Switch', 'Left_Primary_Side', 'The Right_Side FGS', 'input', 'a boolean value', 'Right_Transfer_Switch', 'The Left_Side FGS', 'input', 'The LR_Bus', 'input', 'CLK2', 'The Right_Side FGS', 'input', 'a synchronous clock CLK3', 'The RL_Bus', 'input', 'CLK4']


In [None]:
# Sentence segmentation with spaCy's rule-based sentencizer.
nlp = English()  # blank English pipeline: tokenizer only, no trained components

# spaCy >= 3 adds built-in components by their registered name, while
# spaCy 2 expects the component object returned by create_pipe().
if int(spacy.__version__.split(".")[0]) >= 3:
    nlp.add_pipe("sentencizer")
else:
    nlp.add_pipe(nlp.create_pipe("sentencizer"))

# NOTE: this rebinds `nlp` and `doc`; the new `doc` carries sentence boundaries only.
doc = nlp(text)
for sent in doc.sents:
    print(sent.text)

 1.
	The FGS_System consists of four components: the Left_Side FGS, the Right_Side FGS, an LR_Bus, and an RL_Bus.
2.
	LR_Bus establishes connection between Left_Side FGS and Right_Side FGS.
3.
	RL_Bus establishes connection between Left_Side FGS and Right_Side FGS.
4.
	The Left_Side FGS accepts as input a boolean value of Left_Transfer_Switch and Left_Primary_Side.
5.
	The Right_Side FGS accepts as input a boolean value of Right_Transfer_Switch and Right_Primary_Side.
6.
	The Left_Side FGS takes input from a synchronous clock CLK1.
7.
	The LR_Bus takes input from CLK2.
8.
	The Right_Side FGS takes input from a synchronous clock CLK3.
9.
	The RL_Bus takes input from CLK4.


In [None]:
# Build the stop-word list used later to clean each sentence:
# NLTK's English stop words plus the tab and single-space strings, so that
# tokens equal to "\t" or " " are filtered out as well.
stopwords_english = stopwords.words('english') + ["\t", " "]

In [None]:
# Generate the PlantUML relationship lines for the specification and write
# them to a text file (created if missing, overwritten if it already exists).


def _relation_lines(noun_phrases, clean_words, connection_num):
    """Build the PlantUML lines for one sentence.

    Args:
        noun_phrases: noun phrases of the sentence, in order of appearance.
            Index 0 is used as the source component.
        clean_words: lower-cased tokens of the sentence after stop-word,
            punctuation and digit removal.
        connection_num: running counter used for the ":C<n>" labels.

    Returns:
        (lines, connection_num): the lines to write (each ending in a newline)
        and the updated counter.

    Notes:
        * Targets start at noun-phrase index 2; index 1 is skipped. In the
          sample specification index 1 is a phrase such as
          'establishes connection' or 'boolean value'.
          NOTE(review): a sentence with fewer than three noun phrases therefore
          produces no line at all -- confirm this is intended.
        * At most len(clean_words) targets are emitted per keyword, because the
          loop runs once per cleaned word.
        * The counter is also advanced for "input" lines even though they carry
          no ":C<n>" label; this is kept as-is so the numbering of later
          connections does not change.
    """
    lines = []
    # Loop-invariant keyword checks, computed once per sentence.
    has_connection = "connection" in clean_words
    has_input = "input" in clean_words

    conn_idx = 2
    input_idx = 2
    for _ in clean_words:
        if conn_idx >= len(noun_phrases):
            break
        if has_connection:
            lines.append(f"{noun_phrases[0]}-{noun_phrases[conn_idx]} :C{connection_num}\n")
            conn_idx += 1
            connection_num += 1

        if input_idx >= len(noun_phrases):
            break
        if has_input:
            lines.append(f"{noun_phrases[0]}-{noun_phrases[input_idx]}\n")
            input_idx += 1
            connection_num += 1
    return lines, connection_num


# Blank English pipeline with only the rule-based sentencizer, used to split
# the specification into sentences.
nlp = English()
# spaCy >= 3 adds built-in components by their registered name, while
# spaCy 2 expects the component object returned by create_pipe().
if int(spacy.__version__.split(".")[0]) >= 3:
    nlp.add_pipe("sentencizer")
else:
    nlp.add_pipe(nlp.create_pipe("sentencizer"))
doc = nlp(text)

output_path = '/content/docs/model_specs.txt'
# Make sure the target directory exists so open(..., 'w') cannot fail on it.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

connection_num = 1
# A single handle is kept open for the whole run; every line goes through it
# instead of re-opening the file in append mode for each write.
with open(output_path, 'w') as f:
    for sent in doc.sents:
        # Noun phrases of this sentence; the component is mentioned before
        # its sub-component(s).
        tb_noun_phrases = TextBlob(str(sent)).noun_phrases

        # Lower-cased tokens minus stop words, punctuation and digits.
        sentence_clean = [
            token.lower_
            for token in sent
            if (token.lower_ not in stopwords_english
                and token.lower_ not in string.punctuation
                and token.lower_ not in string.digits)
        ]

        sentence_lines, connection_num = _relation_lines(
            tb_noun_phrases, sentence_clean, connection_num
        )
        f.writelines(sentence_lines)

In [None]:
# TODO: investigate further -- in the recorded output below the tagger labels
# 'accepts' as NNS (plural noun) and 'input' as VBP (verb) for this phrase.
phrase = 'accepts input'
word = nltk.word_tokenize(phrase)
l1 = nltk.pos_tag(word)
l1

[('accepts', 'NNS'), ('input', 'VBP')]