In [None]:
!python -m spacy download en_core_web_trf
!python -m spacy download en_core_web_trf

In [None]:
from esco import LocalDB
# Instantiate the ESCO LocalDB wrapper; its `skills` table is queried below.
db = LocalDB()

# Lowercase labels of the skills this notebook works with.
skills_labels = [
    "collaborate with engineers",
    "deploy cloud resource",
    "design cloud architecture",
    "design cloud networks",
    "plan migration to cloud",
    "automate cloud tasks",
    "coordinate engineering teams",
    "design database in the cloud",
    "design for organisational complexity",
    "develop with cloud services",
    "do cloud refactoring",
]

# Keep only the rows whose lowercased `label` is one of the selected labels.
skills = db.skills[
    db.skills.label.str.lower().isin(skills_labels)
]

# Flatten: every entry of `allLabel` is iterated as a collection of labels.
labels = [
    single_label
    for label_group in skills.allLabel
    for single_label in label_group
]

In [None]:
import spacy
from spacy import displacy
# Load the `en_core_web_trf` English pipeline; the model must already be
# installed (e.g. with `python -m spacy download en_core_web_trf`).
nlp = spacy.load("en_core_web_trf")

def find_root(doc):
    """Return the verbal root token of a parsed text, or ``None``.

    The given ``doc`` is inspected as-is first. Only when it has no token
    whose dependency is ``ROOT`` and whose part of speech is ``VERB`` is the
    text re-parsed with a leading ``"to "`` (infinitive form) using the
    module-level ``nlp`` pipeline, and the search repeated on that parse.

    Args:
        doc: an already parsed document; it must be iterable over tokens
            exposing ``dep_`` and ``pos_`` and must expose ``text``.

    Returns:
        The first matching token (from ``doc`` or from the ``"to "`` re-parse),
        or ``None`` when neither parse has a verbal root.
    """
    # The caller already paid for parsing `doc`; do not run the pipeline on
    # the same text a second time.
    root = next(
        (tok for tok in doc if tok.dep_ == "ROOT" and tok.pos_ == "VERB"),
        None,
    )
    if root is not None:
        return root

    # Fallback: parse the text again with a leading "to " and search that parse.
    infinitive_doc = nlp("to " + doc.text)
    return next(
        (tok for tok in infinitive_doc if tok.dep_ == "ROOT" and tok.pos_ == "VERB"),
        None,
    )

In [None]:
def find_obj_or_prep(token):
    """Extract the direct object and the "with" prepositional phrase of a token.

    Args:
        token: a parsed token (the callers in this notebook pass the root)
            exposing ``children``; each child exposes ``dep_``, ``text``,
            ``children`` and ``subtree``.

    Returns:
        tuple: ``(obj_phrase, prep_phrase)``.
        ``obj_phrase`` is the space-joined text of the ``dobj`` child's
        subtree. ``prep_phrase`` is ``"with "`` followed by the space-joined
        subtree of the ``pobj`` under a ``prep`` child whose text is exactly
        ``"with"``. Either value is ``None`` when not found; when several
        children qualify, the last one wins.
    """
    obj_phrase = None
    prep_phrase = None

    for child in token.children:
        # Use `==`: `("dobj")` is a plain string, so `in` would be a substring
        # test and accept labels such as "obj" or "".
        if child.dep_ == "dobj":
            # Direct object: keep its whole subtree.
            obj_phrase = " ".join(node.text for node in child.subtree)

        if child.dep_ == "prep" and child.text == "with":
            # Object of the preposition "with".
            for grandchild in child.children:
                if grandchild.dep_ == "pobj":
                    prep_phrase = "with " + " ".join(
                        node.text for node in grandchild.subtree
                    )

    return obj_phrase, prep_phrase

# Example sentences
sentences = ["develop cloud services", "develop with cloud services"]

for sentence in sentences:
    doc = nlp(sentence)
    for token in doc:
        # Only the dependency root of each sentence is analysed.
        if token.dep_ != "ROOT":
            continue
        obj, prep = find_obj_or_prep(token)
        print(
            f"Frase: {sentence}",
            f"Oggetto diretto: {obj}",
            f"Frase preposizionale: {prep}",
            sep="\n",
            end="\n\n",
        )


```
Frase: develop cloud services
Oggetto diretto: cloud services
Frase preposizionale: None

Frase: develop with cloud services
Oggetto diretto: None
Frase preposizionale: with cloud services
```

In [None]:
def find_obj_or_prep(token):
    """Collect the direct object and every prepositional phrase of a token.

    Args:
        token: a parsed token exposing ``children``; each child exposes
            ``dep_``, ``text``, ``children`` and ``subtree``.

    Returns:
        tuple: ``(obj_phrase, prep_phrases)``. ``obj_phrase`` is the
        space-joined subtree text of the ``dobj`` child (``None`` if there is
        none; the last one wins if there are several). ``prep_phrases`` is a
        list with one ``"<preposition> <pobj subtree>"`` string for each
        ``pobj`` found under each ``prep`` child, in traversal order.
    """
    obj_phrase = None
    prep_phrases = []

    for child in token.children:
        relation = child.dep_
        if relation == "dobj":
            # Direct object: the whole subtree, joined with single spaces.
            obj_phrase = " ".join(node.text for node in child.subtree)
        elif relation == "prep":
            # Preposition: one phrase per object of the preposition.
            prep_phrases.extend(
                f"{child.text} {' '.join(node.text for node in pobj.subtree)}"
                for pobj in child.children
                if pobj.dep_ == "pobj"
            )

    return obj_phrase, prep_phrases

def get_verb_obj_from_label(label, nlp):
    """Return the main verb, direct object and prepositional phrases of a label.

    The label is parsed once with ``nlp`` and handed to ``find_root``.
    ``find_root`` itself retries with a leading ``"to "`` when the plain text
    has no verbal root, so no second retry is performed here (it would only
    re-parse ``"to <label>"`` and ``"to to <label>"``).

    Args:
        label: the label text to analyse.
        nlp: callable turning a text into a parsed document.

    Returns:
        tuple: ``(root, dobj, prep_phrases)`` where ``root`` is the token
        returned by ``find_root`` and the other two values come from
        ``find_obj_or_prep(root)``. When no verbal root is found the result is
        ``(None, None, [])``.
    """
    root = find_root(nlp(label))
    if root is None:
        return None, None, []

    dobj, prep_phrases = find_obj_or_prep(root)
    return root, dobj, prep_phrases


# Worked example: a label with two chained prepositions.
label = "develop with cloud services for multiple clients"
root, dobj, prep_phrases = get_verb_obj_from_label(label, nlp)
print(
    f"Verbo principale: {root}",
    f"Oggetto diretto: {dobj}",
    f"Frasi preposizionali: {prep_phrases}",
    sep="\n",
)


```
Verbo principale: develop
Oggetto diretto: None
Frasi preposizionali: ['with cloud services for multiple clients']

In [None]:
def generate_pattern(label):
    """Build a token pattern (list of per-token dicts) for a skill label.

    The label is analysed with ``get_verb_obj_from_label`` and the pattern is
    assembled in this order:

    1. the root verb as ``{"LEMMA": <lemma>, "POS": "VERB"}``;
    2. every token of the direct object as ``{"LEMMA", "POS"}``;
    3. every token of each prepositional phrase, with the POS forced to
       ``"ADP"`` for ``prep`` tokens and ``"NOUN"`` for ``pobj`` tokens.

    Object and prepositional phrases are re-parsed on their own with the
    module-level ``nlp`` to obtain lemmas and tags.

    When nothing could be extracted, or when only the verb of a multi-token
    label was captured (such a pattern would match every use of that verb),
    the pattern falls back to the literal label: one ``{"LOWER": ...}`` entry
    per label token. A single ``LOWER`` entry holding the whole multi-word
    label could never match, because each pattern entry describes one token.

    Args:
        label: the label text.

    Returns:
        list[dict]: the token pattern; never empty.
    """
    root, obj, prep_phrases = get_verb_obj_from_label(label, nlp)
    pattern = []

    # Root verb, matched by lemma.
    if root is not None:
        pattern.append({"LEMMA": root.lemma_, "POS": "VERB"})

    # Direct object: every token of its subtree text.
    if obj:
        pattern.extend(
            {"LEMMA": token.lemma_, "POS": token.pos_} for token in nlp(obj)
        )

    # Prepositional phrases: preposition and its object get a fixed POS.
    for prep_phrase in prep_phrases or ():
        for token in nlp(prep_phrase):
            if token.dep_ == "prep":
                pos = "ADP"
            elif token.dep_ == "pobj":
                pos = "NOUN"
            else:
                pos = token.pos_
            pattern.append({"LEMMA": token.lemma_, "POS": pos})

    if len(pattern) <= 1:
        # Tokenise only (no full pipeline run) to spell the label token by token.
        label_tokens = nlp.make_doc(label)
        verb_only = len(pattern) == 1 and len(label_tokens) > 1
        if not pattern or verb_only:
            pattern = [{"LOWER": token.lower_} for token in label_tokens]
            if not pattern:
                # Label without tokens: keep the original single-entry fallback
                # so the result is never an empty pattern.
                pattern = [{"LOWER": label.lower()}]

    return pattern


In [None]:
from spacy.matcher import Matcher

# Token matcher sharing the pipeline's vocabulary.
matcher = Matcher(nlp.vocab)

# Register one pattern per label, keyed by the label text itself, and log
# each label/pattern pair followed by a blank line.
for label in labels:
    print(f"Label: {label}")
    pattern = generate_pattern(label)
    print(f"Pattern: {pattern}", end="\n\n")
    matcher.add(label, [pattern])

```
Label: plan migration to cloud
Pattern: [{'LEMMA': 'plan', 'POS': 'VERB'}, {'LEMMA': 'migration', 'POS': 'NOUN'}, {'LEMMA': 'to', 'POS': 'ADP'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}]

Label: plan refactoring
Pattern: [{'LEMMA': 'plan', 'POS': 'VERB'}, {'LEMMA': 'refactoring', 'POS': 'NOUN'}]

Label: cloud migration planning
Pattern: [{'LOWER': 'cloud migration planning'}]

Label: create cloud architecture
Pattern: [{'LEMMA': 'create', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'architecture', 'POS': 'NOUN'}]

Label: design multi-tier cloud architecture
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'multi', 'POS': 'ADJ'}, {'LEMMA': '-', 'POS': 'PUNCT'}, {'LEMMA': 'tier', 'POS': 'NOUN'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'architecture', 'POS': 'NOUN'}]

Label: engineer cloud architecture
Pattern: [{'LEMMA': 'engineer', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'architecture', 'POS': 'NOUN'}]

Label: design cloud architecture
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'architecture', 'POS': 'NOUN'}]

Label: refactoring
Pattern: [{'LOWER': 'refactoring'}]

Label: do cloud refactoring
Pattern: [{'LEMMA': 'do', 'POS': 'VERB'}]

Label: implement cloud network
Pattern: [{'LEMMA': 'implement', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'network', 'POS': 'NOUN'}]

Label: design cloud networks
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'network', 'POS': 'NOUN'}]

Label: create cloud network
Pattern: [{'LEMMA': 'create', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'network', 'POS': 'NOUN'}]

Label: cloud deployment
Pattern: [{'LOWER': 'cloud deployment'}]

Label: deploy cloud resource
Pattern: [{'LEMMA': 'deploy', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'resource', 'POS': 'NOUN'}]

Label: provision cloud resources
Pattern: [{'LEMMA': 'provision', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'resource', 'POS': 'NOUN'}]

Label: deployment and provisioning
Pattern: [{'LOWER': 'deployment and provisioning'}]

Label: develop cloud applications
Pattern: [{'LEMMA': 'develop', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'application', 'POS': 'NOUN'}]

Label: write code with cloud services
Pattern: [{'LEMMA': 'write', 'POS': 'VERB'}, {'LEMMA': 'code', 'POS': 'NOUN'}, {'LEMMA': 'with', 'POS': 'ADP'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'service', 'POS': 'NOUN'}]

Label: develop with cloud services
Pattern: [{'LEMMA': 'develop', 'POS': 'VERB'}, {'LEMMA': 'with', 'POS': 'ADP'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'service', 'POS': 'NOUN'}]

Label: code with cloud services
Pattern: [{'LEMMA': 'code', 'POS': 'VERB'}, {'LEMMA': 'with', 'POS': 'ADP'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'service', 'POS': 'NOUN'}]

Label: design cloud data architecture
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'data', 'POS': 'NOUN'}, {'LEMMA': 'architecture', 'POS': 'NOUN'}]

Label: develop cloud database design
Pattern: [{'LEMMA': 'develop', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'database', 'POS': 'NOUN'}, {'LEMMA': 'design', 'POS': 'NOUN'}]

Label: design database in the cloud
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'database', 'POS': 'NOUN'}, {'LEMMA': 'in', 'POS': 'ADP'}, {'LEMMA': 'the', 'POS': 'DET'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}]

Label: automate cloud tasks
Pattern: [{'LEMMA': 'automate', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'task', 'POS': 'NOUN'}]

Label: automation of cloud tasks
Pattern: [{'LOWER': 'automation of cloud tasks'}]

Label: design cloud environment for complex organisations
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'cloud', 'POS': 'NOUN'}, {'LEMMA': 'environment', 'POS': 'NOUN'}, {'LEMMA': 'for', 'POS': 'ADP'}, {'LEMMA': 'complex', 'POS': 'ADJ'}, {'LEMMA': 'organisation', 'POS': 'NOUN'}]

Label: design for organisational complexity
Pattern: [{'LEMMA': 'design', 'POS': 'VERB'}, {'LEMMA': 'for', 'POS': 'ADP'}, {'LEMMA': 'organisational', 'POS': 'ADJ'}, {'LEMMA': 'complexity', 'POS': 'NOUN'}]


In [None]:
# Example usage of the matcher
def find_matches(text):
    """Run the module-level ``matcher`` on ``text`` and report what it finds.

    The text is parsed with the module-level ``nlp``; for every match a line
    ``Matched '<pattern key>': <matched text>`` is printed.

    Args:
        text: the text to search.

    Returns:
        set[str]: the distinct matched span texts.
    """
    doc = nlp(text)
    found_texts = set()
    for key, begin, stop in matcher(doc):
        # Resolve the numeric match id back to the key the pattern was added under.
        pattern_key = nlp.vocab.strings[key]
        matched_text = doc[begin:stop].text
        print(f"Matched '{pattern_key}': {matched_text}")
        found_texts.add(matched_text)
    return found_texts

# Matcher smoke test: each sentence embeds one label verbatim.
test_text = [
    "As a recent graduate, I have hands-on experience to design cloud architecture during my internships.",
    "In my previous role, I specialized in develop cloud applications to improve system functionality.",
    "I have a solid background in implement cloud network through various academic projects and part-time jobs.",
]

for text in test_text:
    print("Testing matcher with:", text)
    # Matches are printed by find_matches; its return value is not needed here.
    find_matches(text)


```
Testing matcher with: As a recent graduate, I have hands-on experience to design cloud architecture during my internships.
Matched 'design cloud architecture': design cloud architecture
Testing matcher with: In my previous role, I specialized in develop cloud applications to improve system functionality.
Matched 'develop cloud applications': develop cloud applications
Testing matcher with: I have a solid background in implement cloud network through various academic projects and part-time jobs.
Matched 'implement cloud network': implement cloud network

In [None]:
# Second smoke test: sentences that embed two labels each.
test_text = [
    "I have experience in develop cloud applications and automate cloud tasks to improve operational efficiency.",
    "During my internship, I worked on code with cloud services and implement cloud network solutions to enhance connectivity.",
]

for text in test_text:
    print("Testing matcher with:", text)
    # Matches are printed by find_matches; its return value is not needed here.
    find_matches(text)

```
Testing matcher with: I have experience in develop cloud applications and automate cloud tasks to improve operational efficiency.
Matched 'develop cloud applications': develop cloud applications
Matched 'automate cloud tasks': automate cloud tasks
Testing matcher with: During my internship, I worked on code with cloud services and implement cloud network solutions to enhance connectivity.
Matched 'code with cloud services': code with cloud services
Matched 'implement cloud network': implement cloud network
