In [1]:
import spacy
import en_core_web_sm
import pandas as pd
import pypandoc
from pypandoc.pandoc_download import download_pandoc
#download_pandoc()
import csv
import docx
import os
from pathlib import Path
from os import listdir
from os.path import isfile, join
import shutil
import html2text

class Skill:
    """One skill: the path of its HTML resource, its search keyword and its groups."""

    def __init__(self, name, keyword, groups=None, prerequisites=None):
        """Derive the resource path from ``name`` and record keyword and groups.

        ``prerequisites`` is accepted but not used by this constructor.
        """
        # Slashes in the name would otherwise act as directory separators.
        safe_name = name.replace('/', '-').replace("\\", '-')
        joined = os.path.join("skill", safe_name + ".html")
        # Expose the path with forward slashes regardless of what os.path.join produced.
        self.resource_path = joined.replace("\\", '/')
        self.keyword_search = keyword  # keyword for searching LLM
        self.group_set = set()
        if groups is not None:
            self.UpdateGroupSet(groups)

    def UpdateGroupSet(self, groups):
        """Add every element of ``groups`` to this skill's group set."""
        self.group_set.update(groups)

    def ChangeKeyword(self, keyword):
        """Replace the stored search keyword."""
        self.keyword_search = keyword
  
class Group:
    """A named group of skills with its own HTML resource and search keyword."""

    def __init__(self,name,skills):
        """Derive the resource path and keyword from ``name`` and copy ``skills``.

        name   -- group name; '/' and '\\' are replaced by '-' in the file name.
        skills -- iterable of skill names, or None for an empty group.
        """
        filename = name
        filename = filename.replace('/','-')
        filename = filename.replace("\\",'-')
        filename = filename + ".html"
        path = os.path.join("group", filename)
        path = path.replace("\\",'/')
        self.resource_path = path # for the resource path
        self.keyword_search = name + " in tech" # keyword for searching LLM
        # Keep a private copy: UpdateSkillSet mutates this set in place, and the
        # caller's own collection must not change as a side effect.
        self.skill_set = set(skills) if skills is not None else set()

    def UpdateSkillSet(self,skill):
        """Add every element of the iterable ``skill`` to the group's skill set."""
        self.skill_set.update(skill)

    def ChangeKeyword(self,keyword):
        """Replace the stored search keyword."""
        self.keyword_search = keyword

class TechStack:
    def __init__(self):
        self.nlp = spacy.load('en_core_web_md')
        self.skill_dict_list = {}
        self.group_dict_list = {}
        self.exact_match_replace_dict_list = {}
        self.partial_match_replace_dict_list = {}
        self.vector_group_dict_list = {}
        self.ignore_set = set()
        self.not_found_dict_list = {}
        self.document_pepare_set = set()
        self.three_word_skill_classification_set =set()
        self.two_word_skill_classification_set =set()
        self.one_word_skill_classification_set =set()
        self.backup_keyword_dict_list={}
        self.ImportIgnoreSet()
        self.AllThisWillBeRemoveOnceFinalize()
        self.ImportClassificationSet()
        self.ImportSkillDictList()
        self.GroupTextVectorization()
          
    def AddSkillDictList(self,name,keyword,groups=None):
        if name not in self.skill_dict_list:
            self.skill_dict_list[name] = Skill(name,keyword,groups)
            #print(name,"added in skill_dict_list.")
            if groups is not None:
                for g in groups:
                    if g in self.group_dict_list:
                        self.group_dict_list.get(g).UpdateSkillSet({name})
                        #print(name,"added in",g,".")
                    else:
                        self.group_dict_list[g] = Group(g,{name})
                        #print("new group:",g,"have been created and added",name,".")
        else:
            self.UpdateSkillDictList(name,groups)

    def ReClassificationSkillDictList(self,name,keyword,groups):
        search_keyword = keyword
        if name in self.backup_keyword_dict_list:
            search_keyword = self.backup_keyword_dict_list[name]
        self.AddSkillDictList(name,search_keyword,groups)
                    
    def UpdateSkillDictList(self,name,groups):
        if name in self.skill_dict_list:
            self.skill_dict_list[name].UpdateGroupSet(groups)

    def AddGroupDictList(self,name,skills):
        if skills is not None:
            if name in self.group_dict_list:
                self.UpdateGroupDictList(name,skills)
            else:
                found_set = set()
                for s in skills:
                    if s in self.skill_dict_list:
                        self.skill_dict_list[s].UpdateGroupSet({name})
                        found_set.add(s)  
                        #print(s,"added in",name,"group set.")
                self.group_dict_list[name] = Group(name,found_set)

    def UpdateGroupDictList(self,name,skills):
        if name in self.group_dict_list:
            found_set = set()
            for s in skills:
                if s in self.skill_dict_list:
                      found_set.add(s)  
            self.group_dict_list[name].UpdateSkillSet(found_set)
        else:
            self.AddGroupDictList(name,skills)

    def AddNotFoundDictList(self,name,keyword):
        if name not in self.not_found_dict_list:
            path = "unclassified"
            self.not_found_dict_list[name] =  Skill(name,path,keyword,None)   

    def ImportIgnoreSet(self):
        f = open("ignore.txt", "r")
        for c in f:
            c = c.replace("\n", "")
            self.ignore_set.add(c)
        f.close()

    def ImportClassificationSet(self):
        f = open("three word skill classification.txt", "r")
        for l in f:
            l = l.replace("\n", "")
            self.three_word_skill_classification_set.add(l)
        f.close()
        f = open("two word skill classification.txt", "r")
        for l in f:
            l = l.replace("\n", "")
            self.two_word_skill_classification_set.add(l)
        f.close()
        f = open("one word skill classification.txt", "r")
        for l in f:
            l = l.replace("\n", "")
            self.one_word_skill_classification_set.add(l)
        f.close()
        
    def ExportSkillDictList(self):
        file_path = "skills.csv"
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Name", "Search Keyword","Resource Path","Groups"])
            for key, value in self.skill_dict_list.items():
                name=key
                search = value.keyword_search
                path = value.resource_path
                groups =""
            
                for g in value.group_set:
                    groups += "["
                    groups += g
                    groups +="]"
             
                writer.writerow([name,search,path,groups])
            file.close()
        with open('skills.txt', 'w') as f:
            for i in self.skill_dict_list:
                f.write(i)
                f.write("\n")
            f.close()
            
    def ImportSkillDictList(self):
        df = pd.read_csv("skills.csv")
        for index, row in df.iterrows():
            name = str(row['Name'])
            keyword = str(row['Search Keyword'])
            groups = str(row['Groups'])
            groups_set = None
            groups = groups.replace('[', '')
            groups_list = groups.split(']')
            if len(groups_list) > 0 :
                groups_list = groups_list[:-1]
                groups_set = set()
                for g in groups_list:
                    groups_set.add(g)
            # auto create group also
            self.AddSkillDictList(name,keyword,groups_set)
                
    def ExportGroupDictList(self):
        file_path = "groups.csv"
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Name", "Search Keyword","Resource Path","skills"])
            for key, value in self.group_dict_list.items():
                name=key
                search = value.keyword_search
                path = value.resource_path
                skills =""
                for s in value.skill_set:
                    skills += "["
                    skills += s
                    skills +="]"
                writer.writerow([name,search,path,skills])
            file.close()

    def ExportMatchReplaceDictList(self):
        file_path = "exact match.csv"
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Word", "Replace"])
            for key, value in self.exact_match_replace_dict_list.items():
                writer.writerow([key,value])
            file.close()
        file_path = "partial match.csv"
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Word", "Replace"])
            for key, value in self.partial_match_replace_dict_list.items():
                writer.writerow([key,value])
            file.close()

    def ExportNotFoundSet(self):
        file_path = "not found.csv"
        with open(file_path, 'w', newline='') as file:
            writer = csv.writer(file)
            writer.writerow(["Name", "Search Keyword","Resource Path"])
            for key, value in self.not_found_dict_list.items():
                name=key
                search = value.keyword_search
                path = "skill unclassified/not tech/" + name + ".html"
             
                writer.writerow([name,search,path])
            file.close()
     
    def Filter(self, text):
        text = text.lower()
        text = text.replace("/"," ")
        if text.find('(') != -1:
            text = text.split("(")[0]
            text = text.rsplit()[0]
            
        words = text.split()
    
        if text in self.ignore_set:
            return str("")
            
        if text in self.exact_match_replace_dict_list:
            text = self.exact_match_replace_dict_list.get(text)
            
        words = text.split()
        new_text = ""
        for word in words:
            if word in self.partial_match_replace_dict_list:
                new_text += self.partial_match_replace_dict_list.get(word) 
                new_text +=" "
            else:
                new_text += word 
                new_text +=" "
        return new_text[:-1]

    def Search(self,text):
        if text in self.skill_dict_list:
            self.document_pepare_set.add(text)
            return True
        if text in self.group_dict_list:
            self.document_pepare_set.add(text)
            return True
        # check for . - space and .js js
        for sdl in  self.skill_dict_list:
            check1 = sdl
            check1 = check1.replace(".","")
            check2 = text
            check2 = check2.replace(".","")
            if check1 == check2:
                self.document_pepare_set.add(sdl)
                return True
            check1 = sdl
            check1 = check1.replace("-"," ")
            check2 = text
            check2 = check2.replace("-"," ")
            if check1 == check2:
                self.document_pepare_set.add(sdl)
                return True
            check1 = sdl
            check1 = check1.replace(" ","")
            check2 = text
            check2 = check2.replace(" ","")
            if check1 == check2:
                self.document_pepare_set.add(sdl)
                return True
            check1 = sdl
            check1 = check1.replace(".js","")
            check1 = check1.replace("js","")
            check2 = text
            check2 = check2.replace(".js","")
            check2 = check2.replace("js","")
            if check1 == check2:
                self.document_pepare_set.add(sdl)
                return True
   
        found = False      
        words = text.split()
        # check word by word
        for word in words:   
            if word in self.skill_dict_list:
                self.document_pepare_set.add(word)
                found = True
            elif word in self.group_dict_list:
                self.document_pepare_set.add(word)
                found = True
        
        return found 
        
    def GroupTextVectorization(self):
        for word in self.group_dict_list:
            if self.nlp.vocab[word].has_vector == True:
                vector_word = self.nlp(word)
                if vector_word not in self.vector_group_dict_list:
                    self.vector_group_dict_list[vector_word] = set()
                self.vector_group_dict_list[vector_word].add(word)

    def VectorSearch(self, word):
        if self.nlp.vocab[word].has_vector == True:
            vector_word = self.nlp(word)
            for vw in self.vector_group_dict_list:
                similarity_score = vector_word.similarity(vw)
                if similarity_score >= 0.9:
                    for w in vector_group_dict_list[vw]:
                        print(w)

    def CopyReplaceFolder(self, source_dir ,dest_dir , filename): 
        keyword = ""
        if dest_dir == "unknown":
            keyword = filename + " in tech"
        else:
            keyword = filename
        self.ReClassificationSkillDictList(filename, keyword , {dest_dir})
        dest_dir = "skill classified/" + dest_dir
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        source_path_doc = source_dir + "/" + filename + ".docx"
        source_path_html = source_dir + "/" + filename + ".html"
        destination_path_doc =  dest_dir + "/" + filename + ".docx"
        destination_path_html = dest_dir + "/" + filename + ".html"
        if source_path_doc != destination_path_doc:
            shutil.copyfile(source_path_doc, destination_path_doc)
        if source_path_html != destination_path_html:
            shutil.copyfile(source_path_html, destination_path_html)

    def MakeDocsFromHtml(self):
        """Convert every ``html`` file in the "not tech" folder to a ``.docx`` next to it.

        Each regular file name in the folder is printed; those whose last
        dot-separated part is ``html`` are converted with pypandoc.
        """
        folder = 'skill unclassified/not tech/'
        entries = [entry for entry in listdir(folder) if isfile(join(folder, entry))]
        for entry in entries:
            print(entry)
            if entry.rsplit(".")[-1] != "html":
                continue
            stem = entry.replace(".html","")
            pypandoc.convert_file(folder + "/" + entry, 'docx', outputfile= folder + "/" + stem +".docx")
 
    def DeleteAllSkillFile(self):
        for dir in self.three_word_skill_classification_set:
            path = "skill classified/" + dir
            if os.path.isdir(path):
                for filename in os.listdir(path):
                    file_path = os.path.join(path, filename)
                    try:
                        if os.path.isfile(file_path) or os.path.islink(file_path):
                            os.unlink(file_path)
                        elif os.path.isdir(file_path):
                            shutil.rmtree(file_path)
                    except Exception as e:
                        print(f'Failed to delete {file_path}. Reason: {e}')
        for dir in self.two_word_skill_classification_set:
            path = "skill classified/" + dir
            if os.path.isdir(dir):
                for filename in os.listdir(path):
                    file_path = os.path.join(path, filename)
                    try:
                        if os.path.isfile(file_path) or os.path.islink(file_path):
                            os.unlink(file_path)
                        elif os.path.isdir(file_path):
                            shutil.rmtree(file_path)
                    except Exception as e:
                        print(f'Failed to delete {file_path}. Reason: {e}')
        for dir in self.one_word_skill_classification_set:
            path = "skill classified/" + dir
            if os.path.isdir(path):
                for filename in os.listdir(path):
                    file_path = os.path.join(path, filename)
                    try:
                        if os.path.isfile(file_path) or os.path.islink(file_path):
                            os.unlink(file_path)
                        elif os.path.isdir(file_path):
                            shutil.rmtree(file_path)
                    except Exception as e:
                        print(f'Failed to delete {file_path}. Reason: {e}')
        source_dir = 'skill'
        destination_dir = 'skill classified/unknown'
        os.makedirs(destination_dir, exist_ok=True)

        for file_name in os.listdir(source_dir):
            source_file = os.path.join(source_dir, file_name)
            destination_file = os.path.join(destination_dir, file_name)
            shutil.copy(source_file, destination_file)
        
    def SkillReClassification(self): 
        self.backup_keyword_dict_list.clear()
        for s in self.skill_dict_list:
            self.backup_keyword_dict_list[s] = self.skill_dict_list[s].keyword_search
        
        self.skill_dict_list.clear()
        self.group_dict_list.clear()
        self.vector_group_dict_list.clear()
        self.DeleteAllSkillFile()
        h = html2text.HTML2Text()
        h.ignore_links = False
        h.inline_links = False
        h.reference_links = True
        dir = 'skill classified/unknown'
        filenames = [f for f in listdir(dir) if isfile(join(dir, f))]
         
        for f in filenames:
            words = f.rsplit(".")
            extension = words[len(words)-1]
            if extension =="html":
                filename = f.replace(".html","")
                html_content = str("")
                with open(dir+"/"+ f, 'r', encoding="utf-8") as file:
                    html_content = file.read()
                    file.close()
                text_content = h.handle(html_content)
                text_content = text_content.lower()
                text_content = text_content.replace("[1]","")
                text_content = text_content.replace("[2]","")
                text_content = text_content.replace("[3]","")
                text_content = text_content.replace("[4]","")
                text_content = text_content.replace("[5]","")
                text_content = text_content.replace("[6]","")
                text_content = text_content.replace("[7]","")
                text_content = text_content.replace("[8]","")
                text_content = text_content.replace("[9]","")
                text_content = text_content.replace("[0]","")
                text_content = text_content.replace("[","")
                text_content = text_content.replace("]","")
                text_content = text_content.replace("(","")
                text_content = text_content.replace(")","")
                text_content = text_content.replace("*","")
                text_content = text_content.replace("\"","")
                text_content = text_content.replace("’s","")
                text_content = text_content.replace("!","")
                text_content = text_content.replace(":","")
                text_content = text_content.replace(",","")
                text_content = text_content.replace("\n"," ")
                text_content = text_content.replace("/"," ")
                text_content = text_content.replace("-"," ")
              
                words = text_content.split()
                have_classific = False
                for i in range(len(words)):
                    first_word = words[i]
                    if first_word.endswith('.'):
                        first_word = first_word[:-1]
                   
                    one_word = first_word
                    one_word = one_word.replace("microservices","microservice")
                    one_word = one_word.replace("protocols","protocol")
                    one_word = one_word.replace("networks","network")
                    one_word = one_word.replace("website","web")
                    one_word = one_word.replace("test","testing")
                    one_word = one_word.replace("visualizations","visualization")
                    one_word = one_word.replace("aws","amazon")
        
                    if one_word in self.one_word_skill_classification_set:
                        self.CopyReplaceFolder(dir,one_word,filename)
                        have_classific = True
        
                    if one_word =="ai":
                        one_word = "artificial intelligence"
                        if one_word in self.two_word_skill_classification_set:
                            self.CopyReplaceFolder(dir,one_word,filename)
                            have_classific = True
                    if one_word =="api":
                        one_word = "application programming interface"
                        if one_word in self.three_word_skill_classification_set:
                            self.CopyReplaceFolder(dir,one_word,filename)
                            have_classific = True
                    if one_word =="nlp":
                        one_word = "natural language processing"
                        if one_word in self.three_word_skill_classification_set:
                            self.CopyReplaceFolder(dir,one_word,filename)
                            have_classific = True

                    if i + 1 >= len(words):
                        break
                    second_word = words[i+1]
                    if second_word.endswith('.'):
                        second_word = second_word[:-1] 
                
                    two_word = first_word + " " + second_word
                    two_word = two_word.replace(" servers"," server")
                    two_word = two_word.replace(" services"," service")
                    two_word = two_word.replace(" applications"," application")
                    two_word = two_word.replace(" apps"," application")
                    two_word = two_word.replace(" app"," application")
                    two_word = two_word.replace(" databases"," database")
                    two_word = two_word.replace(" machines"," machine")
                    two_word = two_word.replace("website","web")
                      
                    if two_word in self.two_word_skill_classification_set:
                        self.CopyReplaceFolder(dir,two_word,filename)
                        have_classific = True

                    if i + 2 >= len(words):
                        break
                    third_word = words[i+2]
                    if third_word.endswith('.'):
                        third_word = third_word[:-1] 
                    three_word = first_word + " " + second_word + " " +  third_word
                    if three_word in self.three_word_skill_classification_set:
                        self.CopyReplaceFolder(dir,three_word,filename)
                        have_classific = True
                
                if have_classific == True:
                    file_path = dir +"/" + filename + ".html"
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    else:
                        print(f"The file {file_path} does not exist.")
                    file_path = dir +"/" + filename + ".docx"
                    if os.path.isfile(file_path):
                        os.remove(file_path)
                    else:
                        print(f"The file {file_path} does not exist.")
                else:
                    self.ReClassificationSkillDictList(filename, filename + " in tech" , {"unknown"})
                        
        self.GroupTextVectorization()
        self.ExportSkillDictList()
        self.ExportGroupDictList()

    def ClassificationUnClassifedSkill(self):
        h = html2text.HTML2Text()
        h.ignore_links = False
        h.inline_links = False
        h.reference_links = True
        dir = 'skill unclassified/not tech'
        filenames = [f for f in listdir(dir) if isfile(join(dir, f))]
        one_word_dict_list = {}
        two_word_dict_list = {}
        three_word_dict_list = {}
        ignore_word_list = ["a","an","the","of","on","as","by","to","with","for","is","are","was","were", "in"]
        tech_word_list = ["software" ,"application", "applications", "platform", "platforms","api", "web", "website","network","networks","security","architecture", "development" , "system", "systems", "language", "cloud", "data", "open","source", "windows"]
        for f in filenames:
            words = f.rsplit(".")
            extension = words[len(words)-1]
            if extension =="html":
                filename = f.replace(".html","")
                html_content = str("")
                with open(dir+"/"+ f, 'r', encoding="utf-8") as file:
                    html_content = file.read()
                    file.close()
                text_content = h.handle(html_content)
                text_content = text_content.lower()
                text_content = text_content.replace("[1]","")
                text_content = text_content.replace("[2]","")
                text_content = text_content.replace("[3]","")
                text_content = text_content.replace("[4]","")
                text_content = text_content.replace("[5]","")
                text_content = text_content.replace("[6]","")
                text_content = text_content.replace("[7]","")
                text_content = text_content.replace("[8]","")
                text_content = text_content.replace("[9]","")
                text_content = text_content.replace("[0]","")
                text_content = text_content.replace("[","")
                text_content = text_content.replace("]","")
                text_content = text_content.replace("(","")
                text_content = text_content.replace(")","")
                text_content = text_content.replace("*","")
                text_content = text_content.replace("\"","")
                text_content = text_content.replace("’s","")
                text_content = text_content.replace("!","")
                text_content = text_content.replace(":","")
                text_content = text_content.replace(",","")
                text_content = text_content.replace("\n"," ")
                text_content = text_content.replace("/"," ")
                text_content = text_content.replace("-"," ")
                words = text_content.split()
                is_tech = False
                for i in range(len(words)):
                    if words[i] in tech_word_list:
                        is_tech = True
                        break
                if is_tech == True:
                    source_file = os.path.join("skill unclassified/not tech", file_name)
                    destination_file = os.path.join("skill unclassified/tech", file_name)
                    shutil.copy(source_file, destination_file)
                    
    def FindClassificationKeyword(self):
        h = html2text.HTML2Text()
        h.ignore_links = False
        h.inline_links = False
        h.reference_links = True
        dir = 'skill classified/unknown'
        filenames = [f for f in listdir(dir) if isfile(join(dir, f))]
        one_word_dict_list = {}
        two_word_dict_list = {}
        three_word_dict_list = {}
        ignore_word_list = ["a","an","the","of","on","as","by","to","with","for","is","are","was","were", "in"]
        for f in filenames:
            words = f.rsplit(".")
            extension = words[len(words)-1]
            if extension =="html":
                filename = f.replace(".html","")
                html_content = str("")
                with open(dir+"/"+ f, 'r', encoding="utf-8") as file:
                    html_content = file.read()
                    file.close()
                text_content = h.handle(html_content)
                text_content = text_content.lower()
                text_content = text_content.replace("[1]","")
                text_content = text_content.replace("[2]","")
                text_content = text_content.replace("[3]","")
                text_content = text_content.replace("[4]","")
                text_content = text_content.replace("[5]","")
                text_content = text_content.replace("[6]","")
                text_content = text_content.replace("[7]","")
                text_content = text_content.replace("[8]","")
                text_content = text_content.replace("[9]","")
                text_content = text_content.replace("[0]","")
                text_content = text_content.replace("[","")
                text_content = text_content.replace("]","")
                text_content = text_content.replace("(","")
                text_content = text_content.replace(")","")
                text_content = text_content.replace("*","")
                text_content = text_content.replace("\"","")
                text_content = text_content.replace("’s","")
                text_content = text_content.replace("!","")
                text_content = text_content.replace(":","")
                text_content = text_content.replace(",","")
                text_content = text_content.replace("\n"," ")
                text_content = text_content.replace("/"," ")
                text_content = text_content.replace("-"," ")
                words = text_content.split()
                for i in range(len(words)):
                    first_word = words[i]
                    if '1.' in first_word:
                        break
                    if first_word.endswith('.'):
                        first_word = first_word[:-1]
                    if first_word in ignore_word_list:
                        continue
                    one_word = first_word
                    if one_word not in one_word_dict_list:
                        one_word_dict_list[one_word] = 0
                    one_word_dict_list[one_word]+=1
                    
                    second_word = words[i+1]
                    if second_word.endswith('.'):
                        second_word = second_word[:-1] 
                    if second_word in ignore_word_list:
                        continue
                    two_word = first_word + " " + second_word
                    if two_word not in two_word_dict_list:
                        two_word_dict_list[two_word] = 0
                    two_word_dict_list[two_word]+=1

                    third_word = words[i+2]
                    if third_word.endswith('.'):
                        third_word = third_word[:-1] 
                    if third_word in ignore_word_list:
                        continue
                    three_word = first_word + " " + second_word + " " +third_word
                    if three_word not in three_word_dict_list:
                        three_word_dict_list[three_word] = 0
                    three_word_dict_list[three_word]+=1
        with open('count one word.txt', 'w', encoding="utf-8" ) as f:
            for s in sorted(one_word_dict_list, key=one_word_dict_list.get, reverse=True):
                f.write(str(s) + " - " + str(one_word_dict_list[s]))
                f.write('\n')
            file.close()
        with open('count two word.txt', 'w', encoding="utf-8") as f:
            for s in sorted(two_word_dict_list, key=two_word_dict_list.get, reverse=True):
                f.write(str(s) + " - " + str(two_word_dict_list[s]))
                f.write('\n')
            file.close()
        with open('count three word.txt', 'w', encoding="utf-8") as f:
            for s in sorted(three_word_dict_list, key=three_word_dict_list.get, reverse=True):
                f.write(str(s) + " - " + str(three_word_dict_list[s]))
                f.write('\n')
            file.close()
          
    
    def GenerateLearningResource(self,your_skills, job_skills):
        skills = set()
        if your_skills is not None:
            skills =  job_skills -  your_skills 
        else:
            skills = job_skills

        if len(skills) == 0:
            print("you are good.")
            return False
        
        self.document_pepare_set.clear()

        for s in skills:
            s = self.Filter(s)
            if s != "":
                found = self.Search(s)
                if found == False:
                    self.AddNotFoundDictList(s,s + " in tech")
                    
        if len(self.document_pepare_set) == 0 :
            print("No any learning resource generated.")
            return False
        print("not found")
        for d in self.not_found_dict_list:
            print(d)
        html_content = ""
        for d in self.document_pepare_set:
            path = ""
            if d in self.skill_dict_list:
                v = self.skill_dict_list.get(d)
                path = v.resource_path
            elif d in self.skill_dict_list:
                v = self.group_dict_list.get(d)
                path = v.resource_path
            else:
                continue

            if os.path.isfile(path) == False:
                print(d,"not found in",path)
            else:
                with open(path, "r", encoding="utf-8") as file:
                    title = d.title()
                    html_content +="<h1><u><b>"
                    html_content += title
                    html_content +="</b></u></h1>"
                    html_content += file.read()
                file.close()
        with open("learning resource.html", 'w', encoding='utf-8') as file:
            file.write(html_content)  
            file.close()
        output = pypandoc.convert_text(html_content, 'docx', format='html', outputfile='learning resource.docx')
        if output == "":
            print("Document output sucessfully.")
            return True
        else:
            print("Document output failed")
            return False
                 
            
    def AllThisWillBeRemoveOnceFinalize(self):
        
        self.exact_match_replace_dict_list["aws"]="amazon web services"
        self.exact_match_replace_dict_list["tdd"]="testing"
        self.exact_match_replace_dict_list["webdriver"]="web crawler"
        self.exact_match_replace_dict_list["vbnet"]="visual basic .net"
        self.exact_match_replace_dict_list["vb.net"]="visual basic .net"
        self.exact_match_replace_dict_list["vb"]="visual basic"
        self.exact_match_replace_dict_list["html5"]="html"
        self.exact_match_replace_dict_list["svn"]="subversion"
        self.exact_match_replace_dict_list["rdbms"]="relational"
        self.exact_match_replace_dict_list["unity3d"]="unity"
        self.exact_match_replace_dict_list["mssql"]="microsoft sql"
        self.exact_match_replace_dict_list["shaders"]="shader"
        self.exact_match_replace_dict_list["uat"]="testing"
        self.exact_match_replace_dict_list["mui"]="material ui"
        self.exact_match_replace_dict_list["gui"]="graphical user interface"
        self.exact_match_replace_dict_list["ui"]="user interface"
        self.exact_match_replace_dict_list["mq"]="message queue"
        self.exact_match_replace_dict_list["aliyun"]="alibaba cloud"
        self.exact_match_replace_dict_list["ali-cloud"]="alibaba cloud"
        
        self.partial_match_replace_dict_list["ms"]="microsoft"
        self.partial_match_replace_dict_list["system"]="systems"
        self.partial_match_replace_dict_list["window"]="windows"
        self.partial_match_replace_dict_list["databases"]="database"
        self.partial_match_replace_dict_list["website"]="web"
        self.partial_match_replace_dict_list["test"]="testing"
        self.partial_match_replace_dict_list["networking"]="network"
        self.partial_match_replace_dict_list["solarwinds"]="solarwind"

     
        
        self.ExportMatchReplaceDictList()
     
      

In [2]:
# Build the set of job skills from the text file (one skill per line) and
# generate the learning-resource document for all of them.
test = TechStack()
skills = set()
# The context manager guarantees the file is closed; the previous `f.close`
# (without parentheses) only referenced the method and never called it.
with open("nodeflair skill.txt", "r") as f:
    for c in f:
        c = c.replace("\n", "")
        # Lines consisting solely of 'x' are skipped.
        if c == 'x':
            continue
        skills.add(c)
result = test.GenerateLearningResource(None, skills)
test.ExportNotFoundSet()

not found
canal
ireport
mantis
cocoa framework
jersey
ranger
gauge
charles
hooks
iss
quartz
jws
autonomy
xray
kepler
bourne
kvs
onemap
fission
nose
karate
endur
flux
zeppelin
dapresy
lora
spa
polymer
amber
bamboo
fresco
hudson
sonar
ecr
ems
mode
metal
retrofit
container
combine
bottle
abc
apollo
aquadata
fn
stripe
dash
code climate
dojo
jackson
enzyme
adobe
mocha
concourse
entity
unicon
swing
fisheye
graphite
drone
modular
mvp
scout
fink
karma
photoshop
tencent
leaflet
trac
segment
adobe xd
busted
hyperion
jcr
insomnia
dat
studs
relay
epoxy
eclipse
nexus
yii framework
amplitude
viper
cvs
graven
edb
quasar
openauth
espresso
Document output sucessfully.


In [None]:
#openauth not found in unknown/openauth.html
#elastic bean stalk not found in unknown/elastic bean stalk.html
#https://stackoverflow.com/questions/75475470/how-to-extract-the-all-hyperlink-and-their-text-from-a-word-document-using-pytho

In [9]:
import html2text

# Read the saved HTML page that is going to be converted to text.
html_content = ""
with open('unknown/c++.html', 'r', encoding="utf-8") as file:
    html_content = file.read()

# Converter setup: links are kept (ignore_links off) and inline links are turned off.
# NOTE(review): reference_links is set as in the original; confirm html2text honours it.
h = html2text.HTML2Text()
h.ignore_links, h.inline_links, h.reference_links = False, False, True

# Convert HTML to text with separated links
text_content = h.handle(html_content)

print(text_content)


![C++][1]

![C++][2]

Explore

Certainly! **C++** is a **cross-platform programming language** that extends
the capabilities of the C language, providing high control over system
resources and memory. [It’s widely used for creating high-performance
applications, operating systems, and embedded systems][3][1][3][2][4][3][5].

Here are **five free resources** where you can learn C++:

  1. [****][3]**[W3Schools C++ Introduction][3]** : This tutorial covers the basics of C++, including syntax, variables, and development[1][3].
  2. [****][3]**[LearnCpp.com][6]** : A comprehensive website with step-by-step tutorials, examples, and quizzes to help you master C++ programming[4][6].
  3. [****][3]**[Programiz C++ Tutorial][7]** : Offers interactive lessons, examples, and references for learning C++[5][7].
  4. [****][3]**[Codecademy C++ Course][8]** : A beginner-friendly course that covers C++ essentials for software development[6][8].
  5. **Official C++ Documentation** : The official docume

In [10]:
from flask import Flask, request, jsonify, send_file, after_this_request
import zipfile
import os
from io import BytesIO

app = Flask(__name__)

@app.route('/get_zip', methods=['GET'])
def get_zip():
    """Build a zip archive in memory from two query parameters and send it as a download.

    Query parameters:
        param1, param2: each produces one archive member named ``<param>.txt``
        containing ``Content for <param>``.

    Returns:
        The zip archive as an attachment named ``files.zip``, or a JSON error
        with HTTP status 400 when either parameter is missing or empty.
    """
    # Retrieve parameters from the GET request
    param1 = request.args.get('param1')
    param2 = request.args.get('param2')

    # Without this check a missing parameter would produce a member called "None.txt".
    if not param1 or not param2:
        return jsonify({'success': False, 'message': 'param1 and param2 are required'}), 400

    # Create a zip file in memory
    # NOTE(review): the parameters come straight from the query string and are used
    # as archive member names; sanitise them if the archive is ever extracted
    # automatically (path components such as "../" are not filtered here).
    memory_file = BytesIO()
    with zipfile.ZipFile(memory_file, 'w') as zf:
        # Add files to the zip file using the parameters
        zf.writestr(f'{param1}.txt', f'Content for {param1}')
        zf.writestr(f'{param2}.txt', f'Content for {param2}')
    memory_file.seek(0)

    # The archive lives only in memory, so there is no file on disk to clean up.
    # The previous after_this_request hook tried to os.remove() an undefined
    # `zip_path`, and the send_file response was discarded in favour of a JSON
    # body that pointed back at this same endpoint.
    # `download_name` replaces `attachment_filename`, which newer Flask releases no longer accept.
    return send_file(
        memory_file,
        mimetype='application/zip',
        as_attachment=True,
        download_name='files.zip',
    )

if __name__ == '__main__':
    # The debug reloader restarts the process using sys.argv, which fails inside
    # a notebook kernel; keep debug mode but disable the reloader.
    app.run(debug=True, use_reloader=False)


ModuleNotFoundError: No module named 'flask'

done
