## Importing Modules

In [1]:
import pandas as pd
import en_core_web_sm
from spacy.matcher import PhraseMatcher

In [2]:
nlp = en_core_web_sm.load()

In [3]:
import PyPDF2
pdfFileObj = open('resume.pdf', 'rb')
pdfReader = PyPDF2.PdfReader(pdfFileObj)
print(len(pdfReader.pages))
pageObj = pdfReader.pages[0]
print(pageObj.extract_text ())
text=pageObj.extract_text ()
pdfFileObj.close()

1
Abhinav Sreekumar
+919894373905 |abhinavsreekumarofficial@gmail.com |https://www.linkedin.com/in/abhinav-sreekumar |
https://github.com/ASKOFFICIAL
Education
Vellore Institute of Technology Vellore, Tamil Nadu
Bachelor of Technology in Computer Science. CGPA: 8.34 2020 – ongoing
St. Jude’s Public School and Junior College Kotagiri, Tamil Nadu
Indian School Certificate 12th Grade. Percentage: 94.5 – 2020
Riverside Public School Kotagiri, Tamil Nadu
Indian Certificate of Secondary Education 10th Grade. Percentage: 92.8 – 2018
Experience
Full Stack Machine Learning Intern February 2022 – April 2023
Systematrix Inc. Georgia, Atlanta
•Trained extensively with Neo4J a graph database for Financial Data to detect fraud and helped maintain and
modify the UI used by fraud investigators
•Added Multiple features to the front end D3JS object used to visualise the data such as adding softlinks between
nodes disconnected in the database
•Developed a feature which queries MongoDB with all the nodes 

## Reading in Keywords

In [4]:
keyword_dict = pd.read_csv('Skills_Keywords.csv')
keyword_dict.head()

Unnamed: 0,Statistics,Machine Learning,Deep Learning,R Language,Python Language,NLP,Data Engineering
0,statistical models,linear regression,neural network,r,python,nlp,laws
1,statistical modeling,logistic regression,keras,ggplot,flask,natural language processing,ec2
2,probability,K means,theano,shiny,django,topic modeling,amazon redshift
3,normal distribution,random forest,face detection,cran,pandas,Ida,s3
4,poisson distribution,xgboost,neural networks,dplyr,numpy,named entity recognition,docker


### Creating lists of tokenized keywords for each category for pattern matching

In [5]:
stats_words = [nlp(text) for text in keyword_dict['Statistics'].dropna(axis = 0)]
nlp_words = [nlp(text) for text in keyword_dict['NLP'].dropna(axis = 0)]
ml_words = [nlp(text) for text in keyword_dict['Machine Learning'].dropna(axis = 0)]
dl_words = [nlp(text) for text in keyword_dict['Deep Learning'].dropna(axis = 0)]
r_words = [nlp(text) for text in keyword_dict['R Language'].dropna(axis = 0)]
python_words = [nlp(text) for text in keyword_dict['Python Language'].dropna(axis = 0)]
data_eng_words = [nlp(text) for text in keyword_dict['Data Engineering'].dropna(axis = 0)]

In [6]:
stats_words

[statistical models,
 statistical modeling,
 probability,
 normal distribution,
 poisson distribution,
 survival models,
 hypothesis testing,
 bayesian inference,
 factor analysis,
 forecasting,
 markow chain,
 monte carlo]

### Creating Pattern Matcher for Keyword Extraction

In [7]:
matcher = PhraseMatcher(nlp.vocab)

In [8]:
matcher.add('Stats', None, *stats_words)
matcher.add('NLP', None, *nlp_words)
matcher.add('ML', None, *ml_words)
matcher.add('DL', None, *dl_words)
matcher.add('R', None, *r_words)
matcher.add('Python', None, *python_words)
matcher.add('Data Engineering', None, *data_eng_words)

## Keyword Extraction

In [9]:
d = {}
doc = nlp(text)
print(doc)
# Find matches in the doc
matches = matcher(doc)

# For each of the matches
idx=0
for match_id, start, end in matches:
    # Get the general word and the matched phrase
    gen_word = nlp.vocab.strings[match_id]
    match = doc[start:end]

    # Append all the keywords specific to a resume ID
    d.setdefault(gen_word, []).append(match.text)
    idx+=1

Abhinav Sreekumar
+919894373905 |abhinavsreekumarofficial@gmail.com |https://www.linkedin.com/in/abhinav-sreekumar |
https://github.com/ASKOFFICIAL
Education
Vellore Institute of Technology Vellore, Tamil Nadu
Bachelor of Technology in Computer Science. CGPA: 8.34 2020 – ongoing
St. Jude’s Public School and Junior College Kotagiri, Tamil Nadu
Indian School Certificate 12th Grade. Percentage: 94.5 – 2020
Riverside Public School Kotagiri, Tamil Nadu
Indian Certificate of Secondary Education 10th Grade. Percentage: 92.8 – 2018
Experience
Full Stack Machine Learning Intern February 2022 – April 2023
Systematrix Inc. Georgia, Atlanta
•Trained extensively with Neo4J a graph database for Financial Data to detect fraud and helped maintain and
modify the UI used by fraud investigators
•Added Multiple features to the front end D3JS object used to visualise the data such as adding softlinks between
nodes disconnected in the database
•Developed a feature which queries MongoDB with all the nodes op

In [10]:
d

{'Python': ['python', 'pandas', 'python']}

In [17]:
from question.pipelines import pipeline
nlp = pipeline("multitask-qa-qg", model="valhalla/t5-base-qa-qg-hl")

In [18]:
texts={"Python":"""Python is a high-level, interpreted programming language that was first released in 1991. It has a simple syntax and is easy to learn, which makes it a popular choice for beginners. Python is used for a wide range of tasks, from web development and data analysis to artificial intelligence and scientific computing. \
One of the main features of Python is its extensive standard library, which includes modules for working with regular expressions, network protocols, GUI development, and more. Python also has a large community of developers who have created many third-party libraries and tools for working with specific tasks or domains. \
Python supports multiple programming paradigms, including procedural, object-oriented, and functional programming. It also has a dynamic type system and automatic memory management, which makes it easy to write and debug code. \
Python can be used on a variety of platforms, including Windows, macOS, and Linux. There are several popular integrated development environments (IDEs) available for Python, including PyCharm, Visual Studio Code, and Spyder. \
Python is often used in scientific computing and data analysis, thanks to its support for libraries like NumPy, Pandas, and Matplotlib. It's also commonly used in web development, with popular frameworks like Django and Flask. \
Overall, Python is a versatile and powerful programming language that is widely used in industry, academia, and research."""}

In [19]:
for i in d.keys():
    nlp(texts[i])
#Dataset Link Code Link Code Reference Tasks Link Errors Faced Research Papers

1991 "Python is a high-level, interpreted programming language that was first released in 1991.
simple syntax It has a simple syntax and is easy to learn, which makes it a popular choice for beginners.
artificial intelligence and scientific computing Python is used for a wide range of tasks, from web development and data analysis to artificial intelligence and scientific computing.
its extensive standard library One of the main features of Python is its extensive standard library, which includes modules for working with regular expressions, network protocols, GUI development, and more.
developers Python also has a large community of developers who have created many third-party libraries and tools for working with specific tasks or domains.
multiple programming paradigms Python supports multiple programming paradigms, including procedural, object-oriented, and functional programming.
automatic memory management It also has a dynamic type system and automatic memory management, which mak

[{'answer': '1991', 'question': 'When was Python first released?'},
 {'answer': 'simple syntax',
  'question': 'What does Python have that makes it a popular choice for beginners?'},
 {'answer': 'artificial intelligence and scientific computing',
  'question': 'Python is used for a wide range of tasks, from web development and data analysis to what?'},
 {'answer': 'its extensive standard library',
  'question': 'What is one of the main features of Python?'},
 {'answer': 'developers', 'question': 'Python has a large community of what?'},
 {'answer': 'multiple programming paradigms',
  'question': 'Python supports what types of programming paradigms?'},
 {'answer': 'automatic memory management',
  'question': 'What feature makes Python easy to write and debug code?'},
 {'answer': 'Windows, macOS, and Linux',
  'question': 'What are some platforms that Python can be used on?'},
 {'answer': 'PyCharm, Visual Studio Code, and Spyder',
  'question': 'What are some popular IDEs for Python?'},


In [4]:
from resume_question import generate
generate('resume.pdf')

____________________THE DOCUMENT________________
Abhinav Sreekumar
+919894373905 |abhinavsreekumarofficial@gmail.com |https://www.linkedin.com/in/abhinav-sreekumar |
https://github.com/ASKOFFICIAL
Education
Vellore Institute of Technology Vellore, Tamil Nadu
Bachelor of Technology in Computer Science. CGPA: 8.34 2020 – ongoing
St. Jude’s Public School and Junior College Kotagiri, Tamil Nadu
Indian School Certificate 12th Grade. Percentage: 94.5 – 2020
Riverside Public School Kotagiri, Tamil Nadu
Indian Certificate of Secondary Education 10th Grade. Percentage: 92.8 – 2018
Experience
Full Stack Machine Learning Intern February 2022 – April 2023
Systematrix Inc. Georgia, Atlanta
•Trained extensively with Neo4J a graph database for Financial Data to detect fraud and helped maintain and
modify the UI used by fraud investigators
•Added Multiple features to the front end D3JS object used to visualise the data such as adding softlinks between
nodes disconnected in the database
•Developed a fe