# Job Description (JD) Summarizer

## Install Required Libraries

In [14]:
# Install required libraries

!pip install nltk spacy sumy rake-nltk
!python -m spacy download en_core_web_sm
!pip uninstall -y spacy pydantic

Collecting rake-nltk
  Obtaining dependency information for rake-nltk from https://files.pythonhosted.org/packages/3b/e5/18876d587142df57b1c70ef752da34664bb7dd383710ccf3ccaefba2aa0c/rake_nltk-1.0.6-py3-none-any.whl.metadata
  Downloading rake_nltk-1.0.6-py3-none-any.whl.metadata (6.4 kB)
Downloading rake_nltk-1.0.6-py3-none-any.whl (9.1 kB)
Installing collected packages: rake-nltk
Successfully installed rake-nltk-1.0.6

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be 

In [2]:
!pip install spacy pydantic==1.10.13
!python -m spacy download en_core_web_sm


Collecting spacy
  Obtaining dependency information for spacy from https://files.pythonhosted.org/packages/4c/d9/439aed2e686b30aef877dea6181852810ae9dc2fcbc9003e9eedb543e463/spacy-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl.metadata
  Downloading spacy-3.8.4-cp311-cp311-macosx_10_9_x86_64.whl.metadata (27 kB)
Collecting pydantic==1.10.13
  Obtaining dependency information for pydantic==1.10.13 from https://files.pythonhosted.org/packages/61/ed/dab1c82927bca9c2b510f6aec170036efbc382c2bb3929a23f325fc30ce1/pydantic-1.10.13-cp311-cp311-macosx_10_9_x86_64.whl.metadata
  Downloading pydantic-1.10.13-cp311-cp311-macosx_10_9_x86_64.whl.metadata (149 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m149.6/149.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting thinc<8.4.0,>=8.3.4 (from spacy)
  Obtaining dependency information for thinc<8.4.0,>=8.3.4 from https://files.pythonhosted.org/packages/85/47/68187c78a04cdc31cbd3ae393068f994b60476b5ecac6dfe

In [16]:
# Import libraries
import nltk
import spacy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from sumy.summarizers.text_rank import TextRankSummarizer
from collections import Counter
from rake_nltk import Rake
import re
print(f"spaCy version: {spacy.__version__}")


spaCy version: 3.8.4


In [4]:
# Load spaCy model
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    print("Downloading 'en_core_web_sm' model...")
    !python -m spacy download en_core_web_sm
    nlp = spacy.load("en_core_web_sm")

In [5]:
# Then test pydantic import
import pydantic
print(f"pydantic version: {pydantic.__version__}")

pydantic version: 1.10.13


## Import Libraries

In [6]:
import nltk
import spacy
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.lsa import LsaSummarizer
from collections import Counter

# Download NLTK data
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Error loading punkt: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>
[nltk_data] Error loading stopwords: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1002)>


False

In [7]:
import spacy
import pydantic
print("spaCy version:", spacy.__version__)
print("pydantic version:", pydantic.__version__)

spaCy version: 3.8.4
pydantic version: 1.10.13


## Text Preprocessing

In [17]:
# Text preprocessing
def preprocess_text(text):
    # Load spaCy model
    nlp = spacy.load("en_core_web_sm")

    # Tokenize, remove stopwords, and lemmatize
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop and not token.is_punct and token.is_alpha]

    return tokens

## Keyword Extraction

In [18]:
# Keyword extraction using RAKE
def extract_keywords(text, top_n=10):
    rake = Rake()
    rake.extract_keywords_from_text(text)
    ranked_phrases = rake.get_ranked_phrases()[:top_n]
    return ranked_phrases


## Summarization

In [10]:
# Summarization using TextRank
def summarize_text(text, sentences_count=3):
    parser = PlaintextParser.from_string(text, Tokenizer("english"))
    summarizer = TextRankSummarizer()
    summary = summarizer(parser.document, sentences_count)
    return ' '.join([str(sentence) for sentence in summary])

## Putting It All Together

In [19]:
# Putting it all together
def summarize_job_description(jd_text):
    # Preprocess the text
    tokens = preprocess_text(jd_text)

    # Extract keywords
    keywords = extract_keywords(jd_text)

    # Summarize the text
    summary = summarize_text(jd_text)

    return {
        "keywords": keywords,
        "summary": summary
    }

## Example Usage

In [20]:
job_description = """
We are looking for a Python Developer to join our team. The ideal candidate should have experience with Python, Django, and Flask.
Responsibilities include developing and maintaining web applications, writing clean and scalable code, and collaborating with cross-functional teams.
The candidate should have a strong understanding of software development principles and be able to work in an agile environment.
Experience with REST APIs, databases, and version control systems like Git is a plus.
"""

result = summarize_job_description(job_description)
print("Keywords:", result["keywords"])
print("Summary:", result["summary"])

Keywords: ['version control systems like git', 'software development principles', 'responsibilities include developing', 'maintaining web applications', 'writing clean', 'strong understanding', 'scalable code', 'rest apis', 'functional teams', 'agile environment']
Summary: We are looking for a Python Developer to join our team. The candidate should have a strong understanding of software development principles and be able to work in an agile environment. Experience with REST APIs, databases, and version control systems like Git is a plus.


In [21]:
job_description = """
UniHub UniHub Dashboard James Notifications0 Spencer Academies Trust Nottingham, East Midlands, United Kingdom 1 job Teaching Intern United Kingdom £425 per week for full attendance & participation Full-time, Temporary Teaching Internship Programme takes place in Spencer Academies Trust schools across Nottinghamshire and Derbyshire. You will gain an insight into the life of a teacher and participate in training sessions. Applications close on 9 Feb 2025 Job details Teaching Internship Programme takes place in Spencer Academies Trust schools across Nottinghamshire and Derbyshire. You will gain an insight into the life of a teacher and participate in training sessions. Subjects: Maths Physics Chemistry Computing MFL. On the programme you will gain in-school experience, learn about the life of a teacher, support pupils in the classroom, shadow experienced teachers, discover the routes into teaching and participate in teaching. Eligibility You must be an undergraduate or master’s student, studying in a UK university, who is considering a career in teaching. The programme lasts for 3 weeks, 9th to 27th June 2025. You'll receive personalised post-programme support to assist you with getting into a career in teaching. More info Job types Graduate job Internship Expected commencement 9th June 2024 Posted 7 Nov 2024 Applications close on 9 Feb 2025 More jobs Independent Advisory Group – Volunteer – Derbyshire Derbyshire Constabulary Derbyshire, England, United Kingdom Youth Work Volunteer Safe and Sound Group First Floor East Mill, Darley Abbey Mills, Darley Abbey., Derby, Derbyshire, England, DE22 1DZ, United Kingdom Awareness Volunteer Safe and Sound Group First Floor East Mill, Darley Abbey Mills, Darley Abbey, Derby, Derbyshire, England, DE22 1DZ, United Kingdom
"""

result = summarize_job_description(job_description)
print("Keywords:", result["keywords"])
print("Summary:", result["summary"])

Keywords: ['info job types graduate job internship expected commencement 9th june 2024 posted 7 nov 2024 applications close', 'united kingdom 1 job teaching intern united kingdom £ 425 per week', 'jobs independent advisory group – volunteer – derbyshire derbyshire constabulary derbyshire', '9 feb 2025 job details teaching internship programme takes place', 'unihub unihub dashboard james notifications0 spencer academies trust nottingham', 'temporary teaching internship programme takes place', 'united kingdom youth work volunteer safe', 'spencer academies trust schools across nottinghamshire', 'spencer academies trust schools across nottinghamshire', 'sound group first floor east mill']
Summary: UniHub UniHub Dashboard James Notifications0 Spencer Academies Trust Nottingham, East Midlands, United Kingdom 1 job Teaching Intern United Kingdom £425 per week for full attendance & participation Full-time, Temporary Teaching Internship Programme takes place in Spencer Academies Trust schools a