In [8]:
import time

start_time = time.time()

# Install required packages
!apt update -y
!apt install tesseract-ocr poppler-utils -y
!pip install pytesseract pdf2image spacy PyMuPDF scikit-learn
!python -m spacy download en_core_web_sm

end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 129 kB in 2s (59.3 kB/s)
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
133 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mSkipping acquire of configured file 'main/source/Sources' as repo

In [9]:
start_time = time.time()
# Import libraries
from google.colab import drive
import os
import spacy
import fitz  # PyMuPDF
import pytesseract
from pdf2image import convert_from_path
import numpy as np
import re
import json
import random
from spacy.training.example import Example
from spacy.util import minibatch
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import io
import sys
import contextlib
import time
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Execution Time: 7.23 seconds


In [10]:
start_time = time.time()
# Suppress non-critical MuPDF errors
class MuPDFErrorFilter(io.StringIO):
    def write(self, text):
        if "MuPDF error: format error" not in text:
            sys.__stderr__.write(text)

# Mount Google Drive
drive.mount('/content/drive')
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Mounted at /content/drive
Execution Time: 114.32 seconds


In [11]:
start_time = time.time()
# Define paths
resumes_dir = "/content/drive/My Drive/ResumesPDF/"
output_dir = "/content/drive/My Drive/NER_Output/"
json_dir = os.path.join(output_dir, "annotations")
model_dir = os.path.join(output_dir, "ner_model")

# Create directories
for directory in [output_dir, json_dir, model_dir]:
    if not os.path.exists(directory):
        os.makedirs(directory)
        print(f"Created directory: {directory}")
    else:
        print(f"Directory already exists: {directory}")

if not os.path.exists(resumes_dir):
    print("Error: Resumes folder not found!")
    sys.exit(1)
else:
    print("Google Drive Mounted. Resumes loaded")
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Created directory: /content/drive/My Drive/NER_Output/
Created directory: /content/drive/My Drive/NER_Output/annotations
Created directory: /content/drive/My Drive/NER_Output/ner_model
Google Drive Mounted. Resumes loaded
Execution Time: 0.01 seconds


In [12]:
start_time = time.time()
# Load spaCy model
nlp = spacy.load("en_core_web_sm")

# Function to extract text from PDFs with OCR fallback
def extract_text_with_ocr(pdf_path):
    try:
        with contextlib.redirect_stderr(MuPDFErrorFilter()):
            doc = fitz.open(pdf_path)
        text = ""
        for page in doc:
            page_text = page.get_text("text")
            if page_text.strip():
                text += page_text + "\n"
        if text.strip():
            return text
        images = convert_from_path(pdf_path)
        ocr_text = ""
        for img in images:
            ocr_text += pytesseract.image_to_string(img) + "\n"
        return ocr_text if ocr_text.strip() else None
    except Exception as e:
        print(f"Error processing {pdf_path}: {str(e)}")
        return None

# Extract resumes
resumes = {}
total_pdfs = len([f for f in os.listdir(resumes_dir) if f.endswith(".pdf")])
processed, failed = 0, 0

print(f"Extracting {total_pdfs} PDFs...")
for file in os.listdir(resumes_dir):
    if file.endswith(".pdf"):
        processed += 1
        if processed % 100 == 0:
            print(f"Processed {processed}/{total_pdfs} PDFs...")
        text = extract_text_with_ocr(os.path.join(resumes_dir, file))
        if text:
            resumes[file] = text
        else:
            failed += 1
print(f"Extraction complete: {len(resumes)} successful, {failed} failed")
# Save extracted text to JSON
extracted_text_path = os.path.join(output_dir, "extracted_resumes.json")
with open(extracted_text_path, 'w', encoding='utf-8') as f:
    json.dump(resumes, f, ensure_ascii=False, indent=2)
print(f"Saved extracted text to {extracted_text_path}")
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Extracting 5050 PDFs...
Processed 100/5050 PDFs...
Processed 200/5050 PDFs...
Processed 300/5050 PDFs...
Processed 400/5050 PDFs...
Processed 500/5050 PDFs...
Processed 600/5050 PDFs...
Processed 700/5050 PDFs...
Processed 800/5050 PDFs...
Processed 900/5050 PDFs...
Processed 1000/5050 PDFs...
Processed 1100/5050 PDFs...
Processed 1200/5050 PDFs...
Processed 1300/5050 PDFs...
Processed 1400/5050 PDFs...
Processed 1500/5050 PDFs...
Processed 1600/5050 PDFs...
Processed 1700/5050 PDFs...
Processed 1800/5050 PDFs...
Processed 1900/5050 PDFs...
Processed 2000/5050 PDFs...
Processed 2100/5050 PDFs...
Processed 2200/5050 PDFs...
Processed 2300/5050 PDFs...
Processed 2400/5050 PDFs...
Processed 2500/5050 PDFs...
Processed 2600/5050 PDFs...
Processed 2700/5050 PDFs...
Processed 2800/5050 PDFs...
Processed 2900/5050 PDFs...
Processed 3000/5050 PDFs...
Processed 3100/5050 PDFs...
Processed 3200/5050 PDFs...
Processed 3300/5050 PDFs...
Processed 3400/5050 PDFs...
Processed 3500/5050 PDFs...
Proce

In [13]:
start_time = time.time()
# Align entity boundaries to token boundaries and strip whitespace/punctuation
def align_to_token_boundaries(text, start, end):
    doc = nlp.make_doc(text)
    tokens = [token for token in doc]
    new_start = start
    new_end = end
    for token in tokens:
        if token.idx <= start < token.idx + len(token.text):
            new_start = token.idx
        if token.idx < end <= token.idx + len(token.text):
            new_end = token.idx + len(token.text)
    # Strip leading/trailing whitespace or punctuation from the span
    span_text = text[new_start:new_end]
    stripped_start = new_start + len(span_text) - len(span_text.lstrip())
    stripped_end = new_end - len(span_text) + len(span_text.rstrip())
    return stripped_start, stripped_end

# Validate entity spans
def validate_entity(text, start, end, entity_type):
    if start < 0 or end > len(text) or start >= end:
        return False
    entity_text = text[start:end]
    if not entity_text.strip() or entity_text.isspace():
        return False
    doc = nlp.make_doc(text)
    entities = [(start, end, entity_type)]
    try:
        tags = spacy.training.offsets_to_biluo_tags(doc, entities)
        return '-' not in tags
    except ValueError:
        return False

# Generate annotations for a resume and save as JSON
def generate_annotations(resume_text, filename):
    annotations = {"text": resume_text, "entities": []}
    patterns = {
        "NAME": [(lambda t: next((line for line in t.split('\n') if line.strip() and len(line.strip().split()) <= 3), None))],
        "EMAIL": [re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b')],
        "PHONE": [re.compile(r'\b(?:\+?\d{1,2}\s?)?\(?\d{3}\)?[\s.-]?\d{3}[\s.-]?\d{4}\b')],
        "EDUCATION": [
            re.compile(r'\b(?:Bachelor|Master|PhD|BSc|MSc|MBA|degree)\b', re.IGNORECASE),
            re.compile(r'\bUniversity\s+of\s+[A-Za-z\s]+\b', re.IGNORECASE)
        ],
        "SKILL": [re.compile(r'\b(?:Python|Java|JavaScript|SQL|HTML|CSS|React|Angular|Node\.js|Machine Learning|Data Analysis|NLP)\b', re.IGNORECASE)],
        "EXPERIENCE": [
            re.compile(r'\b(?:years of experience|work experience|professional experience)\b', re.IGNORECASE),
            re.compile(r'\b(?:Senior|Junior|Lead|Manager|Director|Engineer|Developer|Analyst)\b', re.IGNORECASE)
        ]
    }

    priority = {"NAME": 1, "EMAIL": 2, "PHONE": 3, "EDUCATION": 4, "SKILL": 5, "EXPERIENCE": 6}

    entity_candidates = []
    for entity_type, pattern_list in patterns.items():
        for pattern in pattern_list:
            if callable(pattern):
                result = pattern(resume_text)
                if result:
                    start_idx = resume_text.find(result)
                    if start_idx >= 0:
                        end_idx = start_idx + len(result)
                        start_idx, end_idx = align_to_token_boundaries(resume_text, start_idx, end_idx)
                        if validate_entity(resume_text, start_idx, end_idx, entity_type):
                            entity_candidates.append((start_idx, end_idx, entity_type))
            else:
                for match in pattern.finditer(resume_text):
                    start_idx = match.start()
                    end_idx = match.end()
                    start_idx, end_idx = align_to_token_boundaries(resume_text, start_idx, end_idx)
                    if validate_entity(resume_text, start_idx, end_idx, entity_type):
                        entity_candidates.append((start_idx, end_idx, entity_type))

    # Resolve overlapping entities
    entity_candidates.sort(key=lambda x: (x[0], x[1], priority[x[2]]))
    final_entities = []
    last_end = -1
    for start, end, entity_type in entity_candidates:
        if start >= last_end:
            final_entities.append([start, end, entity_type])
            last_end = end
        elif priority[entity_type] < priority[final_entities[-1][2]]:
            final_entities[-1] = [start, end, entity_type]
            last_end = end

    annotations["entities"] = final_entities

    # Save annotations as JSON for every resume
    json_path = os.path.join(json_dir, f"{filename.split('.')[0]}_annotations.json")
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(annotations, f, ensure_ascii=False, indent=2)
    print(f"Saved annotations to {json_path}")
    return annotations
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Execution Time: 0.00 seconds


In [14]:
start_time = time.time()
import json
import os
import spacy
from spacy.training import offsets_to_biluo_tags

# Generate and save training data for all resumes
print("Generating and saving training annotations for all resumes...")
training_data = []
valid_count = 0
failed_pdf_count = 0
invalid_text_count = 0
too_long_count = 0

for idx, (filename, text) in enumerate(resumes.items(), 1):
    if idx % 100 == 0:
        print(f"Processed {idx}/{len(resumes)} resumes for annotation...")
    try:
        # Check for failed PDFs (empty text)
        if not text or text.isspace():
            print(f"Skipping {filename} - Failed PDF (empty text)")
            failed_pdf_count += 1
            continue
        # Skip if text is too long (>50,000 chars)
        if len(text) > 50000:
            print(f"Skipping {filename} - text too long ({len(text)} chars)")
            annotation = {"text": text[:50000], "entities": []}
            json_path = os.path.join(json_dir, f"{filename.split('.')[0]}_annotations.json")
            with open(json_path, 'w', encoding='utf-8') as f:
                json.dump(annotation, f, ensure_ascii=False, indent=2)
            print(f"Saved annotations to {json_path}")
            too_long_count += 1
            continue
        # Generate annotations
        annotation = generate_annotations(text, filename)
        # Handle invalid annotations
        if isinstance(annotation, str) or not isinstance(annotation, dict) or "entities" not in annotation or "text" not in annotation:
            print(f"Skipping {filename} - Invalid annotation: {str(annotation)[:50]}...")
            invalid_text_count += 1
            continue
        # Validate entities
        doc = nlp.make_doc(annotation["text"])
        try:
            tags = offsets_to_biluo_tags(doc, annotation["entities"])
            if '-' in tags:
                print(f"Skipping {filename} - Misaligned entities: {annotation['entities'][:50]}...")
                invalid_text_count += 1
                continue
            training_data.append((text, annotation))
            valid_count += 1
            if valid_count % 100 == 0:
                print(f"Generated {valid_count} valid annotations so far...")
        except ValueError as e:
            print(f"Skipping {filename} - Validation error: {str(e)}")
            invalid_text_count += 1
    except Exception as e:
        print(f"Skipping {filename} - Error: {str(e)}")
        invalid_text_count += 1

# Log results
total_skipped = len(resumes) - len(training_data)
print(f"Generated {valid_count} valid annotations, skipped {total_skipped} (Failed PDFs: {failed_pdf_count}, Invalid text: {invalid_text_count}, Too long: {too_long_count})")

# Save training_data
import pickle
with open(os.path.join(json_dir, 'training_data.pkl'), 'wb') as f:
    pickle.dump(training_data, f)

# Reformat for training
training_data_for_training = [annotations for _, annotations in training_data]
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4783)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4778)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4782)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4780)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (478)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4784)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4781)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4793)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4785)_annotations.json
Saved annotations to /content/drive/My Drive/NER_Output/annotations/cv (4787)_ann

In [15]:
start_time = time.time()
# Train NER model, evaluate accuracy, and save to Google Drive
def train_ner_model(training_data, output_dir, n_iter=20):
    from sklearn.metrics import precision_recall_fscore_support
    import random
    import spacy
    from spacy.training import Example

    nlp = spacy.blank("en")
    if "ner" not in nlp.pipe_names:
        ner = nlp.add_pipe("ner")
    else:
        ner = nlp.get_pipe("ner")

    # Explicitly add all labels
    all_labels = {"NAME", "EMAIL", "PHONE", "EDUCATION", "SKILL", "EXPERIENCE"}
    for label in all_labels:
        ner.add_label(label)

    # Pre-validate and split training data
    valid_training_data = []
    for annotation in training_data:
        text = annotation["text"]
        entities = annotation["entities"]
        doc = nlp.make_doc(text)
        try:
            tags = spacy.training.offsets_to_biluo_tags(doc, entities)
            if '-' not in tags:
                valid_training_data.append(annotation)
            else:
                print(f"Pre-training skip - Misaligned entities in {text[:50]}...: {entities}")
        except ValueError as e:
            print(f"Pre-training skip - Invalid annotation in {text[:50]}...: {str(e)}")

    # Split into train (80%) and test (20%)
    random.shuffle(valid_training_data)
    split_idx = int(0.8 * len(valid_training_data))
    train_data = valid_training_data[:split_idx]
    test_data = valid_training_data[split_idx:]

    # Train the model
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != "ner"]
    with nlp.disable_pipes(*other_pipes):
        optimizer = nlp.begin_training()
        for itn in range(n_iter):
            random.shuffle(train_data)
            losses = {}
            batches = minibatch(train_data, size=4)
            for batch in batches:
                for annotation in batch:
                    text = annotation["text"]
                    entities = annotation["entities"]
                    doc = nlp.make_doc(text)
                    try:
                        example = Example.from_dict(doc, {"entities": entities})
                        nlp.update([example], drop=0.5, sgd=optimizer, losses=losses)
                    except ValueError as e:
                        print(f"Training skip - {str(e)} - Text: {text[:50]}...")
                        continue
            print(f"Iteration {itn + 1}/{n_iter} - Losses: {losses}")

    # Evaluate NER accuracy on test set
    if test_data:
        print("\nEvaluating NER model on test set...")
        true_labels = []
        pred_labels = []
        for annotation in test_data:
            text = annotation["text"]
            true_entities = annotation["entities"]
            doc = nlp(text)
            pred_entities = [(ent.start_char, ent.end_char, ent.label_) for ent in doc.ents]

            # Convert entities to token-level labels for evaluation
            true_doc = nlp.make_doc(text)
            true_tags = spacy.training.offsets_to_biluo_tags(true_doc, true_entities)
            pred_tags = spacy.training.offsets_to_biluo_tags(true_doc, pred_entities)

            # Ensure same length by padding/truncating
            min_len = min(len(true_tags), len(pred_tags))
            true_tags = true_tags[:min_len]
            pred_tags = pred_tags[:min_len]

            true_labels.extend(true_tags)
            pred_labels.extend(pred_tags)

        # Compute precision, recall, F1-score
        labels = list(all_labels) + ["O"]  # Include 'O' for non-entity tokens
        precision, recall, f1, _ = precision_recall_fscore_support(
            true_labels, pred_labels, labels=labels, average='weighted', zero_division=0
        )
        print(f"NER Evaluation Metrics:")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1-Score: {f1:.4f}")
    else:
        print("\nNo test data available for evaluation.")

    # Save the model
    nlp.to_disk(output_dir)
    print(f"NER model saved to {output_dir}")

# Train and save the model if there’s valid training data
if training_data_for_training:
    print("\nTraining NER model...")
    train_ner_model(training_data_for_training, model_dir)
else:
    print("No valid training data to train the NER model.")
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")


Training NER model...
Iteration 1/20 - Losses: {'ner': 26019.699653806845}
Iteration 2/20 - Losses: {'ner': 8930.299377659629}
Iteration 3/20 - Losses: {'ner': 7270.909904453376}
Iteration 4/20 - Losses: {'ner': 6273.683361133638}
Iteration 5/20 - Losses: {'ner': 6354.819661814797}
Iteration 6/20 - Losses: {'ner': 5707.36282797439}
Iteration 7/20 - Losses: {'ner': 5153.757208349238}
Iteration 8/20 - Losses: {'ner': 5027.5008261614375}
Iteration 9/20 - Losses: {'ner': 4619.380232778036}
Iteration 10/20 - Losses: {'ner': 4459.544974720306}
Iteration 11/20 - Losses: {'ner': 4284.849675420296}
Iteration 12/20 - Losses: {'ner': 4243.864051743278}
Iteration 13/20 - Losses: {'ner': 4050.249686242746}
Iteration 14/20 - Losses: {'ner': 3764.5262589436775}
Iteration 15/20 - Losses: {'ner': 3926.709674333451}
Iteration 16/20 - Losses: {'ner': 3831.7902984828925}
Iteration 17/20 - Losses: {'ner': 3687.7630929053253}
Iteration 18/20 - Losses: {'ner': 3524.621042982214}
Iteration 19/20 - Losses: {'

In [16]:
start_time = time.time()
 # Preprocess text for matching
def preprocess_text(text):
    if not text:
        return ""
    text = str(text).lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    doc = nlp(text)
    return " ".join([token.lemma_ for token in doc if not token.is_stop])

# Extract keywords for matching
def extract_keywords(text):
    doc = nlp(text)
    keywords = [token.text for token in doc if token.pos_ in ["NOUN", "PROPN", "NUM"] or token.text in ["experience", "skill"]]
    for ent in doc.ents:
        if ent.label_ in ["ORG", "DATE", "NORP"]:
            keywords.append(ent.text)
    return list(set(keywords))

# Match resumes with job description
def match_resumes_with_job_description(resumes, job_title, job_description):
    print("\nJob Description Used for Matching:")
    print("=" * 50)
    print(f"{job_title}\n\n{job_description}")
    print("=" * 50)

    job_text = f"{job_title} {job_description}"
    processed_job = preprocess_text(job_text)
    job_keywords = extract_keywords(job_text)

    must_have = [job_title.lower()]
    exp_match = re.search(r'(\d+)\s*(years?|yrs?)\s*of\s*experience', job_description.lower())
    if exp_match:
        min_years = int(exp_match.group(1))
        must_have.append(f"{min_years} year")

    print(f"\nCritical Keywords: {', '.join(must_have)}")
    print(f"Additional Keywords: {', '.join([k for k in job_keywords if k.lower() not in must_have])}")

    vectorizer = TfidfVectorizer(ngram_range=(1, 2))
    corpus = [processed_job] + [preprocess_text(text) for text in resumes.values()]
    vectors = vectorizer.fit_transform(corpus)
    similarity_scores = cosine_similarity(vectors[0:1], vectors[1:]).flatten()
    resume_names = list(resumes.keys())

    scored_resumes = []
    for idx, (filename, score) in enumerate(zip(resume_names, similarity_scores)):
        resume_text = resumes[filename].lower()
        meets_criteria = all(kw in resume_text for kw in must_have)
        if "year" in must_have[-1]:
            years = re.findall(r'(\d+)\s*(years?|yrs?)\s*of\s*experience', resume_text)
            meets_criteria &= any(int(y) >= min_years for y, _ in years) if years else False
        if meets_criteria:
            adjusted_score = score * 1.5 if job_title.lower() in resume_text else score
            scored_resumes.append((filename, adjusted_score))

    scored_resumes.sort(key=lambda x: x[1], reverse=True)
    return scored_resumes[:10]
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")

Execution Time: 0.00 seconds


In [20]:
start_time = time.time()
# Main resume matching function
def run_resume_matching():
    print("\nResume Matching System")
    print("-" * 50)

    use_default = input("Use default job description? (y/n): ").lower().strip() == 'y'
    if use_default:
        job_title = "Python Developer"
        job_description = "3 years of experience with Python programming and software development."
    else:
        job_title = input("Enter job title: ")
        print("Enter job description (type 'END' on a new line when finished):")
        lines = []
        while True:
            line = input()
            if line.strip().upper() == 'END':
                break
            lines.append(line)
        job_description = "\n".join(lines)

    print("\nMatching resumes...")
    top_resumes = match_resumes_with_job_description(resumes, job_title, job_description)

    print("\nTop 10 Matching Resumes:")
    for rank, (filename, score) in enumerate(top_resumes, start=1):
        print(f"{rank}. {filename} - Similarity Score: {score:.4f}")

    while True:
        choice = input("\nEnter resume number to see details (1-10) or 'q' to quit: ")
        if choice.lower() == 'q':
            break
        try:
            rank = int(choice)
            if 1 <= rank <= len(top_resumes):
                filename, score = top_resumes[rank-1]
                print("\n" + "=" * 50)
                print(f"Details for: {filename}\nScore: {score:.4f}\nContent: {resumes[filename][:500]}...")
                print("=" * 50)
            else:
                print(f"Please enter a number between 1 and {len(top_resumes)}")
        except ValueError:
            print("Please enter a valid number or 'q'")

# Main execution
if __name__ == "__main__":
    run_resume_matching()
end_time = time.time()
print(f"Execution Time: {end_time - start_time:.2f} seconds")


Resume Matching System
--------------------------------------------------
Use default job description? (y/n): n
Enter job title: Data Analyst
Enter job description (type 'END' on a new line when finished):
Collected, cleaned, and analyzed data to generate business insights. Built dashboards and reports using SQL, Excel, and Tableau. Supported decision-making through trend analysis and data visualization. Collaborated with teams to deliver actionable insights.
END

Matching resumes...

Job Description Used for Matching:
Data Analyst

Collected, cleaned, and analyzed data to generate business insights. Built dashboards and reports using SQL, Excel, and Tableau. Supported decision-making through trend analysis and data visualization. Collaborated with teams to deliver actionable insights.

Critical Keywords: data analyst
Additional Keywords: reports, Tableau, making, decision, visualization, teams, data, Excel, trend, analysis, Collected, business, Analyst, dashboards, SQL, Data, insight