In [2]:
import csv
import json

# Load from CSV and convert to JSON structure
def convert_csv_to_json(csv_file, json_file):
    with open(csv_file, mode='r', encoding='utf-8') as f:
        reader = csv.DictReader(f)
        jobs = []

        for row in reader:
            job = {
                "title": row["Job Title"].strip(),
                "description": row["description"].strip() if "description" in row else "",
                "skills": [skill.strip().lower() for skill in row["Key Skills"].split(",")]
            }
            jobs.append(job)

    # Save to JSON file (optional)
    with open(json_file, mode='w', encoding='utf-8') as f:
        json.dump(jobs, f, indent=4)

    return jobs

# Example usage
csv_file_path = 'jobss_cleaned.csv'
json_file_path = 'final_jobs.json'
result = convert_csv_to_json(csv_file_path, json_file_path)

# Print result
print(json.dumps(result, indent=4))


[
    {
        "title": "Digital Media Planner",
        "description": "",
        "skills": [
            "media planning| digital media"
        ]
    },
    {
        "title": "Online Bidding Executive",
        "description": "",
        "skills": [
            "pre sales| closing| software knowledge| clients| requirements| negotiating| client| online bidding| good communication| technology"
        ]
    },
    {
        "title": "Trainee Research/ Research Executive- Hi- Tech Operations",
        "description": "",
        "skills": [
            "computer science| fabrication| quality check| intellectual property| electronics| support services| research| management| human resource management| research executive"
        ]
    },
    {
        "title": "Technical Support",
        "description": "",
        "skills": [
            "technical support"
        ]
    },
    {
        "title": "Software Test Engineer -hyderabad",
        "description": "",
        "skills": [
     

In [2]:
import csv
import json

# Path to your CSV file
csv_path = 'job_details.csv'
output_json_path = 'skills_new.json'

unique_skills = set()

with open(csv_path, newline='', encoding='utf-8') as csvfile:
    reader = csv.DictReader(csvfile)
    
    for row in reader:
        raw_skills = row.get('Skills Required', '')
        if raw_skills:
            # Split using | delimiter
            skills_list = raw_skills.lower().split(',')
            for skill in skills_list:
                cleaned_skill = skill.strip()
                if cleaned_skill:
                    unique_skills.add(cleaned_skill)

# Convert set to sorted list
sorted_skills = sorted(unique_skills)

# Write to JSON
with open(output_json_path, 'w', encoding='utf-8') as jsonfile:
    json.dump(sorted_skills, jsonfile, indent=4)

print(f"Extracted {len(sorted_skills)} unique skills to {output_json_path}")


Extracted 793 unique skills to skills_new.json


In [5]:
import json

# Load jobs.json
with open('jobs.json', 'r', encoding='utf-8') as f:
    job_data = json.load(f)

# List of non-skills to exclude (e.g., perks)
exclude_phrases = {
    "free snacks", "free snacks & beverages", "informal dress code",
    "5 days a week", "health insurance", "certificate",
    "letter of recommendation", "job offer", "cab/transportation facility"
}

unique_skills = set()

# Go through each job
for job in job_data:
    skills_field = job.get("skills", [])
    
    # If it's a string, split by comma
    if isinstance(skills_field, str):
        skills = skills_field.split(',')
    elif isinstance(skills_field, list):
        skills = []
        for s in skills_field:
            if isinstance(s, str):
                skills.extend(s.split(','))  # In case it's still comma-separated
    else:
        continue  # Skip if not a list or string

    for skill in skills:
        cleaned = skill.strip().lower()
        if cleaned and cleaned not in exclude_phrases:
            unique_skills.add(cleaned)

# Convert to sorted list
sorted_skills = sorted(unique_skills)

# Write to skills.json
with open('skills_3.json', 'w', encoding='utf-8') as out_file:
    json.dump(sorted_skills, out_file, indent=4)

print(f"✅ Extracted {len(sorted_skills)} unique skills to skills.json")


✅ Extracted 860 unique skills to skills.json


In [6]:
import json
import re

# Load jobs.json
with open('jobs.json', 'r', encoding='utf-8') as f:
    job_data = json.load(f)

unique_skills = set()

# Optional: Add known perks to exclude
exclude_phrases = {
    "free snacks", "free snacks & beverages", "informal dress code",
    "5 days a week", "health insurance", "certificate",
    "letter of recommendation", "job offer", "cab/transportation facility"
}

for job in job_data:
    skills_field = job.get("skills", [])
    extracted = []

    if isinstance(skills_field, list):
        for entry in skills_field:
            if isinstance(entry, str):
                # Split by comma, pipe, or newline
                split_skills = re.split(r'[,\n|]', entry)
                extracted.extend(split_skills)
    elif isinstance(skills_field, str):
        extracted = re.split(r'[,\n|]', skills_field)

    # Clean and filter
    for skill in extracted:
        skill = skill.strip().lower()
        if skill and skill not in exclude_phrases:
            unique_skills.add(skill)

# Write unique, sorted skills to skills.json
sorted_skills = sorted(unique_skills)

with open('skills_4.json', 'w', encoding='utf-8') as out_file:
    json.dump(sorted_skills, out_file, indent=4)

print(f"✅ Extracted {len(sorted_skills)} unique cleaned skills.")


✅ Extracted 860 unique cleaned skills.


In [3]:
import csv
import json

csv_file_path = 'job_details.csv'
json_file_path = 'job_details.json'

output_data = []

with open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        job = {
            "title": row["Job Title"],
            "skills": [skill.strip().lower() for skill in row["Skills Required"].split('|')]
        }
        output_data.append(job)

with open(json_file_path, mode='w', encoding='utf-8') as json_file:
    json.dump(output_data, json_file, indent=4)

print(f"JSON file created at: {json_file_path}")


JSON file created at: job_details.json


[
    {
        "title": "Data Scientist",
        "description": "Work with data to develop machine learning models.",
        "skills": ["python", "machine learning", "deep learning", "tensorflow", "sql"]
    },
    {
        "title": "Frontend Developer",
        "description": "Develop and maintain user interfaces for web applications.",
        "skills": ["html", "css", "javascript", "react"]
    },
    {
        "title": "Backend Developer",
        "description": "Develop server-side applications and APIs.",
        "skills": ["python", "java", "nodejs", "aws", "sql"]
    },
    {
        "title": "Full Stack Developer",
        "description": "Work on both frontend and backend development.",
        "skills": ["python", "javascript", "react", "nodejs", "docker"]
    }
]


In [2]:
import csv

csv_file_path = 'job_details.csv'

with open(csv_file_path, mode='r', encoding='utf-8') as csv_file:
    reader = csv.reader(csv_file)
    headers = next(reader)
    print("CSV headers:", headers)


CSV headers: ['Job Title', 'Company Name', 'Location', 'Start Date', 'CTC (Annual)', 'Experience', 'Apply By', 'Skills Required', 'Perks']
