In [19]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Expanded Job and Skills Data
job_data = {
    'Job': [
        'Software Developer', 'Data Scientist', 'Machine Learning Engineer', 'AI Specialist', 'Web Developer',
        'Frontend Developer', 'Backend Developer', 'DevOps Engineer', 'Database Administrator', 'Cybersecurity Analyst',
        'Cloud Architect', 'Mobile App Developer', 'Game Developer', 'System Analyst', 'Technical Support Engineer'
    ],
    'Skills': [
        'Python, Java, C++, Algorithms, Git, Data Structures',
        'Python, R, SQL, Statistics, Data Analysis, Pandas, NumPy',
        'Python, TensorFlow, PyTorch, Machine Learning, Scikit-learn, NumPy',
        'Artificial Intelligence, Machine Learning, Deep Learning, NLP, Computer Vision',
        'HTML, CSS, JavaScript, React, Node.js, Git, Responsive Design',
        'HTML, CSS, JavaScript, React, Redux, Bootstrap, UI/UX Design',
        'Java, Spring Boot, Node.js, Express.js, REST APIs, MongoDB',
        'AWS, Docker, Kubernetes, CI/CD, Linux, Jenkins',
        'SQL, MySQL, PostgreSQL, Database Design, Backup, Optimization',
        'Network Security, Ethical Hacking, Penetration Testing, Firewalls, Python, Encryption',
        'AWS, Azure, GCP, Cloud Infrastructure, DevOps, Microservices',
        'Java, Kotlin, Android Studio, Swift, iOS Development, React Native',
        'C++, Unity, Unreal Engine, Game Design, Graphics Programming',
        'Systems Analysis, Business Analysis, Documentation, UML, SQL',
        'Troubleshooting, Hardware Support, Networking, Windows, Linux, Communication'
    ]
}

# Convert to DataFrame
df = pd.DataFrame(job_data)

# TF-IDF Vectorizer with preprocessing
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['Skills'])

# Function to recommend top N jobs based on input skillset
def recommend_jobs(user_skills, top_n=5):
    user_vector = vectorizer.transform([user_skills])
    cosine_sim = cosine_similarity(user_vector, tfidf_matrix).flatten()
    
    top_indices = cosine_sim.argsort()[::-1][:top_n]
    
    recommendations = []
    for i in top_indices:
        job_title = df['Job'][i]
        score = round(cosine_sim[i] * 100, 2)
        recommendations.append((job_title, score))
    
    return recommendations

# Example user input - Skills from the user
user_input = 'Python, React, Node.js, AWS, Git, HTML, CSS'

# Get the top N recommended jobs
recommended_jobs = recommend_jobs(user_input, top_n=5)

print("Top Job Recommendations for Your Skillset:")
for idx, (job, score) in enumerate(recommended_jobs, 1):
    print(f"{idx}. {job} - Match Score: {score}%")





Top Job Recommendations for Your Skillset:
1. Web Developer - Match Score: 71.68%
2. Frontend Developer - Match Score: 32.8%
3. Backend Developer - Match Score: 29.64%
4. Software Developer - Match Score: 24.53%
5. DevOps Engineer - Match Score: 12.47%


In [15]:
# Define relevant jobs manually for a given skillset
user_skills = 'Python, React, Node.js, AWS, Git, HTML, CSS'
relevant_jobs = ['Web Developer', 'Frontend Developer', 'Backend Developer']  # Assume these are relevant jobs for the given skills

# Get the top N recommended jobs
recommended_jobs = recommend_jobs(user_skills, top_n=5)

# Check how many of the top N recommended jobs are relevant
top_recommended_jobs = [job for job, score in recommended_jobs]
relevant_count = len(set(top_recommended_jobs) & set(relevant_jobs))

# Precision at k (top_n = 5 in this case)
precision_at_k = relevant_count / 5
print(f"Precision at 5: {precision_at_k * 100:.2f}%")


Precision at 5: 60.00%


In [None]:
pip install pandas scikit-learn


Recommended Job for your skillset: Web Developer

Job Recommendations for Multiple Users:
User Skills: Python, JavaScript, React, Node.js, HTML, CSS
Recommended Jobs: Software Developer, Web Developer

User Skills: Python, R, SQL, Data Analysis
Recommended Jobs: Software Developer, Data Scientist

User Skills: TensorFlow, Machine Learning, NLP
Recommended Jobs: AI Specialist, Machine Learning Engineer

User Skills: Deep Learning, AI, Machine Learning
Recommended Jobs: Machine Learning Engineer, AI Specialist



In [None]:
# we can add in this 

ModuleNotFoundError: No module named 'bs4'

In [12]:
import requests

# Fetch all jobs
response = requests.get("https://remoteok.com/api")
jobs = response.json()[1:]  # Skip metadata

# Go through each job and extract all details
for job in jobs:
    company = job.get('company', 'N/A')
    position = job.get('position', 'N/A')
    location = job.get('location', '🌍 Remote / Not specified')
    salary = job.get('salary', '💸 Not listed')
    url = job.get('url', 'No URL')
    
    print(f"Company: {company}")
    print(f"Position: {position}")
    print(f"Location: {location}")
    print(f"Salary: {salary}")
    print(f"URL: {url}")
    print('-' * 40)


Company: Dapper Labs
Position: Staff Backend Engineer
Location: US / Canada, Remote
Salary: 💸 Not listed
URL: https://remoteOK.com/remote-jobs/remote-staff-backend-engineer-dapper-labs-1092983
----------------------------------------
Company: Serotonin
Position: Software Engineer
Location: 
Salary: 💸 Not listed
URL: https://remoteOK.com/remote-jobs/remote-software-engineer-serotonin-1092981
----------------------------------------
Company: Sei Labs
Position: Solidity Engineer Smart Contracts Engineer
Location: 
Salary: 💸 Not listed
URL: https://remoteOK.com/remote-jobs/remote-solidity-engineer-smart-contracts-engineer-sei-labs-1092978
----------------------------------------
Company: The Apache Software Foundation
Position: ASF Infrastructure Sysadmin
Location: 
Salary: 💸 Not listed
URL: https://remoteOK.com/remote-jobs/remote-asf-infrastructure-sysadmin-the-apache-software-foundation-1092977
----------------------------------------
Company: Winna
Position: Back End Developer Winna.com

In [3]:
# Job Matching System: Simple Console App

class JobRole:
    def __init__(self, title, required_skills):
        self.title = title
        self.required_skills = required_skills

    def match_skills(self, user_skills):
        # Find matched skills between user and job role
        matched_skills = set(user_skills) & set(self.required_skills)
        return matched_skills


# Define some job roles (with required skills)
job_roles = [
    JobRole("Software Engineer", ["Python", "Java", "SQL", "Machine Learning"]),
    JobRole("Data Scientist", ["Python", "Machine Learning", "Deep Learning", "SQL"]),
    JobRole("Web Developer", ["HTML", "CSS", "JavaScript", "React"]),
    JobRole("System Administrator", ["Linux", "Networking", "Security", "Cloud"]),
    JobRole("Project Manager", ["Leadership", "Communication", "Agile", "Scrum"]),
    JobRole("DevOps Engineer", ["Docker", "Kubernetes", "Cloud", "CI/CD"]),
    JobRole("Mobile Developer", ["Android", "Java", "Kotlin", "Swift"]),
]

def get_user_skills():
    """
    Get the skills of the user from console input.
    """
    print("Enter your skills separated by commas (e.g. Python, Java, SQL):")
    skills_input = input()
    # Split the input string by commas and remove extra spaces
    return [skill.strip() for skill in skills_input.split(",")]

def match_jobs(user_skills):
    """
    Match the user's skills with the available job roles and return the matching jobs.
    """
    matched_jobs = []
    for job in job_roles:
        matched_skills = job.match_skills(user_skills)
        if matched_skills:
            matched_jobs.append((job.title, matched_skills))
    return matched_jobs

def display_matching_jobs(matched_jobs):
    """
    Display the list of matched job roles along with the matched skills.
    """
    if matched_jobs:
        print("\nJob Matches Found for Your Skills:")
        for job_title, matched_skills in matched_jobs:
            print(f"- {job_title}: Skills Matched: {', '.join(matched_skills)}")
    else:
        print("\nNo job matches found based on your skills.")

def main():
    """
    Main function to run the Job Matching system.
    """
    user_skills = get_user_skills()  # Step 1: Get user skills
    matched_jobs = match_jobs(user_skills)  # Step 2: Find matching jobs
    display_matching_jobs(matched_jobs)  # Step 3: Display results

if __name__ == "__main__":
    main()  # Start the job matching system


Enter your skills separated by commas (e.g. Python, Java, SQL):

No job matches found based on your skills.


In [11]:
from faker import Faker
import pandas as pd
import random

# Create multiple locales
faker_indian = Faker('en_IN')
faker_foreign = Faker(['en_US', 'en_GB', 'en_CA', 'en_AU'])

# Skill pool
skills_pool = [
    'Python', 'Java', 'C++', 'JavaScript', 'HTML', 'CSS', 'SQL',
    'React', 'Node.js', 'Machine Learning', 'Data Science', 'AI', 'NLP',
    'TensorFlow', 'PyTorch', 'AWS', 'Docker', 'Kubernetes'
]

personality_types = ['INTJ', 'ENTP', 'INFJ', 'ESFP', 'ISTP', 'ENFP']
life_goals = ['High Income', 'Work-Life Balance', 'Global Travel', 'Innovation', 'Social Impact']

# Data generator
def generate_resume():
    # 80% Indian names, 20% foreign
    if random.random() < 0.8:
        name = faker_indian.name()
        location = faker_indian.city()
    else:
        name = faker_foreign.name()
        location = faker_foreign.city()
    
    experience = random.randint(0, 10)
    skills = ", ".join(random.sample(skills_pool, k=random.randint(3, 6)))
    personality = random.choice(personality_types)
    goal = random.choice(life_goals)

    return {
        "Name": name,
        "Experience (Years)": experience,
        "Skills": skills,
        "Personality Type": personality,
        "Life Goal": goal,
        "Preferred Location": location
    }

# Generate multiple entries
resumes = [generate_resume() for _ in range(100)]
df = pd.DataFrame(resumes)

# Save to CSV
df.to_csv("realistic_resume_data.csv", index=False)
print("✅ Data saved as 'realistic_resume_data.csv'")


✅ Data saved as 'realistic_resume_data.csv'


In [12]:
pip install faker pandas

Collecting faker
  Downloading faker-37.1.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.1.0-py3-none-any.whl (1.9 MB)
   ---------------------------------------- 0.0/1.9 MB ? eta -:--:--
   ---------------------------------------- 1.9/1.9 MB 26.3 MB/s eta 0:00:00
Installing collected packages: faker
Successfully installed faker-37.1.0
Note: you may need to restart the kernel to use updated packages.


In [20]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Sample DataFrame (replace it with your actual dataset)
data = {
    'current_role': ['Junior Developer', 'Senior Developer', 'Junior Developer', 'Senior Developer', 'Tech Lead'],
    'skills': ['Python, Git', 'Python, System Design', 'Java, Git', 'Python, Leadership', 'Python, Leadership, System Design'],
    'experience': [2, 5, 1, 4, 7],
    'next_role': ['Senior Developer', 'Tech Lead', 'Senior Developer', 'Tech Lead', 'Engineering Manager']
}

df = pd.DataFrame(data)

# Combine all the unique values from both current_role and next_role using pd.concat
roles = pd.concat([df['current_role'], df['next_role']]).unique()

# Initialize LabelEncoder and fit on both current_role and next_role values
le = LabelEncoder()
le.fit(roles)

# Transform both current_role and next_role columns using the same encoder
df['current_role'] = le.transform(df['current_role'])
df['next_role'] = le.transform(df['next_role'])

# Convert 'skills' into a simple format (dummy encoding, you could use NLP here)
skills_list = list(set([skill for sublist in df['skills'].str.split(', ') for skill in sublist]))
for skill in skills_list:
    df[skill] = df['skills'].apply(lambda x: 1 if skill in x else 0)

# Prepare features and target
X = df.drop(columns=['next_role', 'skills'])
y = df['next_role']

# Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prediction
y_pred = model.predict(X_test)
print(f'Accuracy: {accuracy_score(y_test, y_pred)}')

# Optimized function to predict career path for new input
def predict_career_path(current_role, experience, skills):
    # Handle edge cases first to prevent unnecessary computation
    if experience == 0 and not skills:
        return 'Junior Developer'  # Return default path for no experience and no skills

    # Encode current_role
    current_role_encoded = le.transform([current_role])[0]
    
    # Convert skills to binary features (dummy encoding)
    skills_encoded = [1 if skill in skills else 0 for skill in skills_list]
    
    # Prepare the input data for prediction in the same format as training data
    input_data = pd.DataFrame([[current_role_encoded, experience] + skills_encoded], columns=['current_role', 'experience'] + skills_list)
    
    # Align the new data with the training data columns
    input_data = input_data[X_train.columns]
    
    # Predict next role using the model
    predicted_role = model.predict(input_data)
    return le.inverse_transform(predicted_role)[0]

# Test the optimized function with zero experience and no skills
new_role = 'Tech Lead'
new_experience = 0
new_skills = []
predicted_path = predict_career_path(new_role, new_experience, new_skills)
print(f"Predicted Career Path: {predicted_path}")


Accuracy: 1.0
Predicted Career Path: Junior Developer


In [53]:
import pyttsx3
import speech_recognition as sr
import language_tool_python

# Initialize text-to-speech engine
engine = pyttsx3.init()

# Initialize language tool (for grammar checking)
tool = language_tool_python.LanguageTool('en-US')

# Function to speak text (feedback)
def speak(text):
    engine.say(text)
    engine.runAndWait()

# Function to listen to user's speech and convert it to text
def listen():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("Listening...")
        audio = recognizer.listen(source)
    
    try:
        print("Recognizing...")
        text = recognizer.recognize_google(audio)
        print(f"You said: {text}")
        return text
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
        return None
    except sr.RequestError:
        print("Sorry, the speech service is down.")
        return None

# Function to check grammar
def check_grammar(text):
    matches = tool.check(text)
    if len(matches) > 0:
        corrections = []
        for match in matches:
            corrections.append(f"Suggestion: {match.replacement} for '{match.context}'")
        return corrections
    else:
        return ["Your grammar looks good!"]

# Function to simulate an interview
def interview():
    speak("Hello! Let's begin the interview. Please introduce yourself.")
    
    while True:
        user_input = listen()
        
        if user_input:
            # Check grammar in user's response
            grammar_feedback = check_grammar(user_input)
            
            # Provide feedback to the user
            if len(grammar_feedback) > 0:
                for feedback in grammar_feedback:
                    speak(feedback)
            else:
                speak("Great job, you did not make any mistakes!")
        
            # Ask the next question
            speak("Next question: Why do you want to work with our company?")
        
        # Exit condition (for simplicity, break if the user says "exit")
        if user_input and 'exit' in user_input.lower():
            speak("Thank you for the interview! Goodbye.")
            break

# Start the interview
if __name__ == "__main__":
    interview()


SystemError: Detected java 1.8. LanguageTool requires Java >= 17 for version latest.

In [3]:
import pandas as pd
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
df = pd.read_csv('Questions.csv', encoding='ISO-8859-1')
df.dropna(subset=["Question", "Answer", "Category"], inplace=True)
df["Category"] = df["Category"].str.lower().str.strip()

# Function to evaluate the answer
def evaluate_answer(user_answer, correct_answer):
    vectorizer = TfidfVectorizer().fit_transform([user_answer, correct_answer])
    similarity = cosine_similarity(vectorizer[0:1], vectorizer[1:2])[0][0]
    score = round(similarity * 100)
    return score, similarity

# Main function to run the interview mentor
def run_interview_mentor():
    print("Welcome to the AI Interview Mentor!\n")
    user_name = input("Enter your name: ")
    print(f"\nHi {user_name}, let's get you interview-ready!")

    available_domains = df["Category"].unique()
    print("\nAvailable categories:", ", ".join(domain.title() for domain in available_domains))

    selected_domain = input("\nEnter your preferred category: ").lower().strip()
    if selected_domain not in available_domains:
        print("Sorry, we don't have questions for that category.")
        return

    domain_questions = df[df["Category"] == selected_domain]
    selected_qas = domain_questions.sample(n=min(5, len(domain_questions)))

    total_score = 0
    for i, row in enumerate(selected_qas.itertuples(), 1):
        print(f"\nQuestion {i}: {row.Question}")
        user_answer = input("Your answer: ")

        score, similarity = evaluate_answer(user_answer, row.Answer)
        total_score += score

        print("--- Feedback ---")
        print(f"Similarity: {similarity:.2f}, Score: {score}/100")
        if score < 30:
            print("Feedback: Your answer is quite different. Try to align more with key concepts.")
        elif score < 70:
            print("Feedback: Decent answer, but it misses some important details.")
        else:
            print("Feedback: Great answer! You captured the main points well.")

    average_score = total_score / len(selected_qas)
    print(f"\n{user_name}, your average interview score for '{selected_domain.title()}' is: {average_score:.2f}/100")

# Entry point of the program
if __name__ == "__main__":
    run_interview_mentor()


Welcome to the AI Interview Mentor!


Hi Varun, let's get you interview-ready!

Available categories: General Programming, General Program, Data Structures, Languages And Frameworks, Database And Sql, Web Development, Software Testing, Version Control, System Design, Security, Devops, Front-End, Back-End, Full-Stack, Algorithms, Machine Learning, Distributed Systems, Networking, Low-Level Systems, Database Systems, Data Engineering, Artificial Intelligence
Sorry, we don't have questions for that category.
