In [16]:
#project: job recommendation system
#using this system we can reccommend the jobs based on their skills and qualification.

1️⃣ Data Processing
The dataset is uploaded manually via Google Colab.
Missing values are handled, and job-related text fields (location, job type) are cleaned for consistency.

In [17]:
#Job Recommendation System Overview
#his system helps users find job recommendations based on their skills, preferred job location, and job type. It uses TF-IDF vectorization and cosine similarity to match user skills with job descriptions in the dataset. The system is deployed using Flask for API handling, ngrok for public exposure, and Streamlit for a user-friendly web interface.

In [1]:
# Import required libraries
import pandas as pd

# Load dataset
from google.colab import files

uploaded = files.upload()  # Upload the dataset manually when prompted
file_name = list(uploaded.keys())[0]  # Get the uploaded filename

df = pd.read_csv(file_name)

# Display dataset info
print("Dataset Information:")
print(df.info())

# Display first few rows
print("\nSample Data:")
print(df.head())

# Check for missing values
print("\nMissing Values Count:")
print(df.isnull().sum())


Saving india_job_market_dataset.csv to india_job_market_dataset.csv
Dataset Information:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   Job ID                 20000 non-null  object
 1   Job Title              20000 non-null  object
 2   Company Name           20000 non-null  object
 3   Job Location           20000 non-null  object
 4   Job Type               20000 non-null  object
 5   Salary Range           20000 non-null  object
 6   Experience Required    20000 non-null  object
 7   Posted Date            20000 non-null  object
 8   Application Deadline   20000 non-null  object
 9   Job Portal             20000 non-null  object
 10  Number of Applicants   20000 non-null  int64 
 11  Education Requirement  20000 non-null  object
 12  Skills Required        20000 non-null  object
 13  Remote/Onsite          20000 non

In [2]:
# Fill missing values with empty string
df.fillna("", inplace=True)

# ✅ Ensure location & job type are clean
df["Job Location"] = df["Job Location"].astype(str).str.strip().str.lower()
df["Job Type"] = df["Job Type"].astype(str).str.strip().str.lower()

# Display processed dataset
print("Processed Dataset Sample:")
df.head()


Processed Dataset Sample:


Unnamed: 0,Job ID,Job Title,Company Name,Job Location,Job Type,Salary Range,Experience Required,Posted Date,Application Deadline,Job Portal,Number of Applicants,Education Requirement,Skills Required,Remote/Onsite,Company Size
0,JOB1,Software Engineer,Amazon,ahmedabad,full-time,5-8 LPA,2-5 years,2025-01-16,2025-01-25,LinkedIn,23,PhD,"C++, SQL, Python",Remote,Small (1-50)
1,JOB2,Marketing Executive,Infosys,ahmedabad,internship,5-8 LPA,2-5 years,2024-12-25,2025-01-19,Indeed,462,MBA,"SQL, C++, Python",Remote,Large (500+)
2,JOB3,Financial Analyst,Deloitte,jaipur,contract,20+ LPA,5-10 years,2025-01-22,2025-01-29,Naukri.com,430,M.Tech,"Machine Learning, Excel, React",Remote,Large (500+)
3,JOB4,Business Analyst,Amazon,delhi,full-time,20+ LPA,2-5 years,2025-01-07,2025-02-06,LinkedIn,387,B.Tech,"Machine Learning, Python, SQL",Hybrid,Small (1-50)
4,JOB5,Software Engineer,Infosys,delhi,full-time,12-20 LPA,10+ years,2024-12-26,2025-01-08,Indeed,199,MBA,"UI/UX, C++, Java",Onsite,Small (1-50)


2️⃣ Job Recommendation Logic

TF-IDF Vectorization is applied to the "Skills Required" column.

Cosine Similarity is used to compare the user's input skills with job descriptions.

Jobs are filtered based on preferred location and job type if specified.

In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Apply TF-IDF Vectorization on skills
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df["Skills Required"])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# ✅ Function to recommend jobs based on user preferences
def recommend_jobs_for_user(user_skills, preferred_location="", preferred_job_type="", top_n=5):
    user_skills_tfidf = tfidf.transform([user_skills])
    user_similarity_scores = cosine_similarity(user_skills_tfidf, tfidf_matrix).flatten()

    filtered_df = df.copy()
    filtered_df["Similarity_Score"] = user_similarity_scores

    print(f"Total jobs before filtering: {len(filtered_df)}")

    # ✅ Apply location filter
    if preferred_location:
        preferred_location = preferred_location.strip().lower()
        filtered_df = filtered_df[filtered_df["Job Location"].str.contains(preferred_location, na=False)]
        print(f"Jobs after location filter ({preferred_location}): {len(filtered_df)}")

    # ✅ Apply job type filter
    if preferred_job_type:
        preferred_job_type = preferred_job_type.strip().lower()
        filtered_df = filtered_df[filtered_df["Job Type"].str.contains(preferred_job_type, na=False)]
        print(f"Jobs after job type filter ({preferred_job_type}): {len(filtered_df)}")

    if filtered_df.empty:
        print("⚠ No jobs found matching the criteria!")

    # Sort and return top N recommendations
    recommended_jobs = filtered_df.sort_values(by="Similarity_Score", ascending=False).head(top_n)

    return recommended_jobs[["Job Title", "Company Name", "Job Location", "Skills Required", "Job Type"]]

# ✅ Example Test
recommended_jobs = recommend_jobs_for_user("Python, SQL", "delhi", "full-time")
print(recommended_jobs)

Total jobs before filtering: 20000
Jobs after location filter (delhi): 1990
Jobs after job type filter (full-time): 500
                 Job Title Company Name Job Location   Skills Required  \
4062      Graphic Designer          HCL        delhi  Python, C++, SQL   
9373      Graphic Designer      Infosys        delhi  C++, Python, SQL   
168        Product Manager          TCS        delhi  SQL, C++, Python   
17615  Marketing Executive    Accenture        delhi  C++, Python, SQL   
9027     Software Engineer        Wipro        delhi  SQL, C++, Python   

        Job Type  
4062   full-time  
9373   full-time  
168    full-time  
17615  full-time  
9027   full-time  


3️⃣ Flask API for Job Recommendation (app.py)

The Flask API exposes an endpoint (/recommend) to receive user inputs and return job recommendations in JSON format.

The API filters jobs based on skills, location, and job type.

In [4]:
with open("app.py", "w") as f:
    f.write("""\
from flask import Flask, request, jsonify
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
df = pd.read_csv("india_job_market_dataset.csv")
df.fillna({"Skills Required": ""}, inplace=True)

# Ensure text consistency
df["Job Location"] = df["Job Location"].astype(str).str.strip().str.lower()
df["Job Type"] = df["Job Type"].astype(str).str.strip().str.lower()

# Apply TF-IDF Vectorization on skills
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df["Skills Required"])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Flask app
app = Flask(__name__)

# ✅ Function for recommending jobs
def recommend_jobs_for_user(user_skills, preferred_location="", preferred_job_type="", top_n=5):
    user_skills_tfidf = tfidf.transform([user_skills])
    user_similarity_scores = cosine_similarity(user_skills_tfidf, tfidf_matrix).flatten()

    filtered_df = df.copy()
    filtered_df["Similarity_Score"] = user_similarity_scores

    # Apply location filter
    if preferred_location:
        preferred_location = preferred_location.strip().lower()
        filtered_df = filtered_df[filtered_df["Job Location"].str.contains(preferred_location, na=False)]

    # Apply job type filter
    if preferred_job_type:
        preferred_job_type = preferred_job_type.strip().lower()
        filtered_df = filtered_df[filtered_df["Job Type"].str.contains(preferred_job_type, na=False)]

    # Sort and return top recommendations
    recommended_jobs = filtered_df.sort_values(by="Similarity_Score", ascending=False).head(top_n)

    return recommended_jobs[["Job Title", "Company Name", "Job Location", "Skills Required", "Job Type"]]

# ✅ API Route for Job Recommendations
@app.route("/recommend", methods=["POST"])
def recommend_jobs():
    data = request.json
    user_skills = data.get("skills", "")
    preferred_location = data.get("location", "")
    preferred_job_type = data.get("job_type", "")

    recommended_jobs = recommend_jobs_for_user(user_skills, preferred_location, preferred_job_type)
    return jsonify(recommended_jobs.to_dict(orient="records"))

if __name__ == "__main__":
    app.run(port=5000)

""")


In [5]:
!nohup python app.py &


nohup: appending output to 'nohup.out'


4️⃣ ngrok for Public API Exposure

ngrok is used to make the Flask API publicly accessible.

The API URL is printed after running ngrok.

In [6]:
!pip install flask pyngrok


Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [7]:
!ngrok authtoken 2u5PFYKR21zBgEabsuZjuI3CAK1_24BbdskngMkvNg7LPRMBo

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [8]:
from pyngrok import ngrok

# Open a tunnel on port 5000 (where Flask is running)
public_url = ngrok.connect(5000)

# Print the public URL
print(f"Public API URL: {public_url}")


Public API URL: NgrokTunnel: "https://54b2-34-19-119-8.ngrok-free.app" -> "http://localhost:5000"


In [10]:
!curl -X POST "http://54b2-34-19-119-8.ngrok-free.app/recommend" -H "Content-Type: application/json" -d '{"skills": "Python, SQL, Machine Learning"}'


5️⃣ Streamlit Web Interface (streamlit_app.py)

Streamlit provides a user-friendly interface for job searching.

Users enter their skills, location, and job type and get recommendations.

The interface fetches results from the Flask API via ngrok.


In [13]:
with open("streamlit_app.py", "w") as f:
    f.write("""\
import streamlit as st
import requests

st.title("🔍 Job Recommendation System")

# Get user inputs
user_skills = st.text_area("Enter your skills (comma-separated)", "")
preferred_location = st.text_input("Enter preferred job location (optional)")
preferred_job_type = st.selectbox("Select job type (optional)", ["", "full-time", "part-time", "remote", "internship","contract"])

if st.button("Get Job Recommendations"):
    if user_skills:
        API_URL = "https://54b2-34-19-119-8.ngrok-free.app/recommend"  # Update this with your actual ngrok URL

        response = requests.post(API_URL, json={
            "skills": user_skills,
            "location": preferred_location,
            "job_type": preferred_job_type
        })

        if response.status_code == 200:
            job_recommendations = response.json()
            if job_recommendations:
                st.write("### Recommended Jobs:")
                for job in job_recommendations:
                    st.write(f"**{job['Job Title']}** at {job['Company Name']} ({job['Job Location']})")
                    st.write(f"*Required Skills:* {job['Skills Required']}")
                    st.write(f"📌 Job Type: {job.get('Job Type', 'Not Specified')}")
                    st.write("---")
            else:
                st.warning("No matching jobs found.")
        else:
            st.error("Error fetching recommendations. Please try again!")
    else:
        st.warning("Please enter at least one skill!")

""")


In [14]:
!pip install streamlit pyngrok
!nohup streamlit run streamlit_app.py &


Collecting streamlit
  Downloading streamlit-1.43.1-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.43.1-py2.py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m81.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m92.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[

In [15]:
from pyngrok import ngrok

# Start ngrok tunnel for Streamlit (port 8501)
streamlit_url = ngrok.connect(8501)
print(f"Public Streamlit URL: {streamlit_url}")


Public Streamlit URL: NgrokTunnel: "https://f839-34-19-119-8.ngrok-free.app" -> "http://localhost:8501"


if you get error while running the tunnels or streamlit or flask  then you can use below code to correct the errors

In [18]:
# i go through the link and enter skills but i gives error as "Error fetching recommendations. Please try again!" so the below code is for checking the error.

In [19]:
!ps aux | grep python #running processes


root          68  0.8  0.0      0     0 ?        Z    01:08   0:20 [python3] <defunct>
root          69  0.0  0.3  63772 50036 ?        S    01:08   0:00 python3 /usr/local/bin/colab-file
root          90  0.2  0.9 461256 127908 ?       Sl   01:08   0:06 /usr/bin/python3 /usr/local/bin/j
root         909  1.1 25.4 4635624 3383752 ?     Ssl  01:11   0:25 /usr/bin/python3 -m colab_kernel_
root         943  0.2  0.1 545760 21508 ?        Sl   01:11   0:06 /usr/bin/python3 /usr/local/lib/p
root        2091  0.6 24.9 3731680 3318240 ?     Sl   01:16   0:12 python3 app.py
root        3103  0.2  0.4 366108 61580 ?        Sl   01:20   0:03 /usr/bin/python3 /usr/local/bin/s
root        9667  0.0  0.0   7376  3440 ?        S    01:47   0:00 /bin/bash -c ps aux | grep python
root        9669  0.0  0.0   6484  2248 ?        S    01:47   0:00 grep python


In [20]:
from pyngrok import ngrok
print(ngrok.get_tunnels())


[<NgrokTunnel: "https://54b2-34-19-119-8.ngrok-free.app" -> "http://localhost:5000">, <NgrokTunnel: "https://f839-34-19-119-8.ngrok-free.app" -> "http://localhost:8501">]


In [22]:
import requests

API_URL = "https://54b2-34-19-119-8.ngrok-free.app/recommend"

response = requests.post(API_URL, json={"skills": "C++, SQL, Python"})
print(response.status_code)
print(response.json())


200
[{'Company Name': 'Amazon', 'Job Location': 'ahmedabad', 'Job Title': 'Software Engineer', 'Job Type': 'full-time', 'Skills Required': 'C++, SQL, Python'}, {'Company Name': 'Google', 'Job Location': 'mumbai', 'Job Title': 'Sales Representative', 'Job Type': 'part-time', 'Skills Required': 'C++, SQL, Python'}, {'Company Name': 'Amazon', 'Job Location': 'pune', 'Job Title': 'Financial Analyst', 'Job Type': 'internship', 'Skills Required': 'SQL, C++, Python'}, {'Company Name': 'Amazon', 'Job Location': 'hyderabad', 'Job Title': 'Software Engineer', 'Job Type': 'contract', 'Skills Required': 'SQL, Python, C++'}, {'Company Name': 'Microsoft', 'Job Location': 'noida', 'Job Title': 'Data Scientist', 'Job Type': 'full-time', 'Skills Required': 'C++, Python, SQL'}]


In [32]:
#to kill multiple tunnels running

In [2]:
!pgrep -af ngrok


In [63]:
!kill 55352  #!pkill -f ngrok (to kill all ngrok processes)

