In [None]:
# Install necessary libraries
!pip install pandas scikit-learn flask openpyxl



**Uploading the dataset**

In [None]:
import pandas as pd
from google.colab import files

# Open Colab's upload dialog; the returned mapping is keyed by uploaded file name.
uploaded = files.upload()

# Fail with an explicit message when nothing was uploaded (e.g. the dialog was
# cancelled) instead of an opaque IndexError from indexing an empty key list.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and select the dataset (.xlsx) file."
    )

# Load the first uploaded file as the dataset
file_name = next(iter(uploaded))
data = pd.read_excel(file_name)

# Display the first few rows of the dataset
data.head()

Saving updated_automation_risk_1.xlsx to updated_automation_risk_1.xlsx


Unnamed: 0,id,task,job_role,complexity,task_type,creativity,human_interaction,time_taken,frequency,skill_level,Associated_Skills,Tools,Technologies,risk
0,1,Optimize algorithms,Machine Learning Engineer,Low,Technical,Moderate,High,<1 hour,Monthly,Beginner,"TensorFlow, Node.js, Python, SQL, React",VS Code,Cloud Computing,Medium Risk
1,2,Conduct user research,Backend Developer,High,Managerial,High,,Full-day,Monthly,Advanced,"Git, TensorFlow, Node.js, React","MongoDB, Jira, Slack",Web Development,Medium Risk
2,3,Train and fine-tune models,Software Engineer,Low,Creative,High,Moderate,<1 hour,Daily,Beginner,"SQL, TensorFlow, Agile, Docker, JavaScript","Jira, MySQL","Cloud Computing, Big Data",Medium Risk
3,4,Automate infrastructure,Product Manager,Medium,Technical,High,Moderate,1-3 hours,Weekly,Intermediate,"Python, SQL","Tableau, MySQL, MongoDB",Cloud Computing,Medium Risk
4,5,Design wireframes and prototypes,Product Manager,Medium,Managerial,Moderate,,1-3 hours,Daily,Advanced,"Docker, JavaScript, TensorFlow, SQL","Tableau, MongoDB","AI, Web Development",Medium Risk


**Preprocess the data**

Combine the `task`, `Associated_Skills`, `Tools`, and `Technologies` columns into a single text feature for text analysis.

In [None]:
# Build the "Combined Features" text column by joining the four text columns
# left to right with single spaces; missing cells contribute an empty string.
combined_text = data['task'].fillna('')
for column_name in ('Associated_Skills', 'Tools', 'Technologies'):
    combined_text = combined_text + ' ' + data[column_name].fillna('')
data['Combined Features'] = combined_text

# Preview the new column next to the role and risk label
data[['job_role', 'Combined Features', 'risk']].head()

Unnamed: 0,job_role,Combined Features,risk
0,Machine Learning Engineer,"Optimize algorithms TensorFlow, Node.js, Pytho...",Medium Risk
1,Backend Developer,"Conduct user research Git, TensorFlow, Node.js...",Medium Risk
2,Software Engineer,"Train and fine-tune models SQL, TensorFlow, Ag...",Medium Risk
3,Product Manager,"Automate infrastructure Python, SQL Tableau, M...",Medium Risk
4,Product Manager,"Design wireframes and prototypes Docker, JavaS...",Medium Risk


**Vectorizing the task column**

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Fit a TF-IDF vocabulary on the `task` column only (the "Combined Features"
# column is not used here); rows with a missing task become empty strings.
tfidf_task = TfidfVectorizer()
tfidf_matrix = tfidf_task.fit_transform(data['task'].fillna(''))

# With a single argument, cosine_similarity compares the rows with one another,
# giving one row and one column per dataset row.
similarity_matrix = cosine_similarity(tfidf_matrix)

# Report the dimensions of the similarity matrix
print(f"Similarity Matrix Shape: {similarity_matrix.shape}")


Similarity Matrix Shape: (1000, 1000)


**Compute Cosine Similarity**

Use cosine similarity to calculate how similar each task (one row of the dataset) is to every other task.

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity of the TF-IDF rows against themselves.
# NOTE(review): this recomputes the same matrix the vectorization cell already
# produced from `tfidf_matrix`; the result is identical.
similarity_matrix = cosine_similarity(X=tfidf_matrix, Y=tfidf_matrix)

# Report the dimensions of the similarity matrix
print(f"Similarity Matrix Shape: {similarity_matrix.shape}")

Similarity Matrix Shape: (1000, 1000)


**Build the Recommendation Function**

Create a function that ranks the dataset rows by similarity to the input tasks and returns only the roles labelled "Low Risk".

In [None]:
def recommend_jobs_by_multiple_tasks(tasks, num_recommendations=5):
    """
    Recommend "Low Risk" job roles whose task is most similar to the input tasks.

    The input tasks are joined into one query, vectorized with the fitted
    module-level `tfidf_task` vectorizer and compared against `tfidf_matrix`
    (the TF-IDF matrix of the dataset's `task` column). Rows of `data` are
    ranked by cosine similarity and only rows whose `risk` equals 'Low Risk'
    are kept.

    Parameters:
    - tasks (list of str or str): Input tasks to base recommendations on.
      A single string is treated as a one-element list.
    - num_recommendations (int): The number of recommendations to return.

    Returns:
    - recommendations (list of tuples): Up to `num_recommendations` tuples of
      (job_role, task, similarity_score), ordered by descending similarity.
      Rows with equal scores keep their dataset order. Rows are not
      de-duplicated, so the same task may appear for several roles.
    """
    # A bare string would otherwise be joined character by character
    if isinstance(tasks, str):
        tasks = [tasks]

    # Combine the input tasks into a single query string
    combined_query = " ".join(tasks)

    # Transform the combined query into a TF-IDF vector
    query_vector = tfidf_task.transform([combined_query])

    # Similarity between the query and every dataset task. `tfidf_matrix` is the
    # matrix produced when `tfidf_task` was fitted; the previous name
    # `tfidf_task_matrix` is not defined anywhere in the notebook.
    similarity_scores = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Pull the columns out as arrays so lookups are positional and do not
    # depend on the DataFrame's index labels.
    job_roles = data['job_role'].to_numpy()
    task_names = data['task'].to_numpy()
    risk_labels = data['risk'].to_numpy()

    # Rank row positions by similarity, highest first (stable for ties)
    ranked_rows = sorted(
        enumerate(similarity_scores), key=lambda pair: pair[1], reverse=True
    )

    # Keep only the rows labelled "Low Risk", preserving the ranking order
    low_risk_recommendations = [
        (job_roles[row], task_names[row], score)
        for row, score in ranked_rows
        if risk_labels[row] == 'Low Risk'
    ]

    # Return the top N recommendations
    return low_risk_recommendations[:num_recommendations]


**Test the Recommendation System**

Test the function by providing a sample list of tasks.

In [None]:
# Demo query: several task descriptions that are scored together as one request
input_tasks = [
    "Train and fine-tune models",
    "Optimize algorithms",
    "Automate infrastructure",
    "Build predictive models",
]

# Ask for the five best matches for the combined query
recommendations = recommend_jobs_by_multiple_tasks(
    tasks=input_tasks,
    num_recommendations=5,
)

# Print one line per match: role, matched task and similarity to two decimals
print(f"Recommendations for tasks: {input_tasks}")
for role, task, score in recommendations:
    print(f"- {role} ({task}) [Similarity Score: {score:.2f}]")


Recommendations for tasks: ['Train and fine-tune models', 'Optimize algorithms', 'Automate infrastructure', 'Build predictive models']
- Data Scientist (Train and fine-tune models) [Similarity Score: 0.69]
- DevOps Engineer (Build predictive models) [Similarity Score: 0.58]
- Machine Learning Engineer (Optimize algorithms) [Similarity Score: 0.39]
- Machine Learning Engineer (Write and debug code) [Similarity Score: 0.06]
- Frontend Developer (Write and debug code) [Similarity Score: 0.06]
