In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Load the data
df = pd.read_csv('team.csv', encoding='MacRoman')
print(df.head())

   code                                    Job Description        label
0   1176       Develop, test, and maintain web applications  Response B
1   1176  Collaborate with the team to design software s...  Response B
2   1176              Occasionally mentor junior developers  Response B
3   1186         Plan and implement communication campaigns  Response B
4   1186  Support team members in creating content and d...  Response B


In [2]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import joblib

# Preprocess the data
df = pd.read_csv('team.csv', encoding='MacRoman')

# Create TF-IDF vectorizer
tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
X = tfidf.fit_transform(df['Job Description '].astype(str))
y = df['label']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Make predictions and evaluate
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Model Accuracy:", accuracy)
print("\
Classification Report:")
print(classification_report(y_test, y_pred))

# Save the model and vectorizer
joblib.dump(model, 'team_development_model.joblib')
joblib.dump(tfidf, 'team_development_vectorizer.joblib')

Model Accuracy: 0.8571428571428571
Classification Report:
              precision    recall  f1-score   support

  Response A       0.96      0.92      0.94        24
  Response B       0.72      0.93      0.81        14
  Response C       1.00      0.25      0.40         4

    accuracy                           0.86        42
   macro avg       0.89      0.70      0.72        42
weighted avg       0.88      0.86      0.84        42



['team_development_vectorizer.joblib']

In [3]:
# Load the saved model and vectorizer
import joblib

# Load the model and vectorizer
model = joblib.load('team_development_model.joblib')
vectorizer = joblib.load('team_development_vectorizer.joblib')

print("Model and vectorizer loaded successfully")

Model and vectorizer loaded successfully


In [4]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import joblib

# Load and preprocess the data
df = pd.read_csv('team.csv', encoding='MacRoman')

# Create three separate models for each question
tfidf = TfidfVectorizer(max_features=1000, stop_words='english')
X = tfidf.fit_transform(df['Job Description '].astype(str))

# Train three separate models for questions 43, 44, and 45
models = []
for i in range(3):
    model = LogisticRegression(max_iter=1000)
    model.fit(X, df['label'])
    models.append(model)

# Save models and vectorizer
joblib.dump(models, 'team_development_models.joblib')
joblib.dump(tfidf, 'team_development_vectorizer.joblib')

print("Models and vectorizer trained and saved successfully")

Models and vectorizer trained and saved successfully


In [6]:
# Load the saved models and vectorizer
models = joblib.load('team_development_models.joblib')
vectorizer = joblib.load('team_development_vectorizer.joblib')

# Job description to analyze
job_description = """The purpose of the role is to contribute to the delivery of taught modules and extra-curricular sessions for undergraduate, postgraduate, apprenticeship, and work-based programmes by providing lived experiences as a guest speaker, aiding curriculum development, and advising on opportunities for the University in West Yorkshire. Reporting to the Director of the Centre for Apprenticeships, Work-Based Learning, and Skills, the key responsibilities include developing and delivering workshops on important topics relevant to the West Yorkshire region, such as race, ethnicity, religion, and economic and social history. Additionally, the role involves contributing to extra-curricular activities and Equality & Diversity events hosted by the University or its partners, supporting recruitment activities, and maintaining links with external organizations to support these contributions. The individual will also advise on expanding the Ambassador network, identifying gaps in provision, and enhancing the student experience through further expertise. Furthermore, they will set up additional activities to raise the University's external profile. General duties include ensuring data use complies with regulations, particularly GDPR, adhering to health, safety, and wellbeing policies, promoting safeguarding and protection of others, applying the University's Equality, Diversity, and Inclusion Policy, and performing other duties as directed by their line manager."""

# Transform the job description using the vectorizer
job_description_vectorized = vectorizer.transform([job_description])

# Predict labels for each question
predictions = [model.predict(job_description_vectorized)[0] for model in models]

# Map numerical labels to A, B, C responses
label_mapping = {
    'Response A': 'A (Team development is frequently used)',
    'Response B': 'B (Team development is used occasionally)',
    'Response C': 'C (Team development is not relevant)'
}

# Print predictions
print("Predictions for Team Development Questions:")
for i, prediction in enumerate(predictions, start=43):
    print(f"Question {i}: {label_mapping[prediction]}")

Predictions for Team Development Questions:
Question 43: A (Team development is frequently used)
Question 44: A (Team development is frequently used)
Question 45: A (Team development is frequently used)


In [7]:
# Load the saved models and vectorizer
models = joblib.load('team_development_models.joblib')
vectorizer = joblib.load('team_development_vectorizer.joblib')

# Job description to analyze
job_description = """The Web Services Manager, part of the Marketing, Communications, and Recruitment Department (Grade 8a), works 35 hours per week, typically from 9:00 am to 5:00 pm, Monday to Friday, with a one-hour lunch break. They are responsible to the Head of Marketing and Recruitment and oversee the Web Developer. The role involves managing the technical infrastructure and framework for Web Services, including the externally facing website and the Web Content Management System (WCMS), to ensure a seamless user experience that aligns with institutional objectives. Key duties include managing the institutional CMS, developing responsive style templates, integrating the CMS with corporate systems, and overseeing the deployment of bespoke code like HTML, CSS, and JavaScript for high-performing, SEO-optimized, user-friendly, and secure websites. Additionally, the manager leads the development of the digital governance framework, including maintaining the University's Pattern Library, and oversees the development of web tools that enhance user experience. They act as the main point of contact for web-related technologies, plan future web services developments, generate reports for leadership, and ensure the effective implementation of the University’s Web Strategy. Other responsibilities include user testing, using analytics to enhance functionality, managing the web budget, documenting code and processes, collaborating with Marketing, Recruitment, and Communications team members, liaising with external suppliers, and ensuring the upkeep of associated systems like the Search Tool and Asset Management Library. The Web Services Manager also supports digital transformation by promoting new technologies and trends, manages web support duties with Digital Team members, ensures secure system hosting in collaboration with IT services, and trains staff on CMS usage. Leadership responsibilities include managing the Web Developer, fostering teamwork, and driving transformational digital change. The role also involves compliance with GDPR, health and safety regulations, and the University's Equality, Diversity, and Inclusion Policy, with flexibility to undertake additional duties as required. The job description is subject to change to accommodate institutional developments, last updated in July 2021"""

# Transform the job description using the vectorizer
job_description_vectorized = vectorizer.transform([job_description])

# Predict labels for each question
predictions = [model.
               predict(job_description_vectorized)[0] for model in models]

# Map numerical labels to A, B, C responses
label_mapping = {
    'Response A': 'A (Team development is frequently used)',
    'Response B': 'B (Team development is used occasionally)',
    'Response C': 'C (Team development is not relevant)'
}

# Print predictions
print("Predictions for Team Development Questions:")
for i, prediction in enumerate(predictions, start=43):
    print(f"Question {i}: {label_mapping[prediction]}")

Predictions for Team Development Questions:
Question 43: A (Team development is frequently used)
Question 44: A (Team development is frequently used)
Question 45: A (Team development is frequently used)


In [8]:
# Load the saved models and vectorizer
models = joblib.load('team_development_models.joblib')
vectorizer = joblib.load('team_development_vectorizer.joblib')

# Job description to analyze
job_description = """The purpose of the role is to create, implement, and manage sustainable solutions for the University's needs across all campus locations by evaluating the environmental impact of day-to-day activities and developments. The Sustainability Manager is responsible for supporting the management, monitoring, and reporting of the institution’s efforts to reduce its carbon footprint across scopes 1, 2, and 3 emissions, managing waste, recycling, and construction activities, and sourcing sustainable energy. The role also includes implementing new green technologies to achieve a net zero carbon status and minimizing the environmental impact of daily operations. Reporting to the Estates Project Development Manager, key responsibilities include sourcing solutions to reduce the University's environmental impact in areas such as materials, waste, energy, and water management, and ensuring compliance with environmental legislation. The role involves conducting ecology surveys, performing environmental impact assessments, and completing mandatory assessments like BREEAM. The manager will collect and analyze data to produce reports on the University's current status and recommend strategies for improvement, including cost-benefit analyses and timelines for implementation. They will advise on compliance with environmental law and suggest cost-effective technologies to lower energy emissions and waste. The role requires staying updated on research and legislation through seminars, publications, and social media. The Sustainability Manager will manage sustainability projects, prepare project bid documents, and ensure compliance with University policies. They will also oversee sustainability initiatives, ensuring the effective use of resources, compliance with legal requirements for waste management, and providing sustainability guidance on construction projects. The role includes maintaining the University’s sustainability and environmental policies and contributing to the management of carbon footprint reduction. The manager will work closely with the Head of Estates and Facilities and the Head of Commercial Enterprises to ensure sustainable and environmental responsibilities are managed effectively. General duties include ensuring compliance with GDPR, adhering to health, safety, and wellbeing policies, promoting safeguarding, applying the University’s Equality, Diversity, and Inclusion Policy, and performing other duties as directed by the line manager. This job description is subject to variation by the Vice-Chancellor to reflect institutional developments."""

# Transform the job description using the vectorizer
job_description_vectorized = vectorizer.transform([job_description])

# Predict labels for each question
predictions = [model.predict(job_description_vectorized)[0] for model in models]

# Map numerical labels to A, B, C responses
label_mapping = {
    'Response A': 'A (Team development is frequently used)',
    'Response B': 'B (Team development is used occasionally)',
    'Response C': 'C (Team development is not relevant)'
}

# Print predictions
print("Predictions for Team Development Questions:")
for i, prediction in enumerate(predictions, start=43):
    print(f"Question {i}: {label_mapping[prediction]}")

Predictions for Team Development Questions:
Question 43: B (Team development is used occasionally)
Question 44: B (Team development is used occasionally)
Question 45: B (Team development is used occasionally)
