In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os  

# Programme learning outcomes that course content is compared against.
# Each entry is one outcome statement; downstream output labels them by
# 1-based list position ("Outcome 1", "Outcome 2", ...), so the order matters.
learning_outcomes = [
    "Graduates are familiar with the most relevant theories, methods and techniques in the domain of Software Engineering.",
    "Graduates have the necessary background knowledge to familiarise themselves with novel methods and techniques for life-long learning.",
    "Graduates can successfully apply theory in practice in order to find innovative solutions for both general and domain-specific software engineering problems.",
    "Graduates can make valuable contributions to complex software engineering projects through the independent and critical application of academic knowledge and skills.",
    "Graduates have sufficient technical understanding and intellectual capacity to play, after some years of practical experience, a managerial or advisory role in software engineering.",
    "Graduates can clearly report their findings, both in oral and in written form, and can explain problems at an audience-focused level of abstraction.",
    "Graduates have research skills at the academic level and are capable to autonomously perform research in the domain of software engineering.",
    "Graduates understand why user needs are difficult to express, capture and understand and graduates are familiar with best practices in requirements engineering as well as their shortcomings.",
    "Graduates are able to produce formal specifications of modest-sized samples of software and to use them for the generation of meaningful tests; they understand the essential concepts of software verification.",
    "Graduates master the methods and techniques for analysing existing software systems and their evolution in the context of changing requirements.",
    "Graduates are familiar with the characteristics of software for embedded systems and know how to accommodate these characteristics in the software design and development phases.",
    "Graduates understand why big software projects are prone to failure and are familiar with software engineering process models, their situation-awareness and their general shortcomings.",
    "Graduates are familiar with the concept of DevOps and their benefits for organisational IT infrastructure and services management; they understand how to build cloud-based applications and how to use cloud automation tools across a wide range of application scenarios."
]

# Load the raw course descriptions.
df = pd.read_csv('data/courses.csv')

# One sentence-embedding model is shared by course texts and learning outcomes
# so that both end up in the same vector space.
model = SentenceTransformer('all-mpnet-base-v2')

# Positions 1..13 are taken as the course columns; position 0 is skipped
# (the Year column, per the original note).
course_columns = df.columns[1:14]

# Build one text blob per course: every non-missing cell of the column,
# joined with single spaces.
course_contents = {
    course: df[course].dropna().str.cat(sep=' ')
    for course in course_columns
}

# Two-column table: course name and its aggregated text.
courses_df = pd.DataFrame(
    list(course_contents.items()),
    columns=['Course', 'Content'],
)

# Embed the aggregated course texts, then the learning outcomes.
course_embeddings = model.encode(courses_df['Content'].tolist())
outcome_embeddings = model.encode(learning_outcomes)

# Pairwise cosine similarity: one row per course, one column per outcome.
similarity_scores = cosine_similarity(course_embeddings, outcome_embeddings)

# Label the matrix so it can be printed and plotted directly.
outcome_labels = [f'Outcome {n}' for n in range(1, len(learning_outcomes) + 1)]
similarity_df = pd.DataFrame(
    similarity_scores,
    index=courses_df['Course'],
    columns=outcome_labels,
)

# Display the similarity scores
print("Similarity Scores between Courses and Learning Outcomes:")
print(similarity_df)

# Render the course-by-outcome similarity matrix as an annotated heatmap.
plt.figure(figsize=(16, 9))
sns.heatmap(similarity_df, annot=True, cmap='coolwarm', fmt=".2f", square=True, cbar_kws={"shrink": .8})
plt.title('Course-Level Semantic Similarity to Learning Outcomes')
plt.xlabel('Learning Outcomes')
plt.ylabel('Courses')
plt.xticks(rotation=45, ha='right')

img_folder = 'img'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(img_folder, exist_ok=True)
file_path = os.path.join(img_folder, 'similarity-courses-to-outcomes-individual.png')

try:
    plt.savefig(file_path, bbox_inches='tight', dpi=300)
finally:
    # Always release the figure, even if writing the file failed.
    plt.close()

print(f"Heatmap saved to {file_path}")


In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
import os  

# Programme learning outcomes that course content is compared against.
# Each entry is one outcome statement; downstream output labels them by
# 1-based list position ("Outcome 1", "Outcome 2", ...), so the order matters.
learning_outcomes = [
    "Graduates are familiar with the most relevant theories, methods and techniques in the domain of Software Engineering.",
    "Graduates have the necessary background knowledge to familiarise themselves with novel methods and techniques for life-long learning.",
    "Graduates can successfully apply theory in practice in order to find innovative solutions for both general and domain-specific software engineering problems.",
    "Graduates can make valuable contributions to complex software engineering projects through the independent and critical application of academic knowledge and skills.",
    "Graduates have sufficient technical understanding and intellectual capacity to play, after some years of practical experience, a managerial or advisory role in software engineering.",
    "Graduates can clearly report their findings, both in oral and in written form, and can explain problems at an audience-focused level of abstraction.",
    "Graduates have research skills at the academic level and are capable to autonomously perform research in the domain of software engineering.",
    "Graduates understand why user needs are difficult to express, capture and understand and graduates are familiar with best practices in requirements engineering as well as their shortcomings.",
    "Graduates are able to produce formal specifications of modest-sized samples of software and to use them for the generation of meaningful tests; they understand the essential concepts of software verification.",
    "Graduates master the methods and techniques for analysing existing software systems and their evolution in the context of changing requirements.",
    "Graduates are familiar with the characteristics of software for embedded systems and know how to accommodate these characteristics in the software design and development phases.",
    "Graduates understand why big software projects are prone to failure and are familiar with software engineering process models, their situation-awareness and their general shortcomings.",
    "Graduates are familiar with the concept of DevOps and their benefits for organisational IT infrastructure and services management; they understand how to build cloud-based applications and how to use cloud automation tools across a wide range of application scenarios."
]

# Load the raw course descriptions.
df = pd.read_csv('data/courses.csv')

# One sentence-embedding model is shared by course texts and learning outcomes
# so that both end up in the same vector space.
model = SentenceTransformer('all-mpnet-base-v2')

# Positions 1..13 are taken as the course columns; position 0 is skipped
# (the Year column, per the original note).
course_columns = df.columns[1:14]

# Build one text blob per course: every non-missing cell of the column,
# joined with single spaces.
course_contents = {
    course: df[course].dropna().str.cat(sep=' ')
    for course in course_columns
}

# Two-column table: course name and its aggregated text.
courses_df = pd.DataFrame(
    list(course_contents.items()),
    columns=['Course', 'Content'],
)

# Embed the aggregated course texts, then the learning outcomes.
course_embeddings = model.encode(courses_df['Content'].tolist())
outcome_embeddings = model.encode(learning_outcomes)

# Pairwise cosine similarity: one row per course, one column per outcome.
similarity_scores = cosine_similarity(course_embeddings, outcome_embeddings)

# Labelled copy of the matrix, kept available for inspection.
outcome_labels = [f'Outcome {n}' for n in range(1, len(learning_outcomes) + 1)]
similarity_df = pd.DataFrame(
    similarity_scores,
    index=courses_df['Course'],
    columns=outcome_labels,
)

# Fit a single 2-component PCA on courses and outcomes together so both are
# projected onto the same axes. Course rows come first, outcome rows second.
combined_embeddings = np.vstack((course_embeddings, outcome_embeddings))
pca = PCA(n_components=2)
pca_result = pca.fit_transform(combined_embeddings)

# Split the projected points back into the two groups by row position.
num_courses = len(course_embeddings)
pca_courses, pca_outcomes = pca_result[:num_courses], pca_result[num_courses:]

# Per-point labels, in the same order as the stacked rows.
num_outcomes = len(learning_outcomes)
course_names = ['Course: ' + name for name in courses_df['Course']]
outcome_names = [f'Outcome {n}' for n in range(1, num_outcomes + 1)]

# Plot-ready table: coordinates plus the group and display name of each point.
pca_df = pd.DataFrame(pca_result, columns=['PCA Component 1', 'PCA Component 2'])
pca_df['Type'] = ['Course'] * num_courses + ['Outcome'] * num_outcomes
pca_df['Name'] = course_names + outcome_names

# Scatter the projected points, distinguishing courses from outcomes by
# both colour and marker style.
plt.figure(figsize=(30, 10))
sns.scatterplot(data=pca_df, x='PCA Component 1', y='PCA Component 2', hue='Type', style='Type', s=100)

# Label each point with its course/outcome name, nudged 5 points upward so
# the text does not sit on top of the marker.
for name, x, y in zip(pca_df['Name'], pca_df['PCA Component 1'], pca_df['PCA Component 2']):
    plt.annotate(name,
                 (x, y),
                 textcoords="offset points",
                 xytext=(0, 5),
                 ha='center')

plt.title('PCA of Course Contents and Learning Outcomes')
plt.xlabel('PCA Component 1')
plt.ylabel('PCA Component 2')
# Dashed reference lines through the origin of the projected space.
plt.axhline(0, color='grey', lw=0.5, ls='--')
plt.axvline(0, color='grey', lw=0.5, ls='--')
plt.grid()
plt.legend()
plt.tight_layout()

# Save the PCA plot to a file
img_folder = 'img'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(img_folder, exist_ok=True)
pca_file_path = os.path.join(img_folder, 'pca_courses_outcomes.png')

try:
    plt.savefig(pca_file_path, bbox_inches='tight', dpi=300)
finally:
    # Always release the figure, even if writing the file failed.
    plt.close()

print(f"PCA plot saved to {pca_file_path}")


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os  

# Programme learning outcomes that course content is compared against.
# Each entry is one outcome statement; downstream output labels them by
# 1-based list position ("Outcome 1", "Outcome 2", ...), so the order matters.
learning_outcomes = [
    "Graduates are familiar with the most relevant theories, methods and techniques in the domain of Software Engineering.",
    "Graduates have the necessary background knowledge to familiarise themselves with novel methods and techniques for life-long learning.",
    "Graduates can successfully apply theory in practice in order to find innovative solutions for both general and domain-specific software engineering problems.",
    "Graduates can make valuable contributions to complex software engineering projects through the independent and critical application of academic knowledge and skills.",
    "Graduates have sufficient technical understanding and intellectual capacity to play, after some years of practical experience, a managerial or advisory role in software engineering.",
    "Graduates can clearly report their findings, both in oral and in written form, and can explain problems at an audience-focused level of abstraction.",
    "Graduates have research skills at the academic level and are capable to autonomously perform research in the domain of software engineering.",
    "Graduates understand why user needs are difficult to express, capture and understand and graduates are familiar with best practices in requirements engineering as well as their shortcomings.",
    "Graduates are able to produce formal specifications of modest-sized samples of software and to use them for the generation of meaningful tests; they understand the essential concepts of software verification.",
    "Graduates master the methods and techniques for analysing existing software systems and their evolution in the context of changing requirements.",
    "Graduates are familiar with the characteristics of software for embedded systems and know how to accommodate these characteristics in the software design and development phases.",
    "Graduates understand why big software projects are prone to failure and are familiar with software engineering process models, their situation-awareness and their general shortcomings.",
    "Graduates are familiar with the concept of DevOps and their benefits for organisational IT infrastructure and services management; they understand how to build cloud-based applications and how to use cloud automation tools across a wide range of application scenarios."
]

df = pd.read_csv('data/courses.csv')

# Filter for the current academic year
current_year = '2024-2025'
df = df[df['Year'] == current_year]

# Initialize the SentenceTransformer model
model = SentenceTransformer('all-mpnet-base-v2')

# Positions 1..13 are taken as the course columns; position 0 is skipped.
course_columns = df.columns[1:14]

# Cell values that carry no course content: blank text and the '-' placeholder.
EMPTY_MARKERS = ('', '-')

# Combine content for each course by aggregating all rows of the selected year.
# Placeholder cells are dropped *before* joining, so several '-' rows cannot
# slip through as '- -', and a course left with no real text is skipped
# instead of being embedded as an empty string.
course_contents = {}
for course in course_columns:
    # str() keeps this working for columns pandas did not parse as text
    # (e.g. an all-empty column read as float).
    entries = [str(value) for value in df[course].dropna()]
    entries = [text for text in entries if text.strip() not in EMPTY_MARKERS]
    if entries:
        course_contents[course] = ' '.join(entries)

# Convert the combined contents into a DataFrame
courses_df = pd.DataFrame(list(course_contents.items()), columns=['Course', 'Content'])

# Only embed and plot when at least one course survived the filtering;
# otherwise report it and do nothing.
if courses_df.empty:
    print("No valid courses found for the current academic year.")
else:
    # Generate embeddings for all course contents
    course_embeddings = model.encode(courses_df['Content'].tolist())

    # Generate embeddings for learning outcomes
    outcome_embeddings = model.encode(learning_outcomes)

    # Pairwise cosine similarity: one row per course, one column per outcome
    similarity_scores = cosine_similarity(course_embeddings, outcome_embeddings)

    # Label rows with course names and columns with 1-based outcome numbers
    outcome_labels = [f'Outcome {n}' for n in range(1, len(learning_outcomes) + 1)]
    similarity_df = pd.DataFrame(similarity_scores, index=courses_df['Course'], columns=outcome_labels)

    # Display the similarity scores
    print("Similarity Scores between Courses and Learning Outcomes:")
    print(similarity_df)

    # Render the similarity matrix as an annotated heatmap
    plt.figure(figsize=(16, 9))
    sns.heatmap(similarity_df, annot=True, cmap='coolwarm', fmt=".2f", square=True, cbar_kws={"shrink": .8})
    plt.title('2024-2025 Course-Level Semantic Similarity to Learning Outcomes')
    plt.xlabel('Learning Outcomes')
    plt.ylabel('Courses')
    plt.xticks(rotation=45, ha='right')

    img_folder = 'img'
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(img_folder, exist_ok=True)
    file_path = os.path.join(img_folder, 'similarity-courses-to-outcomes-24-25.png')

    # Save the figure
    try:
        plt.savefig(file_path, bbox_inches='tight', dpi=300)
    finally:
        # Always release the figure, even if writing the file failed.
        plt.close()

    print(f"Heatmap saved to {file_path}")
