In [46]:
from docx import Document
import pandas as pd
import nltk
import numpy as np
import spacy
from sklearn.metrics.pairwise import cosine_similarity

## Teachers Upload their Syllabus 

In [36]:
# Load the document
doc = Document('Syllabus.docx')

# Initialize lists to store table data
co_numbers = []
course_outcomes = []

# Assuming the data is in a table within the Word document
for table in doc.tables:
    for row in table.rows:
        # Get the text from each cell in the row
        cells = [cell.text.strip() for cell in row.cells]
        
        # Skip rows without the expected structure
        if len(cells) == 2:
            co_numbers.append(cells[0])
            course_outcomes.append(cells[1])

# Create a DataFrame
# Directly create DataFrame from extracted table data if column headings are included
df = pd.DataFrame([co_numbers, course_outcomes]).transpose()

# Rename columns only if needed
df.columns = df.iloc[0]  # Set the first row as the header
df = df[1:].reset_index(drop=True)  # Drop the first row and reset the index


In [37]:
df

Unnamed: 0,No.,Course Outcomes
0,CO1,Understand the fundamental principles of image...
1,CO2,Develop proficiency in image enhancement and s...
2,CO3,Develop skills in object detection and recogni...
3,CO4,Apply the image and video analysis approaches ...


## Extract Verbs from the Course Outcomes

In [38]:
course_outcomes=[]
for i in range(len(df)):
    data=df['Course Outcomes'].iloc[i]
    course_outcomes.append(data)

In [39]:
course_outcomes

['Understand the fundamental principles of image and video analysis',
 'Develop proficiency in image enhancement and segmentation',
 'Develop skills in object detection and recognition',
 'Apply the image and video analysis approaches to solve real world problems']

In [40]:
verbs=['VB','VBP','VBD','VBG','VBN']

In [41]:
course_verbs=[]
for i in range(len(course_outcomes)):
    review=course_outcomes[i]
    review=review.split()
    review=nltk.pos_tag(review)
    print(review)
    review=[word for word,tag in review if tag in verbs]
    course_verbs.append(review)


[('Understand', 'IN'), ('the', 'DT'), ('fundamental', 'JJ'), ('principles', 'NNS'), ('of', 'IN'), ('image', 'NN'), ('and', 'CC'), ('video', 'NN'), ('analysis', 'NN')]
[('Develop', 'NNP'), ('proficiency', 'NN'), ('in', 'IN'), ('image', 'NN'), ('enhancement', 'NN'), ('and', 'CC'), ('segmentation', 'NN')]
[('Develop', 'NNP'), ('skills', 'NNS'), ('in', 'IN'), ('object', 'JJ'), ('detection', 'NN'), ('and', 'CC'), ('recognition', 'NN')]
[('Apply', 'VB'), ('the', 'DT'), ('image', 'NN'), ('and', 'CC'), ('video', 'NN'), ('analysis', 'NN'), ('approaches', 'NNS'), ('to', 'TO'), ('solve', 'VB'), ('real', 'JJ'), ('world', 'NN'), ('problems', 'NNS')]


In [42]:
course_verbs

[[], [], [], ['Apply', 'solve']]

In [43]:
df_assements=pd.read_excel("Course Assessments.xlsx")

In [44]:
df_assements

Unnamed: 0.1,Unnamed: 0,CO ID,Course Outcome,Verbs,Assessments
0,0,CO1,Understand the essence of research and the imp...,,
1,1,CO2,Explore the fundamental concepts of data science,,
2,2,CO3,Understand various machine learning algorithms...,"learning, used",Objective test items such as fill-in-the-blank...
3,3,CO4,Learn to think through the ethics surrounding ...,"Learn, think, surrounding","Activities such as research projects, musical ..."
4,4,CO5,Create scientific reports according to specifi...,"according, specified",Objective test items such as fill-in-the-blank...
5,5,CO6,able to understand the concept of the random v...,understand,Objective test items such as fill-in-the-blank...
6,6,CO7,evaluate condition probabilities and condition...,,
7,7,CO8,gain the knowledge of applications of discrete...,gain,"Activities such as research projects, musical ..."
8,8,CO9,identify the applications of continuous distri...,identify,Objective test items such as fill-in-the-blank...
9,9,CO10,apply Chebychevs inequality to verify the conv...,verify,Objective test items such as fill-in-the-blank...


In [45]:
df_assements['Verbs']

0                           NaN
1                           NaN
2                learning, used
3     Learn, think, surrounding
4          according, specified
5                    understand
6                           NaN
7                          gain
8                      identify
9                        verify
10                  programming
11                          NaN
12                      examine
Name: Verbs, dtype: object

In [58]:
flattened_course_verbs = [verb for sublist in course_verbs for verb in sublist]

In [59]:
flattened_course_verbs

['Apply', 'solve']

In [61]:
# Convert the 'verbs' column to lists of individual words, ignoring NaNs
flattened_assessment_verbs = []
for item in df_assements['Verbs'].dropna():
    flattened_assessment_verbs.extend([verb.strip() for verb in item.split(',')])

flattened_assessment_verbs

['learning',
 'used',
 'Learn',
 'think',
 'surrounding',
 'according',
 'specified',
 'understand',
 'gain',
 'identify',
 'verify',
 'programming',
 'examine']

In [63]:

# Load SpaCy model
import numpy as np
import spacy
from sklearn.metrics.pairwise import cosine_similarity

# Load SpaCy model
nlp = spacy.load("en_core_web_md")

# Function to get the SpaCy vector for a word
def get_word_vector(word):
    doc = nlp(word)
    if doc.has_vector:  # Check if the word has a vector
        return doc.vector
    else:
        return np.zeros(nlp.vocab.vectors_length)  # Return a zero vector if word not in vocabulary


# Convert course verbs to vectors
course_verb_vectors = np.array([get_word_vector(verb) for verb in flattened_course_verbs])

# Convert assessment verbs to vectors
assessment_verb_vectors = np.array([get_word_vector(verb) for verb in flattened_assessment_verbs])

# Set similarity threshold
threshold = 0.5  

# Dictionary to store matching assessments
matching_assessments = {}

for i, course_verb in enumerate(flattened_course_verbs):
    matching_assessments[course_verb] = []
    
    # Calculate cosine similarity between the course verb and each assessment verb
    sim_scores = cosine_similarity([course_verb_vectors[i]], assessment_verb_vectors).flatten()
    
    # Find assessment verbs with similarity scores above the threshold
    for j, score in enumerate(sim_scores):
        if score > threshold:
            # Append the assessment description from df_assessments for the matching verb
            matching_assessments[course_verb].append(df_assements.iloc[j]['Assessments'])

# Print results
for verb, assessments in matching_assessments.items():
    print(f"Verb: {verb}")
    print(f"Assessments for course verb '{verb}': {assessments}\n")



Verb: Apply
Assessments for course verb 'Apply': []

Verb: solve
Assessments for course verb 'solve': ['Activities such as research projects, musical compositions, performances, essays, business plans, website designs, or set designs that require students to: make, build, design or generate something new •', 'Objective test items such as fill-in-the-blank, matching, labeling, or multiple-choice questions that require students to: recall or recognize terms, facts, and concepts •, Activities such as case studies, critiques, labs, papers, projects, debates, or concept maps that require students to: discriminate or select relevant and irrelevant parts • determine how elements function together • determine bias, values, or underlying intent in presented material •, Activities such as journals, diaries, critiques, problem sets, product reviews, or studies that require students to: test, monitor, judge, or critique readings, performances, or products against established • criteria or standard