In [3]:
import pandas as pd
import numpy as np

import nltk 
from nltk.corpus import wordnet as wn

from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer

from nltk.corpus import wordnet_ic

In [4]:
pos_df = pd.read_csv('po.csv',header=None)
pos_df.columns=['POs']
pos_df.shape

(12, 1)

In [5]:
pos_df.head()

Unnamed: 0,POs
0,Engineering knowledge: Apply the knowledge of ...
1,"Problem analysis: Identify, formulate, review ..."
2,Design/development of solutions: Design soluti...
3,Conduct investigations of complex problems: Us...
4,"Modern tool usage: Create, select, and apply a..."


In [6]:
po_tokens=[]
for po in pos_df['POs']:
    po_tokens.append(word_tokenize(po.lower()))

In [7]:
print(pos_df['POs'][0])

Engineering knowledge: Apply the knowledge of mathematics, science, engineering fundamentals, and an engineering specialization to the solution of complex engineering problems.


In [8]:
print(po_tokens[0])

['engineering', 'knowledge', ':', 'apply', 'the', 'knowledge', 'of', 'mathematics', ',', 'science', ',', 'engineering', 'fundamentals', ',', 'and', 'an', 'engineering', 'specialization', 'to', 'the', 'solution', 'of', 'complex', 'engineering', 'problems', '.']


In [9]:
eng_stopwords = stopwords.words('english')
eng_stopwords.extend([',','.',"'",":"])

In [10]:
words = []
for list_1 in po_tokens:
    main_words = []
    for word in list_1:
        if word not in eng_stopwords:
            if word not in main_words:
                main_words.append(word)
    words.append(main_words)

In [11]:
print(words[0])

['engineering', 'knowledge', 'apply', 'mathematics', 'science', 'fundamentals', 'specialization', 'solution', 'complex', 'problems']


In [12]:
wnet = WordNetLemmatizer()

In [13]:
for i in range(len(words)):
    for j in range(len(words[i])):
        lemm = wnet.lemmatize(words[i][j], pos='n')
        words[i][j] = lemm

In [14]:
print(words[0])

['engineering', 'knowledge', 'apply', 'mathematics', 'science', 'fundamental', 'specialization', 'solution', 'complex', 'problem']


In [15]:
po_synonym=[]
for i in range(len(words)):
    po_syn = []
    for j in range(len(words[i])):
        for syn in wn.synsets(words[i][j]): 
#             print("--WORD-- : ",words[i][j])
            if syn not in po_syn:
#                 print("syn : ",syn.name())
                po_syn.append(syn)
    po_synonym.append(po_syn)

In [16]:
print(po_synonym[0])

[Synset('technology.n.01'), Synset('engineering.n.02'), Synset('engineering.n.03'), Synset('engineer.v.01'), Synset('mastermind.v.01'), Synset('cognition.n.01'), Synset('use.v.01'), Synset('apply.v.02'), Synset('apply.v.03'), Synset('put_on.v.07'), Synset('lend_oneself.v.01'), Synset('give.v.20'), Synset('practice.v.04'), Synset('enforce.v.01'), Synset('apply.v.09'), Synset('apply.v.10'), Synset('mathematics.n.01'), Synset('science.n.01'), Synset('skill.n.02'), Synset('fundamental.n.01'), Synset('fundamental.n.02'), Synset('cardinal.s.01'), Synset('fundamental.s.02'), Synset('fundamental.s.03'), Synset('specialization.n.01'), Synset('specialization.n.02'), Synset('specialization.n.03'), Synset('solution.n.01'), Synset('solution.n.02'), Synset('solution.n.03'), Synset('solution.n.04'), Synset('solution.n.05'), Synset('complex.n.01'), Synset('complex.n.02'), Synset('complex.n.03'), Synset('building_complex.n.01'), Synset('complex.a.01'), Synset('problem.n.01'), Synset('problem.n.02'), Sy

In [17]:
## EXAMPLE
dog=wn.synsets('dog', pos=wn.NOUN)[0] #get the first noun synonym of the word "dog"
cat=wn.synsets('cat', pos=wn.NOUN)[0]
rose=wn.synsets('rose', pos=wn.NOUN)[0]
flower=wn.synsets('flower', pos=wn.NOUN)[0]

brown_ic = wordnet_ic.ic('ic-brown.dat') #load the brown corpus to compute the IC

print(rose.res_similarity(flower, brown_ic),rose.res_similarity(dog, brown_ic),cat.res_similarity(dog, brown_ic))

6.0283161048744525 2.2241504712318556 7.911666509036577


In [18]:
cos_df = pd.read_csv('co list.csv',header=None,encoding = 'unicode_escape')
cos_df.columns=['sub_code','Cos']
cos_df.shape

(28, 2)

In [19]:
cos_df.head()

Unnamed: 0,sub_code,Cos
0,UBT304C CO1,Ability to interpret principles of bioenergeti...
1,UBT304C CO2,Ability to understand Carbohydrate metabolism ...
2,UBT304C CO3,Ability to recognize the importance of Lipid m...
3,UBT304C CO4,Ability to understand the origin of atom in th...
4,UBT304C CO5,Ability to comprehend Nucleic acid metabolism ...


In [20]:
cos = cos_df['Cos']
cos.shape

(28,)

In [21]:
co_tokens=[]
for co in cos_df['Cos']:
    co_tokens.append(word_tokenize(co.lower()))

In [22]:
print(cos_df['Cos'][0])

Ability to interpret principles of bioenergetics of high energy compounds


In [23]:
print(co_tokens[0])

['ability', 'to', 'interpret', 'principles', 'of', 'bioenergetics', 'of', 'high', 'energy', 'compounds']


In [24]:
co_words = []
for list_1 in co_tokens:
    co_main_words = []
    for word in list_1:
        if word not in eng_stopwords:
            if word not in co_main_words:
                co_main_words.append(word)
    co_words.append(co_main_words)

In [25]:
print(co_words[0])

['ability', 'interpret', 'principles', 'bioenergetics', 'high', 'energy', 'compounds']


In [26]:
for i in range(len(co_words)):
    for j in range(len(co_words[i])):
        lemm = wnet.lemmatize(co_words[i][j], pos='n')
        co_words[i][j] = lemm

In [27]:
print(co_words[0])

['ability', 'interpret', 'principle', 'bioenergetics', 'high', 'energy', 'compound']


In [38]:
similarity = np.zeros([cos_df.shape[0],pos_df.shape[0]])
for i in range(len(po_synonym)):
    print('i:',i)
    for j in range(len(po_synonym[i])):
#         print('j:',j)
        for x in range(len(co_words)):
#             print('x:',x)
            for y in range(len(co_words[x])):
#                 print('y:',y)
#                 print(similarity[x][i])
#                 print(co_words[x][y])
#                 print(po_synonym[i][j])
                if len(wn.synsets(co_words[x][y])) != 0 :
#                     print('syn:',(wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
                    if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) != None:
                        if i < 4:
                            if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) >= 0.5:
                                similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
                        elif i == 4:
                            if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) >= 0.6:
                                similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
                        elif i < 8:
                            if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) >= 0.75:
                                similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
                        elif i < 10:
                            if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) >= 0.7:
                                similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
                        else:
                            if (wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic) >= 0.6:
                                similarity[x][i] = similarity[x][i]+((wn.synsets(co_words[x][y])[0]).wup_similarity(po_synonym[i][j],brown_ic))
#                         print('sim:',similarity[x][i])
#                         print("####")

for i in range(similarity.shape[0]):
    for j in range(similarity.shape[1]):
        similarity[i][j] = similarity[i][j]/(len(co_words[i])+len(po_synonym[j]))
        similarity[i][j] = (similarity[i][j]*10)
        similarity[i][j] = round(similarity[i][j],ndigits=1)
        if similarity[i][j] > 3:
            similarity[i][j] = 3

i: 0
i: 1
i: 2
i: 3
i: 4
i: 5
i: 6
i: 7
i: 8
i: 9
i: 10
i: 11


In [39]:
for i in range(similarity.shape[0]):
    a = i+1
    b = max(similarity[i])
    c = min(similarity[i])
    print(a,b,c)

1 2.3 0.0
2 1.8 0.0
3 0.9 0.0
4 1.9 0.0
5 1.0 0.0
6 1.2 0.0
7 3.0 0.3
8 3.0 0.1
9 2.6 0.3
10 1.9 0.0
11 2.5 0.1
12 1.4 0.1
13 3.0 0.1
14 2.7 0.5
15 3.0 0.1
16 3.0 0.4
17 3.0 0.2
18 2.5 0.0
19 2.8 0.1
20 1.7 0.0
21 1.3 0.0
22 1.9 0.0
23 3.0 0.0
24 1.9 0.0
25 1.3 0.0
26 0.4 0.0
27 2.7 0.0
28 1.6 0.0


In [40]:
map_df = pd.DataFrame(similarity, columns = ['PO1', 'PO2', 'PO3', 'PO4', 'PO5', 'PO6', 'PO7', 'PO8', 'PO9', 'PO10', 'PO11', 'PO12'])
map_df.insert(0,"Subject code",cos_df['sub_code'])
map_df.insert(1,"CO",cos_df['Cos'])

In [41]:
map_df.head()

Unnamed: 0,Subject code,CO,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,UBT304C CO1,Ability to interpret principles of bioenergeti...,2.3,1.5,2.0,2.3,0.6,0.0,0.0,0.4,0.2,0.1,0.9,0.5
1,UBT304C CO2,Ability to understand Carbohydrate metabolism ...,1.6,1.5,1.8,1.1,0.9,0.2,0.4,0.0,0.1,0.1,0.2,0.6
2,UBT304C CO3,Ability to recognize the importance of Lipid m...,0.9,0.3,0.9,0.8,0.8,0.0,0.1,0.0,0.1,0.1,0.1,0.7
3,UBT304C CO4,Ability to understand the origin of atom in th...,1.6,1.4,1.9,1.0,0.6,0.0,0.2,0.0,0.3,0.1,0.4,0.6
4,UBT304C CO5,Ability to comprehend Nucleic acid metabolism ...,1.0,0.3,0.9,0.7,0.8,0.2,0.2,0.0,0.1,0.1,0.0,0.6


In [42]:
map_df.to_csv('map6.csv',index=False)

In [43]:
y = pd.DataFrame()
y = map_df.head()
co_po_mapping = pd.read_excel('co_po mapping.xlsx')
co_po_map = pd.DataFrame(co_po_mapping)
x = co_po_map.head()
for i in range(0,len(y)):
    print(x.iloc[i]-y.iloc[i])

CO              NaN
PO1            -1.3
PO10           -0.1
PO11           -0.9
PO12           -0.5
PO2             0.5
PO3               1
PO4            -2.3
PO5            -0.6
PO6               0
PO7               3
PO8             2.6
PO9            -0.2
Subject code    NaN
Unnamed: 0      NaN
Name: 0, dtype: object
CO              NaN
PO1             0.4
PO10           -0.1
PO11           -0.2
PO12           -0.6
PO2             1.5
PO3             1.2
PO4             0.9
PO5            -0.9
PO6            -0.2
PO7             1.6
PO8               3
PO9            -0.1
Subject code    NaN
Unnamed: 0      NaN
Name: 1, dtype: object
CO              NaN
PO1             1.1
PO10           -0.1
PO11           -0.1
PO12           -0.7
PO2             2.7
PO3             2.1
PO4             2.2
PO5            -0.8
PO6               3
PO7             1.9
PO8               2
PO9            -0.1
Subject code    NaN
Unnamed: 0      NaN
Name: 2, dtype: object
CO              NaN
PO1        

In [44]:
x

Unnamed: 0.1,Unnamed: 0,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,UBT304C CO1,1,2,3,0,0,0,3,3,0,0,0,0
1,UBT304C CO2,2,3,3,2,0,0,2,3,0,0,0,0
2,UBT304C CO3,2,3,3,3,0,3,2,2,0,0,0,0
3,UBT304C CO4,3,3,3,2,0,2,2,2,0,0,0,0
4,UBT304C CO5,2,2,2,2,0,1,2,2,0,0,0,0


In [45]:
y

Unnamed: 0,Subject code,CO,PO1,PO2,PO3,PO4,PO5,PO6,PO7,PO8,PO9,PO10,PO11,PO12
0,UBT304C CO1,Ability to interpret principles of bioenergeti...,2.3,1.5,2.0,2.3,0.6,0.0,0.0,0.4,0.2,0.1,0.9,0.5
1,UBT304C CO2,Ability to understand Carbohydrate metabolism ...,1.6,1.5,1.8,1.1,0.9,0.2,0.4,0.0,0.1,0.1,0.2,0.6
2,UBT304C CO3,Ability to recognize the importance of Lipid m...,0.9,0.3,0.9,0.8,0.8,0.0,0.1,0.0,0.1,0.1,0.1,0.7
3,UBT304C CO4,Ability to understand the origin of atom in th...,1.6,1.4,1.9,1.0,0.6,0.0,0.2,0.0,0.3,0.1,0.4,0.6
4,UBT304C CO5,Ability to comprehend Nucleic acid metabolism ...,1.0,0.3,0.9,0.7,0.8,0.2,0.2,0.0,0.1,0.1,0.0,0.6
