In [2]:
from transformers import AutoModel,AutoTokenizer
import torch.nn.functional as F

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased",output_hidden_states=True)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [3]:
sentence_1="dead"
sentence_2="born"

token_1=tokenizer("dead",return_tensors="pt")
token_2=tokenizer("born",return_tensors="pt")

sentence_1_embedding=model(**token_1).hidden_states
sentence_2_embedding=model(**token_2).hidden_states


In [16]:
vector_1=sentence_1_embedding[12][0][0].unsqueeze(0)
vector_2=sentence_2_embedding[12][0][0].unsqueeze(0)

In [5]:
F.cosine_similarity(vector_1,vector_2)

tensor([0.9177], grad_fn=<SumBackward1>)

In [6]:
from sentence_transformers import SentenceTransformer, util

sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

emb1 = sentence_model.encode("dead", convert_to_tensor=True)
emb2 = sentence_model.encode("born", convert_to_tensor=True)

util.cos_sim(emb1, emb2)


tensor([[0.4146]], device='cuda:0')

In [7]:
#Testing Data
import json 
datas=json.load(open("datas1.json"))
datas

[{'student': 'at sea level 100 celsius degree boils Water.',
  'answer_key': 'Water boils at 100 degree celsius at sea level.'},
 {'student': 'powerhouse cell mitochondria the is of The.',
  'answer_key': 'The mitochondria is the powerhouse of the cell.'},
 {'student': 'law states force upon unless uniform acted object motion an an of external rest in first Newtonâ€™s remains a or.',
  'answer_key': 'Newtonâ€™s first law states that an object remains in a state of rest or uniform motion unless acted upon by an external force.'},
 {'student': 'leaves the chloroplasts happens plants. Photosynthesis the of in in',
  'answer_key': 'Photosynthesis happens in the leaves of plants, in the chloroplasts.'},
 {'student': 'capital Paris. of The France is',
  'answer_key': 'The capital of France is Paris.'},
 {'student': 'change of velocity time. with Acceleration respect is to of rate the',
  'answer_key': 'Acceleration is the rate of change of velocity with respect to time.'},
 {'student': 'sola

In [8]:
def bert_base_eval(datas):
    for data in datas:
        tok_1=tokenizer(data["answer_key"],return_tensors="pt")
        tok_2=tokenizer(data["student"],return_tensors="pt")
        
        sen_1=model(**tok_1).hidden_states[12][0][0]
        sen_2=model(**tok_2).hidden_states[12][0][0]
        similarity_score=F.cosine_similarity(sen_1.unsqueeze(0),sen_2.unsqueeze(0))
        data["result"]=similarity_score[0].item()
        print(similarity_score[0].item())
    return datas
datas=bert_base_eval(datas)

0.8676475882530212
0.8583948612213135
0.8573887348175049
0.8058281540870667
0.7572724223136902
0.7925795912742615
0.8996734619140625
0.8967288732528687


In [20]:
json.dump(datas,open("result.json","w"))

In [19]:
def miniLM_eval(datas):
    for data in datas:
        emb1 = sentence_model.encode(data["student"], convert_to_tensor=True)
        emb2 = sentence_model.encode(data['answer_key'], convert_to_tensor=True)
        print(util.cos_sim(emb1, emb2).item())
miniLM_eval(datas)

0.9715495109558105
0.9411647915840149
0.8498974442481995
0.9112030267715454
0.9515819549560547
0.8504413366317749
0.9069757461547852
0.946125864982605


In [24]:
miniLM_eval(json.load(open("datas.json",'r')))

0.9698219299316406
0.9241585731506348
0.9132532477378845
0.8559527397155762
0.7573239207267761
0.8477521538734436
0.9327422380447388
0.9517584443092346


In [25]:
miniLM_eval(json.load(open("datas2.json",'r')))

0.925910472869873
0.9345394372940063
0.7338201403617859
0.8938953876495361
0.94517982006073
0.7775416970252991
0.8826137781143188
0.9281245470046997


In [11]:
bert_base_eval(json.load(open("datas2.json",'r')))

0.929330587387085
0.8221029043197632
0.713216245174408
0.892433226108551
0.8916035294532776
0.7825384736061096
0.8471053242683411
0.8463495373725891


[{'student': 'Water boils 100Â°C sea level',
  'answer_key': 'Water boils at 100 degree celsius at sea level.',
  'result': 0.929330587387085},
 {'student': 'Mitochondria powerhouse cell',
  'answer_key': 'The mitochondria is the powerhouse of the cell.',
  'result': 0.8221029043197632},
 {'student': 'Newton first law rest motion external force',
  'answer_key': 'Newtonâ€™s first law states that an object remains in a state of rest or uniform motion unless acted upon by an external force.',
  'result': 0.713216245174408},
 {'student': 'Photosynthesis leaves plants chloroplasts',
  'answer_key': 'Photosynthesis happens in the leaves of plants, in the chloroplasts.',
  'result': 0.892433226108551},
 {'student': 'Capital France Paris',
  'answer_key': 'The capital of France is Paris.',
  'result': 0.8916035294532776},
 {'student': 'Acceleration rate change velocity time',
  'answer_key': 'Acceleration is the rate of change of velocity with respect to time.',
  'result': 0.7825384736061096

In [17]:
bert_base_eval(json.load(open("datas.json",'r')))

0.9990492463111877
0.9725960493087769
0.9732392430305481
0.9720597863197327
0.9818588495254517
0.9661609530448914
0.9929742813110352
0.9989824891090393


[{'student': 'Water boils at 90 degree celsius at sea level.',
  'answer_key': 'Water boils at 100 degree celsius at sea level.',
  'result': 0.9990492463111877},
 {'student': 'The mitochondria is the powerhouse of the body.',
  'answer_key': 'The mitochondria is the powerhouse of the cell.',
  'result': 0.9725960493087769},
 {'student': 'Newtonâ€™s first law states that an object in motion stays at motion unless stopped by an external force.',
  'answer_key': 'Newtonâ€™s first law states that an object remains in a state of rest or uniform motion unless acted upon by an external force.',
  'result': 0.9732392430305481},
 {'student': 'Photosynthesis happens in the roots of plants.',
  'answer_key': 'Photosynthesis happens in the leaves of plants, in the chloroplasts.',
  'result': 0.9720597863197327},
 {'student': 'The capital of France is Berlin.',
  'answer_key': 'The capital of France is Paris.',
  'result': 0.9818588495254517},
 {'student': 'Acceleration is rate of change of speed 

In [3]:
tokenizer_large = AutoTokenizer.from_pretrained("bert-large-cased")
model_large = AutoModel.from_pretrained("bert-large-cased",output_hidden_states=True)

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


In [5]:
import json
def bert_base_large_eval(datas):
    for data in datas:
        tok_1=tokenizer_large(data["answer_key"],return_tensors="pt")
        tok_2=tokenizer_large(data["student"],return_tensors="pt")
        
        sen_1=model_large(**tok_1).hidden_states[12][0][0]
        sen_2=model_large(**tok_2).hidden_states[12][0][0]
        similarity_score=F.cosine_similarity(sen_1.unsqueeze(0),sen_2.unsqueeze(0))
        data["result"]=similarity_score[0].item()
        print(similarity_score[0].item())
    return datas
bert_base_large_eval(json.load(open("datas.json",'r')))

0.9975014328956604
0.9886413216590881
0.9853746891021729
0.9835785627365112
0.978088915348053
0.9814592599868774
0.9895030856132507
0.9984897375106812


[{'student': 'Water boils at 90 degree celsius at sea level.',
  'answer_key': 'Water boils at 100 degree celsius at sea level.',
  'result': 0.9975014328956604},
 {'student': 'The mitochondria is the powerhouse of the body.',
  'answer_key': 'The mitochondria is the powerhouse of the cell.',
  'result': 0.9886413216590881},
 {'student': 'Newtonâ€™s first law states that an object in motion stays at motion unless stopped by an external force.',
  'answer_key': 'Newtonâ€™s first law states that an object remains in a state of rest or uniform motion unless acted upon by an external force.',
  'result': 0.9853746891021729},
 {'student': 'Photosynthesis happens in the roots of plants.',
  'answer_key': 'Photosynthesis happens in the leaves of plants, in the chloroplasts.',
  'result': 0.9835785627365112},
 {'student': 'The capital of France is Berlin.',
  'answer_key': 'The capital of France is Paris.',
  'result': 0.978088915348053},
 {'student': 'Acceleration is rate of change of speed w

In [3]:
tokenizer_chem = AutoTokenizer.from_pretrained("recobo/chemical-bert-uncased")
model_chem = AutoModel.from_pretrained("recobo/chemical-bert-uncased",output_hidden_states=True)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Some weights of BertModel were not initialized from the model checkpoint at recobo/chemical-bert-uncased and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [10]:
import json
def bert_base_large_eval(datas):
    for data in datas:
        tok_1=tokenizer_chem(data["answer_key"],return_tensors="pt")
        tok_2=tokenizer_chem(data["student"],return_tensors="pt")
        
        sen_1=model_chem(**tok_1).hidden_states[12][0][0]
        sen_2=model_chem(**tok_2).hidden_states[12][0][0]
        similarity_score=F.cosine_similarity(sen_1.unsqueeze(0),sen_2.unsqueeze(0))
        data["result"]=similarity_score[0].item()
        print(similarity_score[0].item())
    return datas
bert_base_large_eval(json.load(open("chem1.json",'r')))

0.941123366355896
0.8748675584793091
0.9729120135307312
0.9875351190567017
0.9782614707946777
0.9324246644973755
0.950079619884491
0.943078339099884


[{'student': 'Water is made up of two atoms of hydrogen and one atom of oxygen.',
  'answer_key': 'Water consists of two hydrogen atoms and one oxygen atom.',
  'result': 0.941123366355896},
 {'student': 'NaCl is called sodium dioxide.',
  'answer_key': 'NO2 is called sodium chloride.',
  'result': 0.8748675584793091},
 {'student': 'The atomic number of carbon is 14.',
  'answer_key': 'The atomic number of carbon is 6.',
  'result': 0.9729120135307312},
 {'student': 'An acid increases the concentration of hydroxide ions in a solution.',
  'answer_key': 'An acid increases the concentration of hydrogen ions in a solution.',
  'result': 0.9875351190567017},
 {'student': 'Covalent bonds are formed by the transfer of electrons.',
  'answer_key': 'Covalent bonds are formed by the sharing of electrons.',
  'result': 0.9782614707946777},
 {'student': 'The pH of a neutral solution is 7.',
  'answer_key': 'A neutral solution has a pH of 7.',
  'result': 0.9324246644973755},
 {'student': 'Ammonia

In [13]:
bert_base_large_eval(json.load(open("chem2.json",'r',encoding="utf-8")))

0.968495786190033
0.9103330969810486
0.9139240980148315
0.9621149301528931
0.9078491926193237
0.8751125931739807
0.9483076930046082
0.9091223478317261


[{'student': 'Water is composed of hydrogen and oxygen atoms. It has a bent molecular shape due to the two lone pairs on the oxygen atom.',
  'answer_key': 'Water consists of two hydrogen atoms and one oxygen atom, forming a bent shape because of lone pairs on oxygen. This gives water its polar nature.',
  'result': 0.968495786190033},
 {'student': 'Sodium chloride is a compound formed when sodium gives up one electron to chlorine. This forms an ionic bond between them.',
  'answer_key': 'NaCl forms through ionic bonding, where sodium donates an electron to chlorine. The resulting ions are held together by electrostatic forces.',
  'result': 0.9103330969810486},
 {'student': 'Carbon has an atomic number of 14 and forms 2 bonds with other elements. It is a nonmetal.',
  'answer_key': 'Carbon has an atomic number of 6, meaning it has 6 protons. It typically forms 4 covalent bonds due to its 4 valence electrons.',
  'result': 0.9139240980148315},
 {'student': 'Acids increase hydroxide ion

In [14]:
bert_base_large_eval(json.load(open("chem3.json",'r',encoding="utf-8")))

0.9647703170776367
0.9330381751060486
0.9301390647888184
0.9197496771812439
0.9528072476387024
0.9427844285964966
0.9344231486320496
0.9267257452011108


[{'student': 'Water is composed of hydrogen and oxygen atoms. It is a universal solvent and has a bent molecular geometry due to the lone pairs on the oxygen. These lone pairs cause a partial negative charge near the oxygen atom, making the molecule polar.',
  'answer_key': 'A water molecule consists of two hydrogen atoms and one oxygen atom, forming a bent shape because of two lone electron pairs on the oxygen. This polarity enables hydrogen bonding, making water an excellent solvent for many substances.',
  'result': 0.9647703170776367},
 {'student': 'Sodium chloride is an example of a covalent compound formed when sodium and chlorine share electrons. It dissolves well in water due to its polar nature.',
  'answer_key': 'Sodium chloride (NaCl) is an ionic compound formed when sodium donates an electron to chlorine, creating positive (Na⁺) and negative (Cl⁻) ions. These ions are held together by strong electrostatic forces and readily dissociate in water.',
  'result': 0.9330381751060