In [144]:
# Import libraries
from model import *

In [145]:
# Define the path to the training and testing data
train_path = './train_data'
test_path = './test_data'

In [146]:
# Create NLP model
nlp = NLPModel(0.5, 3)
train_data = nlp._NLPModel__load_folder(train_path),
test_data = nlp._NLPModel__load_folder(test_path),

In [147]:
# Show the first training and testing data
test_data[0][:5]

['This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinite actuator faults and range constraints. Traditional cheating detection methods have many disadvantages, such as difficult to detect covert equipment cheating, multi-source cheating, difficult to distinguish plagiarists from plagiarists, difficult to distinguish plagiarists from victims, or plagiarism from coincidences. To address these issues, we leverage the power of fuzzy logic systems (FLSs) and employ adaptive methods to approximate unknown nonlinear functions and uncertain parameters present in robotic dynamics. In the course of information exploration, the problems of collision avoidance and connectivity maintenance are ever present due to limitations of distance and visual fields. In this paper, the concept of knowledge point mastery Index is introduced to measure students’ mastery of a certain knowledge point, and a

In [148]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report
import numpy as np

# Data preparation
abstracts = train_data[0]
subjects = [[0]*10, [1]*10, [2]*10, [3]*10, [4]*10, [5]*10, [6]*10, [7]*10, [8]*10, [9]*10, [10]*10, [11]*10]

# Split abstracts into groups of 10
abstracts_groups = [abstracts[i:i+10] for i in range(0, len(abstracts), 10)]
subjects_groups = [[i]*10 for i in range(len(abstracts_groups))]

# Flatten the groups
abstracts = [abstract for group in abstracts_groups for abstract in group]
subjects = [subject for group in subjects_groups for subject in group]

# Feature extraction and model selection
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer()),
    ('clf', LogisticRegression())
])

# Training
pipeline.fit(abstracts, subjects)

# Evaluation
predicted_subjects = pipeline.predict(abstracts)
print("Classification Report:")
print(classification_report(subjects, predicted_subjects))

# Prediction example
print(test_data[0])

predicted_new_subjects = pipeline.predict(test_data[0])
print("Predicted Subjects for New Abstracts:")
print(predicted_new_subjects)


Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10
           2       1.00      0.90      0.95        10
           3       1.00      1.00      1.00        10
           4       1.00      1.00      1.00        10
           5       0.91      1.00      0.95        10
           6       1.00      1.00      1.00        10
           7       1.00      1.00      1.00        10
           8       1.00      1.00      1.00        10
           9       1.00      1.00      1.00        10
          10       1.00      1.00      1.00        10

    accuracy                           0.99       110
   macro avg       0.99      0.99      0.99       110
weighted avg       0.99      0.99      0.99       110

['This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinit

In [158]:
from transformers import BertTokenizer, BertModel
import torch
from sklearn.metrics.pairwise import cosine_similarity

# Load pre-trained BERT model and tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
model = BertModel.from_pretrained('bert-base-uncased')

def train_model(sentence1, sentence2):
    # Tokenize input
    inputs = tokenizer([sentence1, sentence2], return_tensors='pt', padding=True, truncation=True)

    # Get BERT embeddings
    with torch.no_grad():
        outputs = model(**inputs)

    # Extract the embeddings for the [CLS] token
    embeddings = outputs.last_hidden_state[:, 0, :]

    # Reshape the embeddings
    embeddings = embeddings.numpy()

    # Compute similarity
    similarity_score = cosine_similarity(embeddings[0].reshape(1, -1), embeddings[1].reshape(1, -1))[0][0]
    print("Similarity score:", similarity_score)


In [159]:
sentence1 = "This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinite actuator faults and range constraints. Traditional cheating detection methods have many disadvantages, such as difficult to detect covert equipment cheating, multi-source cheating, difficult to distinguish plagiarists from plagiarists, difficult to distinguish plagiarists from victims, or plagiarism from coincidences. To address these issues, we leverage the power of fuzzy logic systems (FLSs) and employ adaptive methods to approximate unknown nonlinear functions and uncertain parameters present in robotic dynamics. In the course of information exploration, the problems of collision avoidance and connectivity maintenance are ever present due to limitations of distance and visual fields. In this paper, the concept of knowledge point mastery Index is introduced to measure students’ mastery of a certain knowledge point, and a test method of cheating based on improved cognitive diagnostic model is proposed. Furthermore, to reduce the number of controller executions and compensate for any effect arising from infinite actuator failures, robots engage with their leader at the moment of actuator faults using fewer network communication resources yet maintain uninterrupted tracking of the desired trajectory generated by the leader. We guarantee that all signals are semi-global uniformly ultimately bounded (SGUUB). Ultimately, we demonstrate the practical feasibility of the ETFT control strategy for nonholonomic multirobot systems. The experiments show that the precision and recall rate of this method are significantly higher than those of the method based on the false-same rate, the method based on the false-same rate and the right-same rate and the method based on the Person-Fit index."
sentence2 = "This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinite actuator faults and range constraints. To address these issues, we leverage the power of fuzzy logic systems (FLSs) and employ adaptive methods to approximate unknown nonlinear functions and uncertain parameters present in robotic dynamics. In the course of information exploration, the problems of collision avoidance and connectivity maintenance are ever present due to limitations of distance and visual fields. In this regard, we introduce a general barrier function and prescribed performance methodology to tackle constrained range impediments effectively. Furthermore, to reduce the number of controller executions and compensate for any effect arising from infinite actuator failures, robots engage with their leader at the moment of actuator faults using fewer network communication resources yet maintain uninterrupted tracking of the desired trajectory generated by the leader. With the aid of the dynamic surface technology, we propose a decentralized adaptive event-triggering fault-tolerant (ETFT) formation control strategy. We guarantee that all signals are semi-global uniformly ultimately bounded (SGUUB). Ultimately, we demonstrate the practical feasibility of the ETFT control strategy for nonholonomic multirobot systems."
train_model(sentence1, sentence2)


Similarity score: 0.9323094


In [151]:
sentence1 = "The utilization of Artificial Intelligence (AI) technologies in education has surged, leading to a rise in published studies. Despite this, comprehensive large-scale reviews in this field are lacking. This study aims to bridge this gap by analyzing 4,519 publications from 2000 to 2019, using topic-based bibliometrics to identify trends and topics related to AI applications in education (AIEd). Results indicate a growing interest in using AI for educational purposes within the academic community. The primary research topics include intelligent tutoring systems for special education, natural language processing for language education, educational robots for AI education, educational data mining for performance prediction, discourse analysis in computer-supported collaborative learning, neural networks for teaching evaluation, affective computing for learner emotion detection, and recommender systems for personalized learning. The study also addresses the challenges and future directions of AIEd."
sentence2 = "With the increasing use of Artificial Intelligence (AI) technologies in education, the number of published studies in the field has increased. However, no large-scale reviews have been conducted to comprehensively investigate the various aspects of this field. Based on 4,519 publications from 2000 to 2019, we attempt to fill this gap and identify trends and topics related to AI applications in education (AIEd) using topicbased bibliometrics. Results of the review reveal an increasing interest in using AI for educational purposes from the academic community. The main research topics include intelligent tutoring systems for special education; natural language processing for language education; educational robots for AI education; educational data mining for performance prediction; discourse analysis in computer-supported collaborative learning; neural networks for teaching evaluation; affective computing for learner emotion detection; and recommender systems for personalized learning. We also discuss the challenges and future directions of AIEd."
train_model(sentence1, sentence2)


Similarity score: 0.9371376


In [152]:
sentence1 = "Artifcial intelligence (AI) is looked upon nowadays as the potential major catalyst for the fourth industrial revolution. In the last decade, AI use in Orthopaedics increased approximately tenfold. Artifcial intelligence helps with tracking activities, evaluating diagnostic images, predicting injury risk, and several other uses. Chat Generated Pre-trained Transformer (ChatGPT), which is an AI-chatbot, represents an extremely controversial topic in the academic community. The aim of this review article is to simplify the concept of AI and study the extent of AI use in Orthopaedics and sports medicine literature. Additionally, the article will also evaluate the role of ChatGPT in scientifc research and publications."
sentence2 = "Accurately scientific disciplines, including biomechanics, genetics, ethology, and neurology, it is essential to accurately track the behavior of animals throughout studies, particularly without employing markers. However, it has proven difficult to extract precise stances from backgrounds that are always shifting. Recently, we unveiled an open-source toolset that makes use of a cutting-edge algorithm for estimating human position. With the help of this toolbox, users may train a deep neural network to accurately monitor user-defined features with tracking accuracy that rivals that of human labeling. We have added new features, including as graphical user interfaces (GUIs), efficiency improvements, and network refinement based on active learning, to this revised Python module. In order to help customers create a unique and repeatable analysis pipeline using a graphical processing unit (GPU)."
train_model(sentence1, sentence2)


Similarity score: 0.85587454


In [160]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch
from transformers import AutoTokenizer, AutoModel #for embeddings
from sklearn.metrics.pairwise import cosine_similarity #for similarity

In [161]:
# download pretrained model
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased",)
model = AutoModel.from_pretrained("bert-base-uncased",output_hidden_states=True)

In [162]:
# create embeddings
def get_embeddings(text,token_length):
    tokens=tokenizer(text,max_length=token_length,padding='max_length',truncation=True)
    output=model(torch.tensor(tokens.input_ids).unsqueeze(0),
                 attention_mask=torch.tensor(tokens.attention_mask).unsqueeze(0)).hidden_states[-1]
    return torch.mean(output,axis=1).detach().numpy()

In [187]:
#calculate similarity
def calculate_similarity(text1,text2,text3,token_length=20):
    out1=get_embeddings(text1,token_length=token_length) #create embeddings of text
    out2=get_embeddings(text2,token_length=token_length) #create embeddings of text
    out3=get_embeddings(text3,token_length=token_length) #create embeddings of text
    sim1= cosine_similarity(out1,out2)[0][0]
    sim2= cosine_similarity(out1,out3)[0][0]
    print(sim1,sim2)
    if sim1>sim2:
        print('sentence 1 is more similar to input sentence')
    else:
        print('sentence 2 is more similar to input sentence')

In [188]:
text1 = 'Despite this, comprehensive large-scale reviews in this field are lacking.'
text2 = 'However, no large-scale reviews have been conducted to comprehensively investigate the various aspects of this field.'
text3 = 'This study provided a content analysis of studies aiming to disclose how artificial intelligence (AI) has been applied to the education sector and explore the potential research trends and challenges of AI in education.'

calculate_similarity(text1,text2,text3)

0.87726504 0.7504591
sentence 1 is more similar to input sentence


In [190]:
#calculate similarity
def calculate_similarity(text1,text2,token_length=20):
    out1=get_embeddings(text1,token_length=token_length) #create embeddings of text
    out2=get_embeddings(text2,token_length=token_length) #create embeddings of text
    sim1= cosine_similarity(out1,out2)[0][0]
    return sim1

In [194]:
# compare each test sentence with all train sentences to obtain the most 
max_similarity = {}
for index, sentence in enumerate(test_data[0]):
    for train_index, train_sentence in enumerate(train_data[0]):
        sim = calculate_similarity(sentence, train_sentence)
        if index not in max_similarity:
            max_similarity[index] = [train_index, sim]
        else:
            if sim > max_similarity[index][1]:
                max_similarity[index] = [train_index, sim]
    
print(max_similarity)

{0: [75, 0.9999999], 1: [103, 1.0000002], 2: [15, 1.0], 3: [44, 0.99999994], 4: [84, 0.98278], 5: [42, 0.96550786], 6: [40, 0.97645795], 7: [78, 0.9999999], 8: [108, 0.9651437], 9: [6, 0.9385615], 10: [40, 0.8716743], 11: [61, 0.893425], 12: [65, 0.7716932], 13: [81, 0.830673], 14: [54, 0.7993957], 15: [50, 0.87251437], 16: [9, 0.89948297], 17: [82, 0.8024551], 18: [101, 0.88029784], 19: [105, 0.9011358]}
