In [28]:
# Import libraries
import keras
import tensorflow
import numpy as np
import pandas as pd
import tensorflow as tf
from keras.models import Model
from keras.layers import Input, Embedding, LSTM, Lambda, Dense
from keras import backend as K
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [29]:
# Load data
data = pd.read_csv('data.csv')
data = data.sample(frac=1).reset_index(drop=True)

In [30]:
# Preprocess data
print(data.isna().sum())
data.dropna(inplace=True)
data

plagiarized        0
original           0
plagiarism_type    0
dtype: int64


Unnamed: 0,plagiarized,original,plagiarism_type
0,Integrating empathy into healthcare chatbots i...,Implementing empathy to healthcare chatbots is...,2
1,It is still debatable if using exergames in ph...,﻿Whether the application of exergames in physi...,2
2,Inverse gas chromatography (IGC) is a method t...,Inverse gas chromatography (IGC) has emerged a...,2
3,Network news serves as a crucial means for net...,Network news is an important way for netizens ...,2
4,Current efforts encounter challenges in balanc...,Multimodal Emotion Recognition in Conversation...,1
...,...,...,...
655,Artificial Intelligence (AI) is reshaping the ...,Artificial Intelligence (AI) is reshaping the ...,1
656,"The application of AI and machine learning, pa...","The application of AI and machine learning, pa...",0
657,"Utilizing AI and machine learning, namely the ...","The application of AI and machine learning, pa...",2
658,"Accurately scientific disciplines, including b...","Accurately scientific disciplines, including b...",1


In [31]:
# Assign data to variables
plagiarized_texts = data['plagiarized']
original_texts = data['original']
labels = data['plagiarism_type']

In [32]:
# Fill missing values
plagiarized_texts = plagiarized_texts.fillna('')
original_texts = original_texts.fillna('')

In [33]:
# Define parameters
max_sequence_length = 800
embedding_dim = 300
num_classes = 3

In [34]:
# Combine texts
texts = (plagiarized_texts + ' ' + original_texts).astype(str)
texts[0]

"Integrating empathy into healthcare chatbots is seen as a viable approach to evoke a feeling of human warmth. Nevertheless, current research often fails to consider the multifaceted nature of empathy, resulting in a limited comprehension of whether manufactured empathy is experienced in a similar manner to interpersonal empathy. This research contends that the implementation of experiential manifestations of empathy may result in unforeseen adverse effects due to their potential inauthenticity. Alternatively, offering instrumental assistance may be more appropriate for simulating artificial empathy, as it is more compatible with computer-based frameworks used in chatbots. Two empirical investigations utilizing healthcare chatbots investigate the impact of empathetic (experiencing with), sympathetic (experiencing for), and behavioral-empathetic (empathetic aiding) versus non-empathetic responses on the perception of warmth, perception of authenticity, and their subsequent effects on tr

In [35]:
# Tokenize texts
tokens = [word for sentence in texts for word in sentence.split()]

In [36]:
# Get vocabulary size
vocabulary_size = len(set(tokens))
vocabulary_size

8405

In [37]:
# Tokenize texts
tokenizer = Tokenizer(num_words=vocabulary_size)
tokenizer.fit_on_texts(texts)

In [38]:
# Convert texts to sequences and pad them
sequences_plagiarized = tokenizer.texts_to_sequences(plagiarized_texts)
sequences_original = tokenizer.texts_to_sequences(original_texts)
padded_sequences_plagiarized = pad_sequences(sequences_plagiarized, maxlen=max_sequence_length)
padded_sequences_original = pad_sequences(sequences_original, maxlen=max_sequence_length)

In [39]:
# Define model (First version)
"""
input_layer1 = Input(shape=(max_sequence_length,))
input_layer2 = Input(shape=(max_sequence_length,))

embedding_layer = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)

lstm_layer = LSTM(units=50)

x1 = embedding_layer(input_layer1)
x1 = lstm_layer(x1)

x2 = embedding_layer(input_layer2)
x2 = lstm_layer(x2)

distance_layer = Lambda(lambda x: tf.keras.backend.abs(x[0] - x[1]),
                        output_shape=lambda _: (1,))([x1, x2])

output_layer = Dense(num_classes, activation='softmax')(distance_layer)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)

model.compile(optimizer=Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

callbacks = [
    EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True),
]
"""

"\ninput_layer1 = Input(shape=(max_sequence_length,))\ninput_layer2 = Input(shape=(max_sequence_length,))\n\nembedding_layer = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)\n\nlstm_layer = LSTM(units=50)\n\nx1 = embedding_layer(input_layer1)\nx1 = lstm_layer(x1)\n\nx2 = embedding_layer(input_layer2)\nx2 = lstm_layer(x2)\n\ndistance_layer = Lambda(lambda x: tf.keras.backend.abs(x[0] - x[1]),\n                        output_shape=lambda _: (1,))([x1, x2])\n\noutput_layer = Dense(num_classes, activation='softmax')(distance_layer)\n\nmodel = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)\n\nmodel.compile(optimizer=Adam(0.001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])\n\ncallbacks = [\n    EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True),\n]\n"

In [40]:
# Define model (Second version)
from keras.layers import LSTM, Embedding, Dense, Input, Concatenate, Dropout # type: ignore

# Define model
input_layer1 = Input(shape=(max_sequence_length,))
input_layer2 = Input(shape=(max_sequence_length,))

embedding_layer = Embedding(input_dim=vocabulary_size, output_dim=embedding_dim)

# First LSTM layer
lstm_layer1 = LSTM(units=100, return_sequences=True)
x1 = embedding_layer(input_layer1)
x1 = lstm_layer1(x1)

# Second LSTM layer
lstm_layer2 = LSTM(units=50)
x1 = lstm_layer2(x1)

# Repeat for the second input
x2 = embedding_layer(input_layer2)
x2 = lstm_layer1(x2)
x2 = lstm_layer2(x2)

# Concatenate the LSTM outputs
concatenated = Concatenate()([x1, x2])

# Add additional layers for processing
x = Dense(128, activation='relu')(concatenated)
x = Dropout(0.5)(x)

output_layer = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[input_layer1, input_layer2], outputs=output_layer)

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

callbacks = [
    EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True),
]

model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_15 (InputLayer)       [(None, 800)]                0         []                            
                                                                                                  
 input_16 (InputLayer)       [(None, 800)]                0         []                            
                                                                                                  
 embedding_7 (Embedding)     (None, 800, 300)             2521500   ['input_15[0][0]',            
                                                                     'input_16[0][0]']            
                                                                                                  
 lstm (LSTM)                 (None, 800, 100)             160400    ['embedding_7[0][0]',   

In [41]:
# Show the model architecture
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_15 (InputLayer)       [(None, 800)]                0         []                            
                                                                                                  
 input_16 (InputLayer)       [(None, 800)]                0         []                            
                                                                                                  
 embedding_7 (Embedding)     (None, 800, 300)             2521500   ['input_15[0][0]',            
                                                                     'input_16[0][0]']            
                                                                                                  
 lstm (LSTM)                 (None, 800, 100)             160400    ['embedding_7[0][0]',   

In [42]:
# Run the model
model.fit([padded_sequences_plagiarized, padded_sequences_original], labels, epochs=20,
          batch_size=32, validation_split=0.2, callbacks=callbacks)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20


<keras.src.callbacks.History at 0x305423b80>

In [43]:
# Preprocess function
def preprocess(sentence):
  sequence = pad_sequences(tokenizer.texts_to_sequences([sentence]), maxlen=max_sequence_length)
  return sequence

In [47]:
# FID-01.txt and org-076.txt -> 0
prediction = model.predict([
    preprocess("This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinite actuator faults and range constraints. Traditional cheating detection methods have many disadvantages, such as difficult to detect covert equipment cheating, multi-source cheating, difficult to distinguish plagiarists from plagiarists, difficult to distinguish plagiarists from victims, or plagiarism from coincidences. To address these issues, we leverage the power of fuzzy logic systems (FLSs) and employ adaptive methods to approximate unknown nonlinear functions and uncertain parameters present in robotic dynamics. In the course of information exploration, the problems of collision avoidance and connectivity maintenance are ever present due to limitations of distance and visual fields. In this paper, the concept of knowledge point mastery Index is introduced to measure students’ mastery of a certain knowledge point, and a test method of cheating based on improved cognitive diagnostic model is proposed. Furthermore, to reduce the number of controller executions and compensate for any effect arising from infinite actuator failures, robots engage with their leader at the moment of actuator faults using fewer network communication resources yet maintain uninterrupted tracking of the desired trajectory generated by the leader. We guarantee that all signals are semi-global uniformly ultimately bounded (SGUUB). Ultimately, we demonstrate the practical feasibility of the ETFT control strategy for nonholonomic multirobot systems. The experiments show that the precision and recall rate of this method are significantly higher than those of the method based on the false-same rate, the method based on the false-same rate and the right-same rate and the method based on the Person-Fit index."), 
    preprocess("This article delves into the intricacies of adaptive fuzzy event-triggered formation tracking control for nonholonomic multirobot systems characterized by infinite actuator faults and range constraints. To address these issues, we leverage the power of fuzzy logic systems (FLSs) and employ adaptive methods to approximate unknown nonlinear functions and uncertain parameters present in robotic dynamics. In the course of information exploration, the problems of collision avoidance and connectivity maintenance are ever present due to limitations of distance and visual fields. In this regard, we introduce a general barrier function and prescribed performance methodology to tackle constrained range impediments effectively. Furthermore, to reduce the number of controller executions and compensate for any effect arising from infinite actuator failures, robots engage with their leader at the moment of actuator faults using fewer network communication resources yet maintain uninterrupted tracking of the desired trajectory generated by the leader. With the aid of the dynamic surface technology, we propose a decentralized adaptive event-triggering fault-tolerant (ETFT) formation control strategy. We guarantee that all signals are semi-global uniformly ultimately bounded (SGUUB). Ultimately, we demonstrate the practical feasibility of the ETFT control strategy for nonholonomic multirobot systems.")])
predicted_classes = np.argmax(prediction, axis=1)
predicted_classes[0]



1

In [48]:
# FID-03.txt and Org-016.txt -> 1
prediction = model.predict([
    preprocess("At present, the application of Artificial Intelligence (AI) in industrial control, smart home and other fields has received good response. However, AI technology has certain requirements for computer performance, and also faces problems in network security, data analysis, human-computer interaction, etc. At present, the visual platform of embedded system has achieved remarkable results in practical applications, but its development has been seriously hampered by problems such as low overall development efficiency and unstable system performance. The test results showed that when other conditions were the same, students and experts had 83.5% and 90% positive evaluations of System X, and 16.5% and 10% negative evaluations respectively. This paper designed an EP Vision System (VS) based on AI technology. The platform combined the embedded hardware design with the Support Vector Machine (SVM) algorithm to realize the intelligent robot interaction and target detection functions. It showed the positive relationship between AI technology and EP VS. The proportion of positive evaluation of System X was much higher than that of System Y, which indicated that System X can meet the actual application requirements and improve the system recognition efficiency to a certain extent. However, their positive evaluation of System Y only accounted for 19% and 4%, while the negative evaluation accounted for 81% and 96%. However, their positive evaluation of System Y only accounted for 19% and 4%, while the negative evaluation accounted for 81% and 96%. "), 
    preprocess("At present, the application of Artificial Intelligence (AI) in industrial control, smart home and other fields has received good response. However, AI technology has certain requirements for computer performance, and also faces problems in network security, data analysis, human-computer interaction, etc. At present, the visual platform of embedded system has achieved remarkable results in practical applications, but its development has been seriously hampered by problems such as low overall development efficiency and unstable system performance. This paper designed an EP Vision System (VS) based on AI technology. The platform combined the embedded hardware design with the Support Vector Machine (SVM) algorithm to realize the intelligent robot interaction and target detection functions. The test results showed that when other conditions were the same, students and experts had 83.5% and 90% positive evaluations of System X, and 16.5% and 10% negative evaluations respectively. However, their positive evaluation of System Y only accounted for 19% and 4%, while the negative evaluation accounted for 81% and 96%. The proportion of positive evaluation of System X was much higher than that of System Y, which indicated that System X can meet the actual application requirements and improve the system recognition efficiency to a certain extent. It showed the positive relationship between AI technology and EP VS.")])
predicted_classes = np.argmax(prediction, axis=1)
predicted_classes[0]



2

In [49]:
# FID-09.txt and Org-109.txt -> 2
prediction = model.predict([
    preprocess("Drug designing and development represent crucial areas of research for pharmaceutical companies and chemical scientists. However, challenges such as low efficacy, off-target delivery, time consumption, and high cost hinder progress in drug design and discovery. Additionally, the complexity and volume of data from genomics, proteomics, microarray data, and clinical trials pose significant obstacles in the drug discovery pipeline. Artificial intelligence (AI) and machine learning (ML) technologies have revolutionized drug discovery and development, particularly through the use of artificial neural networks and deep learning algorithms. These technologies have modernized various processes in drug discovery, including peptide synthesis, structure-based virtual screening, ligand-based virtual screening, toxicity prediction, drug monitoring and release, pharmacophore modeling, quantitative structure–activity relationship, drug repositioning, polypharmacology, and physiochemical activity. Historical evidence supports the implementation of AI and deep learning in drug discovery. Furthermore, novel data mining, curation, and management techniques have provided critical support to newly developed modeling algorithms. In summary, advancements in AI and deep learning offer significant opportunities for rational drug design and discovery, ultimately benefiting mankind. Drug designing and development is an important area of research for pharmaceutical companies and chemical scientists. However, low efficacy, off-target delivery, time consumption, and high cost impose a hurdle and challenges that impact drug design and discovery. Further, complex and big data from genomics, proteomics, microarray data, and clinical trials also impose an obstacle in the drug discovery pipeline. Artificial intelligence and machine learning technology play a crucial role in drug discovery and development. In other words, artificial neural networks and deep learning algorithms have modernized the area. Machine learning and deep learning algorithms have been implemented in several drug discovery processes such as peptide synthesis, structure-based virtual screening, ligand-based virtual screening, toxicity prediction, drug monitoring and release, pharmacophore modeling, quantitative structure–activity relationship, drug repositioning, polypharmacology, and physiochemical activity. Evidence from the past strengthens the implementation of artificial intelligence and deep learning in this field. Moreover, novel data mining, curation, and management techniques provided critical support to recently developed modeling algorithms. In summary, artificial intelligence and deep learning advancements provide an excellent opportunity for rational drug design and discovery process, which will eventually impact mankind. "), 
    preprocess("Drug designing and development is an important area of research for pharmaceutical companies and chemical scientists. However, low efficacy, off-target delivery, time consumption, and high cost impose a hurdle and challenges that impact drug design and discovery. Further, complex and big data from genomics, proteomics, microarray data, and clinical trials also impose an obstacle in the drug discovery pipeline. Artificial intelligence and machine learning technology play a crucial role in drug discovery and development. In other words, artificial neural networks and deep learning algorithms have modernized the area. Machine learning and deep learning algorithms have been implemented in several drug discovery processes such as peptide synthesis, structure-based virtual screening, ligand-based virtual screening, toxicity prediction, drug monitoring and release, pharmacophore modeling, quantitative structure–activity relationship, drug repositioning, polypharmacology, and physiochemical activity. Evidence from the past strengthens the implementation of artificial intelligence and deep learning in this field. Moreover, novel data mining, curation, and management techniques provided critical support to recently developed modeling algorithms. In summary, artificial intelligence and deep learning advancements provide an excellent opportunity for rational drug design and discovery process, which will eventually impact mankind. ")])
predicted_classes = np.argmax(prediction, axis=1)
predicted_classes[0]



1