-
Notifications
You must be signed in to change notification settings - Fork 1
/
server.py
153 lines (121 loc) · 4.99 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python
import argparse
import connexion
import os
import yaml
from flask import send_from_directory, redirect, request, render_template, jsonify
from flask_cors import CORS
# from backend.Project import Project # TODO !!
from backend import AVAILABLE_MODELS
from backend.api import LM
import time
from backend.utils.ngram import find_similarity_n_grams
import json
from backend.utils.detect import detect
from backend.utils.plag import plag_for_file, plag_for_text
import docx2txt
def docx_2_txt(file):
    """Extract the text of a .docx document into a ``.txt`` file next to it.

    The output path is the input path with its final extension replaced by
    ``.txt`` (``uploads/report.docx`` -> ``uploads/report.txt``).

    :param file: path of the .docx document to convert
    :return: path of the text file that was written
    """
    print(os.path.abspath(file))
    text = docx2txt.process(file)
    # splitext() only strips the last extension. Splitting on the first '.'
    # would truncate paths such as './uploads/a.docx' or '/home/j.doe/a.docx'.
    txt_path = os.path.splitext(file)[0] + '.txt'
    with open(txt_path, 'w') as fl:
        fl.write(text)
    return txt_path
# Module metadata.
__author__ = 'Divyansh'
# Name of a configuration file; not referenced in the visible part of this file.
CONFIG_FILE_NAME = 'lmf.yml'
# Project instances keyed by model name; populated by the start-up code at the
# end of this file (projects[args.model] = Project(...)).
projects = {}
# connexion application object: the routes below are registered on it, and its
# `app` attribute is what gets passed to CORS at start-up.
app = connexion.App(__name__, debug=False)
# Absolute path of the directory containing this file; upload() saves incoming
# files into its 'uploads/' sub-directory.
APP_ROOT = os.path.dirname(os.path.abspath(__file__))
class Project:
    """Pair a freshly built language model with the config it was created for."""

    def __init__(self, LM, config):
        """Build the model and remember the configuration.

        :param LM: zero-argument callable (e.g. a model class); it is called
            once and its result is stored on ``self.lm``
        :param config: stored unchanged on ``self.config``
        """
        self.config, self.lm = config, LM()
#########################
# some non-logic routes
#########################
@app.route('/')
def redir():
    """Redirect requests for the site root to the client page."""
    landing_page = 'client/fun.html'
    return redirect(landing_page)
@app.route('/client/<path:path>')
def send_static(path):
    """ answers ``/client/<path:path>`` with the matching file from ``client/dist/``

    :param path: path from api call
    """
    client_dir = 'client/dist/'
    return send_from_directory(client_dir, path)
@app.route('/data/<path:path>')
def send_data(path):
    """ answers ``/data/<path:path>`` with the matching file from the data dir

    The data dir is the ``--dir`` command-line option (``args.dir``).

    :param path: path from api call
    """
    print('Got the data route for', path)
    data_dir = args.dir
    return send_from_directory(data_dir, path)
@app.route('/upload-target', methods=['POST'])
def upload():
    """Save every file sent in the ``file`` form field to ``<APP_ROOT>/uploads/``.

    Only the last path component of each client-supplied file name is used,
    so a crafted name such as ``../../server.py`` cannot be written outside
    the upload directory. Parts without a usable name are skipped.

    :return: a fixed confirmation message
    """
    target = os.path.join(APP_ROOT, 'uploads')
    # Make sure the destination exists before the first save.
    os.makedirs(target, exist_ok=True)
    for file in request.files.getlist('file'):
        # The name is controlled by the client: normalise Windows separators
        # and drop any directory part before using it on disk.
        filename = os.path.basename((file.filename or '').replace('\\', '/'))
        if filename in ('', '.', '..'):
            continue
        print(filename)
        file.save(os.path.join(target, filename))
    return 'YOUR FILE HAS BEEN SAVED'
def _text_path_for_upload(upload_dir, name):
    """Return the path of the plain-text version of one uploaded file.

    A ``.docx`` upload is converted with ``docx_2_txt`` (which writes the text
    next to the source file) and the original document is removed; any other
    file is used as it is.

    :param upload_dir: directory holding the uploaded files
    :param name: file name inside ``upload_dir``
    :return: path of the text file to analyse
    """
    source = os.path.join(upload_dir, name)
    stem, ext = os.path.splitext(source)
    if ext.lower() != '.docx':
        return source
    print("ABSOLUTE PATH: - {}".format(source))
    docx_2_txt(source)
    os.remove(source)
    return stem + '.txt'


@app.route('/check-plag', methods=['POST'])
def check_plag():
    """Score the uploaded files, or the submitted text when nothing was uploaded.

    With files present in ``<APP_ROOT>/uploads`` the JSON response contains:

    * ``files_<first>_<other>``: ``find_similarity_n_grams`` of the first file
      against every other file,
    * ``file_ai_plag_<name>``: ``detect(file=...)`` for every file,
    * ``file_plag_<name>``: ``plag_for_file(...)`` for every file.

    Without files, the ``area`` form field is scored with ``detect(text=...)``
    and ``plag_for_text``. The upload directory is emptied afterwards, even
    when the analysis fails, so stale files never leak into the next request.

    :return: JSON response built from the collected scores
    """
    text = request.form['area']
    data = {}
    # Same directory upload() writes to, independent of the working directory.
    path = os.path.join(APP_ROOT, 'uploads')
    files_uploaded = []
    if os.path.isdir(path):
        # Sorted so that "the first file" is deterministic across platforms.
        files_uploaded = sorted(
            name for name in os.listdir(path)
            if os.path.isfile(os.path.join(path, name))
        )
    print(files_uploaded)
    try:
        if files_uploaded:
            # Convert everything up front: each .docx is converted exactly
            # once, and every text file exists before it is compared.
            text_paths = [_text_path_for_upload(path, name)
                          for name in files_uploaded]
            first_name, first_path = files_uploaded[0], text_paths[0]
            # n-gram similarity of the first file against each other file
            for other_name, other_path in zip(files_uploaded[1:],
                                              text_paths[1:]):
                key = 'files_' + first_name + '_' + other_name
                data[key] = find_similarity_n_grams(first_path, other_path)
            # AI-detection and plagiarism score for every file
            for name, text_path in zip(files_uploaded, text_paths):
                data['file_ai_plag_' + name] = detect(file=text_path)
                data['file_plag_' + name] = plag_for_file(text_path)
        else:
            # NOTE(review): the key is the submitted text itself; possibly the
            # literal 'text' was intended. Kept as-is because clients may
            # depend on the current response shape - confirm.
            data[text] = text
            data['text_score_ai_plag'] = detect(text=text)
            data['text_score_plag'] = plag_for_text(text)
    finally:
        # Uploads are single-use: remove originals and generated .txt files.
        if os.path.isdir(path):
            for name in os.listdir(path):
                leftover = os.path.join(path, name)
                if os.path.isfile(leftover):
                    os.remove(leftover)
    return jsonify(data)
def _flag_value(value):
    """Interpret the value given to a boolean command-line option.

    Without a converter argparse keeps the raw string, so ``--nodebug False``
    was truthy. Only explicit "off" spellings are false here; every other
    value stays true, exactly as before.

    :param value: raw option value from the command line
    :return: ``False`` for an empty/"off" value, ``True`` otherwise
    """
    off_values = ('', '0', 'false', 'f', 'no', 'n', 'off')
    return str(value).strip().lower() not in off_values


parser = argparse.ArgumentParser()
parser.add_argument("--model", default='gpt-2-small')
# nargs='?' + const=True also accepts the bare flag (``--nodebug``).
parser.add_argument("--nodebug", type=_flag_value, nargs='?', const=True,
                    default=False)
parser.add_argument("--address",
                    default="127.0.0.1")  # 0.0.0.0 for nonlocal use
parser.add_argument("--port", default="5001")
parser.add_argument("--nocache", type=_flag_value, nargs='?', const=True,
                    default=False)
parser.add_argument("--dir", type=str, default=os.path.abspath('data'))
parser.add_argument("--no_cors", action='store_true')

if __name__ == '__main__':
    # Run directly: parse strictly and start the built-in server.
    args = parser.parse_args()

    if not args.no_cors:
        CORS(app.app, headers='Content-Type')

    app.run(port=int(args.port), debug=not args.nodebug, host=args.address)
else:
    # Imported by another process: its command line may carry options that
    # are not ours, so unknown arguments are ignored instead of rejected.
    args, _ = parser.parse_known_args()
    # load_projects(args.dir)
    try:
        model = AVAILABLE_MODELS[args.model]
    except KeyError:
        print("Model {} not found. Make sure to register it.".format(
            args.model))
        print("Loading GPT-2 instead.")
        model = AVAILABLE_MODELS['gpt-2']
    projects[args.model] = Project(model, args.model)