-
Notifications
You must be signed in to change notification settings - Fork 0
/
PAT.py
102 lines (67 loc) · 2.51 KB
/
PAT.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/python
# -*- coding: UTF8 -*-
## PAT: Plagiat Auto-Tester
# Version 0.0.1
## Configuration
# Number of consecutive words grouped into one text-bloc; each bloc is sent
# to the search engine as a single quoted phrase.
# NOTE(review): "Maximum 32" is presumably the search engine's per-query
# word limit -- confirm before raising the value.
howManyWordsByCheck = 20 # Maximum 32; words meant to be delimited by "'", "-", space and newline
## Imports
import argparse # For the command-line parsing
import re # To split the file in sentences
import urllib # Download Google's results
import sys # To flush!
## Script
# Parse arguments
parser = argparse.ArgumentParser (description="PAT: Plagiat Auto-Tester; try to determine how much of your text is on Google.")
parser.add_argument ("file", type=file, help="Text to test")
args = parser.parse_args()
print "Loading text...",
# Open text file, get contents
with args.file as textFile :
text = textFile.read ()
# Cleanup
del args
del parser
print "done!"
print "Text processing:",
# Making a nice list
sentences = re.split ("[ -'\n]", text)
del text
for i in range (sentences.count ("")) :
sentences.remove ("")
# Setup some variables
totalLine = len (sentences)
totalPlagiat = 0
sentence = ""
# Main part
for i in range (totalLine - howManyWordsByCheck) :
# New Google ID
if i % 100 == 0 :
class AppURLopener(urllib.FancyURLopener):
version = str (i)
urllib._urlopener = AppURLopener()
sentence = ""
for text in sentences [i:i+howManyWordsByCheck] :
if not sentence == "" :
sentence = sentence + " " + text
else :
sentence = text
print
print " Processing of text-bloc [" + str (i+1) + "/" + str (totalLine - howManyWordsByCheck) + "]:"
print " Downloading Google's result...",
# Everyday in flusheling
sys.stdout.flush()
(fileName, Null) = urllib.urlretrieve ("http://www.google.co.uk/search?q=\"" + urllib.quote_plus (sentence) + "\"")
print "done!"
print " Page processing...",
plagiat = False
with open (fileName) as page :
if not re.search ("www.gstatic.com/m/images/icons/warning.gif| - did not match any documents.", page.read ()) :
plagiat = True
print "done!"
if plagiat :
print " /!\\ Oops, plagiat detected in \"" + sentence + "\""
totalPlagiat = totalPlagiat + 1
print "--"
print str (totalPlagiat) + " plagiat's occurence were found"
print str (totalLine - howManyWordsByCheck) + " words were processed"
print "what give us a beautiful score of " + str (totalPlagiat * 100 / (totalLine - howManyWordsByCheck)) + "% of palgiat! (more or less)"