/
server.py
135 lines (92 loc) · 3.68 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
from markov_chain_first import first_order_markov # , tweet_generator
from histogram_dictionary import get_histogram
from flask import Flask, request, render_template, redirect
import random
import sys
import re
import string
# Flask application object; the route decorator further down registers views on it.
app = Flask(__name__)
# Turn on Flask's DEBUG config flag.
# NOTE(review): this is hard-coded; confirm it is not left enabled in production.
app.config['DEBUG'] = True
def get_random_word(histogram):
    """Pick one key from ``histogram`` at random, weighted by its count.

    Args:
        histogram: Mapping of word (str) -> count used as the sampling weight.

    Returns:
        The chosen key with every "," removed, every "'" replaced by a
        space and surrounding whitespace stripped, or ``None`` when the
        histogram is empty or its weights do not sum to a positive number.
    """
    if not histogram:
        return None

    # Sum once up front; recomputing it inside the loop made sampling O(n^2).
    total_weight = sum(histogram.values())
    if total_weight <= 0:
        # Nothing can be sampled; previously this divided by zero.
        return None

    # Scale the random draw to the total instead of dividing every count, so
    # integer counts accumulate exactly and always reach the threshold.
    threshold = random.random() * total_weight
    cumulative_weight = 0
    chosen = None
    for chosen, weight in histogram.items():
        cumulative_weight += weight
        if threshold <= cumulative_weight:
            break
    # If float rounding kept the running sum below the threshold, the loop
    # ends on the last key, which is used instead of silently returning None.
    return chosen.replace(",", "").replace("'", " ").strip()
#import pdb; pdb.set_trace() # debugging
def test_get_random_word(repetitions, histogram):
    """Sample ``repetitions`` words from ``histogram`` and count the draws.

    Each draw comes from ``get_random_word(histogram)``; the list of drawn
    words is handed to ``get_histogram`` and its result is returned.
    """
    sampled_words = [get_random_word(histogram) for _ in range(repetitions)]
    return get_histogram(sampled_words)
def tweet_generator(order, markov_dict):
    """Generate text by walking ``markov_dict`` for up to ``order`` steps.

    A random key of ``markov_dict`` seeds the walk (the seed itself is not
    emitted). On each step the next word is drawn with ``get_random_word``
    from the histogram stored under the current word.

    Args:
        order: Number of words to generate; values <= 0 produce ''.
        markov_dict: Mapping of word -> histogram of the words that follow it.

    Returns:
        The generated words concatenated, with every "," removed, every "'"
        replaced by a space and surrounding whitespace stripped. The result
        is shorter than ``order`` words when the walk reaches a word that has
        no followers, and '' when ``markov_dict`` is empty.
    """
    # Guard the degenerate cases: previously order == 0 hit an unbound
    # ``sentence`` and a negative order never terminated.
    if order <= 0 or not markov_dict:
        return ''

    current_word = random.choice(list(markov_dict))
    generated_words = []
    for _ in range(order):
        # A word with no entry (or an empty histogram) is a dead end: stop
        # the walk instead of raising KeyError / TypeError.
        next_word = get_random_word(markov_dict.get(current_word, {}))
        if next_word is None:
            break
        generated_words.append(next_word)
        current_word = next_word

    # NOTE(review): words are joined with no separator, exactly as before;
    # confirm whether a space between words was intended.
    sentence = ''.join(generated_words)
    return sentence.replace(",", '').replace("'", ' ').strip()
@app.route('/', methods=['GET', 'POST'])
def main():
    """Serve the sentence form on GET and a generated sentence on POST.

    The corpus file ``test_corpus.txt`` is read and lower-cased on every
    request. On POST the form field ``sentence_length`` selects the length
    (7 when it is missing or not an integer) and the generated sentence is
    rendered with ``display_sentence.html``; on GET ``show_form.html`` is
    rendered.

    Raises:
        ValueError: If the requested sentence length is greater than 25.
    """
    try:
        with open('test_corpus.txt') as file:
            raw_data = file.read().lower()
    except (OSError, UnicodeDecodeError):
        # A Flask view has to return a response; the old bare ``return``
        # handed Flask None, which it rejects with its own error.
        print('Please enter a valid file name')
        return 'Please enter a valid file name', 500

    if request.method != 'POST':
        return render_template('show_form.html')

    # NOTE(review): get_clean_data, starting_words, ending_words and
    # sentence_generator are not defined or imported in the visible file;
    # confirm where they are supposed to come from.
    clean_data = get_clean_data(raw_data)
    histogram = get_histogram(clean_data)

    try:
        sentence_length = int(request.form['sentence_length'])
    except (KeyError, ValueError):
        # Missing field or non-numeric input: fall back to the default.
        sentence_length = 7
    if sentence_length > 25:
        raise ValueError('Please enter a number less than 25')

    # Drop the first character of the start token and trim whitespace
    # (presumably the leading period captured with the token).
    starting_word = random.choice(starting_words)[1:].strip()
    end_token = random.choice(ending_words)
    rand_sentence = sentence_generator(sentence_length, histogram, starting_word)
    rand_sentence += ' ' + end_token
    return render_template('display_sentence.html',
                           rand_sentence=rand_sentence)
def get_start_end_tokens(text, pattern):
    """Return every ``re.findall`` match of ``pattern`` in ``text``.

    Matches are returned in order of appearance, each with leading and
    trailing whitespace stripped.
    """
    stripped_tokens = []
    for token in re.findall(pattern, text):
        stripped_tokens.append(token.strip())
    return stripped_tokens
# Start the Flask server only when this file is executed as a script.
if __name__=='__main__':
    app.run()