-
Notifications
You must be signed in to change notification settings - Fork 0
/
problem2_concordance.py
90 lines (78 loc) · 3.15 KB
/
problem2_concordance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""
Program : problem2_concordance.py
Author : Jigar R. Gosalia
Version : 1.0
Course : CSC-520 (Homework 2)
Prof. : Srinivasan Mandyam
A file concordance tracks the unique words in a file and their frequencies.
Write a program that displays a concordance for a file. The program should
output the unique words and their frequencies in alphabetical order.
NOTE: here "unique words" ignores case, i.e. Hello and hello are treated as the same word.
"""
from library import getLines
# List of English punctuation symbols; each entry is replaced by a space
# before the text is split into words.
# Reference: took as many punctuation symbols as possible from
# https://en.wikipedia.org/wiki/Punctuation_of_English
# NOTE(review): an earlier note here said the apostrophe is excluded, but the
# straight quote "'" IS in the list, so a contraction such as "don't" is split
# into "don" and "t". Confirm whether that is intended.
# Every entry must be a single character: the previous "\#" entry was really
# backslash + "#", which could never match once "\\" had been replaced, so "#"
# was silently left in the words.
punctuations = ["[", "]", "(", ")", "{", "}", "<", ">",
                ":", ";", ",", "`", "'", "\"", "-", ".",
                "|", "\\", "?", "/", "!", "_", "@",
                "#", "$", "%", "^", "&", "*", "+", "~", "="]
def stripPunctuation(data):
    """ Return a copy of data with every listed punctuation symbol blanked out.

    Each entry of the module-level punctuations list is replaced by a single
    space, in list order, so that a later split() separates the words.
    """
    cleaned = data
    for symbol in punctuations:
        cleaned = cleaned.replace(symbol, " ")
    return cleaned
def _printTableHeader():
    """ Print the ruled two-column header of the concordance table. """
    print("-" * 42)
    print("| %20s | %15s |" % ("WORDS".center(20), "FREQUENCY".center(15)))
    print("-" * 42)


def display(wordsDictionary, pageSize=20):
    """ Display the words and their frequencies as a table, sorted by word.

    wordsDictionary : mapping of word -> number of occurrences.
    pageSize        : number of rows printed before pausing for ENTER
                      (default 20); a value of 0 or less disables pausing.

    The pause happens only when more rows are still to come, so a word count
    that is an exact multiple of pageSize no longer ends with a needless
    prompt followed by an empty table.
    """
    sortedWords = sorted(wordsDictionary)
    totalWords = len(sortedWords)
    _printTableHeader()
    for noOfWords, word in enumerate(sortedWords, start=1):
        print("| %-20s | %15s |" % (word, str(wordsDictionary[word]).center(15)))
        # Halt after every full page, but never after the very last row
        if pageSize > 0 and noOfWords % pageSize == 0 and noOfWords < totalWords:
            print("\n" * 2)
            input("PRESS ENTER TO CONTINUE ... ")
            print("\n" * 5)
            _printTableHeader()
    print("-" * 42)
    print("\n" * 2)
def prepareDictionary(words):
    """ Build a dictionary mapping each lowercased word to its occurrence count.

    Words are compared by their lowercase form, so "Hello" and "hello" are
    counted under the single key "hello".
    """
    frequencies = {}
    for rawWord in words:
        key = rawWord.lower()
        # A missing key counts as zero, which covers the first occurrence
        frequencies[key] = frequencies.get(key, 0) + 1
    return frequencies
def main():
    """ Main method.

    Greets the user, keeps asking for a file name until getLines() hands back
    non-empty content, then builds the concordance and displays it.
    """
    print("\n" * 10)
    print("Given a file name, program will find unique words and their occurences!", end="\n\n")
    input("Press ENTER to start execution ... \n")
    # Keep prompting until the file yields some content
    while True:
        fileName = input("Enter the file name (RELATIVE ONLY and NOT ABSOLUTE): ")
        print("\n\n")
        lines = getLines(fileName)
        if len(lines) != 0:
            break
    # Blank out the punctuation, then split on whitespace to get the words
    words = stripPunctuation(lines).split()
    # Count the words case-insensitively and show the resulting table
    wordsDictionary = prepareDictionary(words)
    display(wordsDictionary)
# Starting point: run the interactive concordance program.
main()