Skip to content

Commit

Permalink
removing unneeded code/comments
Browse files (browse the repository at this point in the history)
  • Loading branch information
shawnmjones committed Jul 13, 2020
1 parent 005e3ac commit bf68c78
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 136 deletions.
2 changes: 1 addition & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
# The short X.Y version
version = u''
# The full version, including alpha/beta/rc tags
release = u'0.2020.06.24.032337'
release = u'0.2020.07.13.224110'

# -- General configuration ---------------------------------------------------

Expand Down
180 changes: 46 additions & 134 deletions hypercane/report/sumgrams.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,126 +40,38 @@ def generate_sumgrams(urimlist, cache_storage, added_stopwords=[]):
module_logger.exception("URI-M [{}] generated an exception [{}], skipping...".format(urim, repr(exc)))
hypercane.errors.errorstore.add( urim, traceback.format_exc() )

# now = datetime.now()
# current_year = now.year
# last_year = current_year - 1
# current_date = now.day

# sumgram processes stop words at two levels:
# 1. when the vocabulary is built
# 2. stopwords are applied when finding sumgrams
# start with single terms before moving on to bigrams, etc.

# TODO: load these from a file
# added_stopwords = [
# "associated press",
# "com",
# "donald trump",
# "fox news",
# "abc news",
# "getty images",
# "last month",
# "last week",
# "last year",
# "pic",
# "pinterest reddit",
# "pm et",
# "president donald",
# "president donald trump",
# "president trump",
# "president trump's",
# "print mail",
# "reddit print",
# "said statement",
# "send whatsapp",
# "sign up",
# "trump administration",
# "trump said",
# "twitter",
# "united states",
# "washington post",
# "white house",
# "whatsapp pinterest",
# "subscribe whatsapp",
# "york times",
# "privacy policy",
# "terms use"
# ]

# added_stopwords.append( "{} read".format(last_year) )
# added_stopwords.append( "{} read".format(current_year) )

# stopmonths = [
# "january",
# "february",
# "march",
# "april",
# "may",
# "june",
# "july",
# "august",
# "september",
# "october",
# "november",
# "december"
# ]

# # add just the month to the stop words
# added_stopwords.extend(stopmonths)

# stopmonths_short = [
# "jan",
# "feb",
# "mar",
# "apr",
# "may",
# "jun",
# "jul",
# "aug",
# "sep",
# "oct",
# "nov",
# "dec"
# ]

# added_stopwords.extend(stopmonths_short)

# # add the day of the week, too
# added_stopwords.extend([
# "monday",
# "tuesday",
# "wednesday",
# "thursday",
# "friday",
# "saturday",
# "sunday"
# ])

# added_stopwords.extend([
# "mon",
# "tue",
# "wed",
# "thu",
# "fri",
# "sat",
# "sun"
# ])

# # for i in range(1, 13):
# # added_stopwords.append(
# # datetime(current_year, i, current_date).strftime('%b %Y')
# # )
# # added_stopwords.append(
# # datetime(last_year, i, current_date).strftime('%b %Y')
# # )

# # for i in range(1, 13):
# # added_stopwords.append(
# # datetime(current_year, i, current_date).strftime('%B %Y')
# # )
# # added_stopwords.append(
# # datetime(last_year, i, current_date).strftime('%B %Y')
# # )
now = datetime.now()
current_year = now.year

stopmonths = [
"january",
"february",
"march",
"april",
"may",
"june",
"july",
"august",
"september",
"october",
"november",
"december"
]

stopmonths_short = [
"jan",
"feb",
"mar",
"apr",
"may",
"jun",
"jul",
"aug",
"sep",
"oct",
"nov",
"dec"
]

params = {
"add_stopwords": ", ".join(added_stopwords),
Expand All @@ -184,20 +96,20 @@ def generate_sumgrams(urimlist, cache_storage, added_stopwords=[]):

addsumgram = True

# # workaround for sumgram expanding dates
# for stopmonth in stopmonths:
# module_logger.info("checking if long stopmonth {} in {}".format(stopmonth, ngram))
# if stopmonth in ngram and str(current_year) in ngram:
# module_logger.info("detected {} and {} in {}".format(stopmonth, current_year, ngram))
# addsumgram = False
# break

# for stopmonth in stopmonths_short:
# module_logger.info("checking if short stopmonth {} in {}".format(stopmonth, ngram))
# if stopmonth in ngram and str(current_year) in ngram:
# module_logger.info("detected {} and {} in {}".format(stopmonth, current_year, ngram))
# addsumgram = False
# break
# workaround for sumgram expanding dates
for stopmonth in stopmonths:
module_logger.info("checking if long stopmonth {} in {}".format(stopmonth, ngram))
if stopmonth in ngram and str(current_year) in ngram:
module_logger.info("detected {} and {} in {}".format(stopmonth, current_year, ngram))
addsumgram = False
break

for stopmonth in stopmonths_short:
module_logger.info("checking if short stopmonth {} in {}".format(stopmonth, ngram))
if stopmonth in ngram and str(current_year) in ngram:
module_logger.info("detected {} and {} in {}".format(stopmonth, current_year, ngram))
addsumgram = False
break

if addsumgram == True:
sf.append(
Expand Down
2 changes: 1 addition & 1 deletion hypercane/version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
__appname__ = "hypercane"
__appversion__ = '0.2020.06.24.032337'
__appversion__ = '0.2020.07.13.224110'
__useragent__ = "{}/{}".format(__appname__, __appversion__)

0 comments on commit bf68c78

Please sign in to comment.