Skip to content

Commit

Permalink
applies fix for issue39 also to action analysis widget. corrects scip…
Browse files Browse the repository at this point in the history
…y duplication in setup.cfg
  • Loading branch information
kodymoodley committed Apr 18, 2024
1 parent d7015b9 commit e757e5e
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 16 deletions.
48 changes: 34 additions & 14 deletions orangecontrib/storynavigation/widgets/OWSNActionAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
import storynavigation.modules.constants as constants
import storynavigation.modules.util as util
import storynavigation.modules.error_handling as error_handling

from thefuzz import fuzz

HTML = """
<!doctype html>
Expand Down Expand Up @@ -702,9 +702,36 @@ def list_docs(self):
docs = self.regenerate_docs()
self.doc_list_model.setup_data(self.stories.titles.tolist(), docs)

def get_el_story_text(self, df):
    """Build one story string from the 'sentence' column of a dataframe.

    Args:
        df: pandas DataFrame that has a 'sentence' column.

    Returns:
        str: the distinct sentences, in order of first appearance, joined
        by single spaces. An empty string when there are no sentences.
    """
    # Missing values (NaN/None) are not strings and would make str.join
    # raise TypeError, so they are dropped before de-duplicating.
    sentences = df['sentence'].dropna().unique().tolist()
    return ' '.join(sentences)

def fuzzy_match_text(self, text1, text2):
    """Score the similarity of two story texts with fuzzy string matching.

    Delegates to ``fuzz.ratio`` (from ``thefuzz``) and returns its result
    unchanged; higher scores mean more similar texts.
    """
    similarity = fuzz.ratio(text1, text2)
    return similarity

def find_matching_story_in_story_elements(self, c_index, story_text, threshold=90):
    """Find the Elements-table story id whose text matches ``story_text``.

    Iterates over ``self.story_elements_dict`` (story id -> dataframe of
    that story's rows), rebuilds each story's text and compares it with
    ``story_text`` by fuzzy matching.

    Args:
        c_index: fallback index (the row index from the doc list model),
            returned when no story matches.
        story_text: text of the selected story.
        threshold: minimum fuzzy-match score for two texts to count as
            the same story. Defaults to 90, the previously hard-coded value.

    Returns:
        int: the id of the first story scoring at least ``threshold``,
        otherwise ``c_index`` unchanged.
    """
    for storyid, story_df in self.story_elements_dict.items():
        el_story_text = self.get_el_story_text(story_df)
        if self.fuzzy_match_text(el_story_text, story_text) >= threshold:
            # Keys of story_elements_dict are converted with int() so the
            # caller gets a numeric story id.
            return int(storyid)
    return c_index

def get_selected_indexes(self) -> Set[int]:
    """Return the story ids that correspond to the selected doc-list rows.

    Each selected row is mapped to its source-model row, the story text at
    that position is extracted and normalised, and the matching story id
    is looked up via ``find_matching_story_in_story_elements``. When no
    story matches, the source row index itself is used.

    Returns:
        Set[int]: one story id per selected row (duplicates collapse).
    """
    to_source = self.doc_list.model().mapToSource
    selected_rows = self.doc_list.selectionModel().selectedRows()
    if not selected_rows:
        return set()

    # Regenerate the documents once instead of once per selected row.
    # NOTE(review): assumes regenerate_docs() returns the same list on
    # repeated calls within one selection pass - confirm.
    docs = self.regenerate_docs()

    result = set()
    for view_index in selected_rows:  # each entry is one selected story
        c_index = to_source(view_index).row()
        obj = docs[c_index]
        # Drop the first two whitespace-separated tokens and keep the rest
        # as the story text.
        # NOTE(review): presumably the two tokens are the filename and its
        # path, and neither contains whitespace - confirm.
        story_text = ' '.join(obj.split()[2:])
        # Preprocess into sentences, then append a full stop to each and
        # join them into one string for comparison.
        sentences = util.preprocess_text(story_text)
        proc_story_text = ' '.join(sen + '.' for sen in sentences)
        result.add(
            self.find_matching_story_in_story_elements(c_index, proc_story_text)
        )
    return result
# def get_selected_indexes(self) -> Set[int]:
# m = self.doc_list.model().mapToSource
# return {m(i).row() for i in self.doc_list.selectionModel().selectedRows()}

def set_selection(self) -> None:
"""
Expand Down Expand Up @@ -814,14 +841,6 @@ def show_docs(self):
self.custom,
self.story_elements_dict[str(c_index)]
)
# else:
# value = self.actiontagger.postag_text(
# value,
# self.past_vbz,
# self.present_vbz,
# self.custom,
# None
# )

if feature in self.search_features and (len(self.regexp_filter) > 0):
value = self.__mark_text(self.original_text)
Expand All @@ -835,10 +854,11 @@ def show_docs(self):
value = os.path.join(feature.attributes.get("origin", ""), value)
value = '<img src="{}"></img>'.format(value)

text += (
f'<tr><td class="variables"><strong>{feature.name}:</strong></td>'
f'<td class="content">{value}</td></tr>'
)
if feature.name.lower() == "content" or feature.name.lower() == "text":
text += (
# f'<tr><td class="variables"><strong>{feature.name}:</strong></td>'
f'<td class="content">{value}</td></tr>'
)

parts.append(text)

Expand Down
1 change: 0 additions & 1 deletion orangecontrib/storynavigation/widgets/OWSNActorAnalysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@
import storynavigation.modules.error_handling as error_handling

from thefuzz import fuzz
from thefuzz import process

HTML = """
<!doctype html>
Expand Down
1 change: 0 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ install_requires =
scipy == 1.12.0
dhtmlparser3 >= 3.0.17
nltk >= 3.8.1
scipy == 1.12.0
textblob >= 0.17.1
textblob-nl >= 0.0.1
pydot >= 1.4.2
Expand Down

0 comments on commit e757e5e

Please sign in to comment.