added neutral language analysis to scrape

madetech · Aug 31, 2023 · 4100161 · 4100161
1 parent 5f29ac8
commit 4100161
Show file tree

Hide file tree

Showing 6 changed files with 45 additions and 5 deletions.
diff --git a/backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py b/backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py
@@ -0,0 +1,28 @@
+"""add neutral language column to table
+
+Revision ID: 06dcd68024ca
+Revises: a363f59d034e
+Create Date: 2023-08-31 15:31:34.030264
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '06dcd68024ca'
+down_revision = 'a363f59d034e'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # ### commands auto generated by Alembic: add the nullable Float column 'neutral_language_percent' to the 'section' table ###
+    op.add_column('section', sa.Column('neutral_language_percent', sa.Float(), nullable=True))
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    # ### commands auto generated by Alembic: drop the 'neutral_language_percent' column from the 'section' table ###
+    op.drop_column('section', 'neutral_language_percent')
+    # ### end Alembic commands ###
diff --git a/backend/data/report_writer.py b/backend/data/report_writer.py
@@ -64,7 +64,8 @@ def upsert_report_section(section: Section, report_id: int, session: Session):
         title=section.title,
         decision=section.decision,
         positive_language_percent=section.positive_language_percent,
-        constructive_language_percent = section.constructive_language_percent
+        constructive_language_percent = section.constructive_language_percent,
+        neutral_language_percent = section.neutral_language_percent
     )
 
     upsert_section_statement = insert(section_table).values(section_to_insert)

diff --git a/backend/models/section.py b/backend/models/section.py
@@ -21,3 +21,4 @@ class Section(Base):
     feedback: Mapped[List["Feedback"]] = relationship()
     positive_language_percent: Mapped[float] = mapped_column(nullable=True)
     constructive_language_percent: Mapped[float] = mapped_column(nullable=True)
+    neutral_language_percent: Mapped[float] = mapped_column(nullable=True)
diff --git a/backend/services/basic_info_scraper.py b/backend/services/basic_info_scraper.py
@@ -19,7 +19,7 @@
 def scrape_reports() -> list[Report]:
     LOGGER.info("Retrieving report links")
     report_links = get_report_links()
-    report_links = ["/service-standard-reports/get-security-clearance"]
+    # report_links = ["/service-standard-reports/get-security-clearance"]
     reports_models = []
     number_of_reports = len(report_links)
     LOGGER.info(f"Processing {number_of_reports} reports")
@@ -266,6 +266,7 @@ def create_report_model(report_dict: dict, url: str) -> Report:
                 section.title = report_section["title"]
             section.positive_language_percent = report_section["positive_feedback_percentage"]
             section.constructive_language_percent = report_section["negative_feedback_percentage"]
+            section.neutral_language_percent = report_section["neutral_feedback_percentage"]
 
 
             if "feedback" in report_section:

diff --git a/backend/services/section_info_scraper.py b/backend/services/section_info_scraper.py
@@ -47,6 +47,7 @@ def analyse_feedback(feedback_string):
     si_obj = SentimentIntensityAnalyzer()
     sentiment_dict = si_obj.polarity_scores(feedback_string)
     analysed_percentages.insert(0,sentiment_dict['neg']*100)
+    analysed_percentages.insert(0,sentiment_dict['neu']*100)
     analysed_percentages.insert(0,sentiment_dict['pos']*100)
 
     return analysed_percentages
@@ -86,6 +87,7 @@ def scrape_one(soup: BeautifulSoup, sections: list[dict]):
             feedback.extend(extract_feedback(section_decision, "what-the-team-has-done-well", FeedbackType.POSITIVE))
             feedback.extend(extract_feedback(section_decision, "what-the-team-needs-to-explore", FeedbackType.CONSTRUCTIVE))
             feedback_text = extract_text_from_feedback(feedback)
+            analyse_feedback(feedback_text)
 
 
             sections.append(dict(
@@ -94,7 +96,8 @@ def scrape_one(soup: BeautifulSoup, sections: list[dict]):
                 title = section_element.text.strip(),
                 feedback = feedback,
                 positive_feedback_percentage = analyse_feedback(feedback_text)[0],
-                negative_feedback_percentage = analyse_feedback(feedback_text)[1]
+                neutral_feedback_percentage = analyse_feedback(feedback_text)[1],
+                negative_feedback_percentage = analyse_feedback(feedback_text)[2]
             ))
             break
 

diff --git a/backend/tests/test_fixtures/section_fixtures.py b/backend/tests/test_fixtures/section_fixtures.py
@@ -6,14 +6,20 @@
         report_id=1,
         number=1,
         decision="Met",
-        feedback=[]
+        feedback=[],
+        positive_language_percent = 10,
+        constructive_language_percent = 8,
+        neutral_language_percent = 4,
 
     ),
     Section(
         id=1,
         report_id=1,
         number=2,
         decision="Not met",
-        feedback=[]
+        feedback=[],
+        positive_language_percent = 14,
+        constructive_language_percent = 3,
+        neutral_language_percent = 56,
     )
 ]