Skip to content

Commit

Permalink
added neutral language analysis to scrape
Browse files Browse the repository at this point in the history
  • Loading branch information
samfallowfield committed Aug 31, 2023
1 parent 5f29ac8 commit 4100161
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 5 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
"""add neutral language column to table
Revision ID: 06dcd68024ca
Revises: a363f59d034e
Create Date: 2023-08-31 15:31:34.030264
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '06dcd68024ca'
down_revision = 'a363f59d034e'
branch_labels = None
depends_on = None


def upgrade() -> None:
    """Add a nullable float ``neutral_language_percent`` column to ``section``."""
    # ### commands auto generated by Alembic - please adjust! ###
    # The call is wrapped so each line stays within the 88-character limit
    # enforced by Ruff (E501).
    op.add_column(
        'section',
        sa.Column('neutral_language_percent', sa.Float(), nullable=True),
    )

Check failure on line 21 in backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py

View workflow job for this annotation

GitHub Actions / build (3.8)

Ruff (E501)

backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py:21:89: E501 Line too long (94 > 88 characters)

Check failure on line 21 in backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py

View workflow job for this annotation

GitHub Actions / build (3.9)

Ruff (E501)

backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py:21:89: E501 Line too long (94 > 88 characters)

Check failure on line 21 in backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py

View workflow job for this annotation

GitHub Actions / build (3.10)

Ruff (E501)

backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py:21:89: E501 Line too long (94 > 88 characters)

Check failure on line 21 in backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py

View workflow job for this annotation

GitHub Actions / build (3.11)

Ruff (E501)

backend/alembic_files/versions/06dcd68024ca_add_neutral_language_column_to_table.py:21:89: E501 Line too long (94 > 88 characters)
# ### end Alembic commands ###


def downgrade() -> None:
    """Revert this revision by dropping ``section.neutral_language_percent``."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name, column_name = 'section', 'neutral_language_percent'
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
3 changes: 2 additions & 1 deletion backend/data/report_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,8 @@ def upsert_report_section(section: Section, report_id: int, session: Session):
title=section.title,
decision=section.decision,
positive_language_percent=section.positive_language_percent,
constructive_language_percent = section.constructive_language_percent
constructive_language_percent = section.constructive_language_percent,
neutral_language_percent = section.neutral_language_percent
)

upsert_section_statement = insert(section_table).values(section_to_insert)
Expand Down
1 change: 1 addition & 0 deletions backend/models/section.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ class Section(Base):
feedback: Mapped[List["Feedback"]] = relationship()
positive_language_percent: Mapped[float] = mapped_column(nullable=True)
constructive_language_percent: Mapped[float] = mapped_column(nullable=True)
neutral_language_percent: Mapped[float] = mapped_column(nullable=True)
3 changes: 2 additions & 1 deletion backend/services/basic_info_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
def scrape_reports() -> list[Report]:
LOGGER.info("Retrieving report links")
report_links = get_report_links()
report_links = ["/service-standard-reports/get-security-clearance"]
# report_links = ["/service-standard-reports/get-security-clearance"]
reports_models = []
number_of_reports = len(report_links)
LOGGER.info(f"Processing {number_of_reports} reports")
Expand Down Expand Up @@ -266,6 +266,7 @@ def create_report_model(report_dict: dict, url: str) -> Report:
section.title = report_section["title"]
section.positive_language_percent = report_section["positive_feedback_percentage"]
section.constructive_language_percent = report_section["negative_feedback_percentage"]
section.neutral_language_percent = report_section["neutral_feedback_percentage"]


if "feedback" in report_section:
Expand Down
5 changes: 4 additions & 1 deletion backend/services/section_info_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def analyse_feedback(feedback_string):
si_obj = SentimentIntensityAnalyzer()
sentiment_dict = si_obj.polarity_scores(feedback_string)
analysed_percentages.insert(0,sentiment_dict['neg']*100)
analysed_percentages.insert(0,sentiment_dict['neu']*100)
analysed_percentages.insert(0,sentiment_dict['pos']*100)

return analysed_percentages
Expand Down Expand Up @@ -86,6 +87,7 @@ def scrape_one(soup: BeautifulSoup, sections: list[dict]):
feedback.extend(extract_feedback(section_decision, "what-the-team-has-done-well", FeedbackType.POSITIVE))
feedback.extend(extract_feedback(section_decision, "what-the-team-needs-to-explore", FeedbackType.CONSTRUCTIVE))
feedback_text = extract_text_from_feedback(feedback)
analyse_feedback(feedback_text)


sections.append(dict(
Expand All @@ -94,7 +96,8 @@ def scrape_one(soup: BeautifulSoup, sections: list[dict]):
title = section_element.text.strip(),
feedback = feedback,
positive_feedback_percentage = analyse_feedback(feedback_text)[0],
negative_feedback_percentage = analyse_feedback(feedback_text)[1]
neutral_feedback_percentage = analyse_feedback(feedback_text)[1],
negative_feedback_percentage = analyse_feedback(feedback_text)[2]
))
break

Expand Down
10 changes: 8 additions & 2 deletions backend/tests/test_fixtures/section_fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,20 @@
report_id=1,
number=1,
decision="Met",
feedback=[]
feedback=[],
positive_language_percent = 10,
constructive_language_percent = 8,
neutral_language_percent = 4,

),
Section(
id=1,
report_id=1,
number=2,
decision="Not met",
feedback=[]
feedback=[],
positive_language_percent = 14,
constructive_language_percent = 3,
neutral_language_percent = 56,
)
]

0 comments on commit 4100161

Please sign in to comment.