<a href="https://colab.research.google.com/github/monkcypher/super-translate/blob/main/NLP(sentiment%20analysis%20on%20health%20data).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [27]:
import pandas as pd
import spacy
from textblob import TextBlob



In [28]:

class HealthRecordAnalyser:
    """Run lightweight NLP over free-text health records.

    Combines a spaCy pipeline (named entities, number-like tokens) with
    TextBlob (sentiment polarity). The spaCy model is loaded once per
    instance and reused for every call to ``analyse_text``.
    """

    def __init__(self):
        # Load the English language model from spaCy
        # This is a small model that's quick to load but still effective
        self.nlp = spacy.load('en_core_web_sm')

    def analyse_text(self, text):
        """Analyse a single record's text.

        Args:
            text: The record text. Anything that is not a non-blank string
                (e.g. ``None``, or the float NaN pandas uses for an empty
                CSV cell) is treated as "no text".

        Returns:
            A dict with three keys:
            ``'entities'`` - list of ``"<text> (<label>)"`` strings, one per
            named entity spaCy found;
            ``'sentiment'`` - TextBlob polarity score (-1 to 1);
            ``'numbers'`` - list of token texts that spaCy flags as
            number-like.
        """
        # A missing note must not abort the whole run: spaCy and TextBlob
        # both reject non-string input, so short-circuit with a neutral
        # result instead of letting them raise.
        if not isinstance(text, str) or not text.strip():
            return {'entities': [], 'sentiment': 0.0, 'numbers': []}

        # Process the medical text using spaCy
        doc = self.nlp(text)

        # Extract named entities (e.g., conditions, medicines, measurements)
        entities = [f"{ent.text} ({ent.label_})" for ent in doc.ents]

        # Calculate sentiment score (-1 to 1)
        sentiment = TextBlob(text).sentiment.polarity

        # Find numerical values in the text
        numbers = [token.text for token in doc if token.like_num]

        return {
            'entities': entities,
            'sentiment': sentiment,
            'numbers': numbers,
        }

def main(csv_path='/health_data.csv'):
    """Load health records from a CSV file and print an analysis of each.

    Args:
        csv_path: Path of the CSV file to read. It must provide the
            'date', 'record_type' and 'text' columns. Defaults to the
            location this script has always read from.

    Returns:
        None. Results, and any problem with the input file, are reported
        on stdout.
    """
    # Attempt to load the NHS records from CSV
    try:
        df = pd.read_csv(csv_path)
    except FileNotFoundError:
        # Report the path that was actually tried so the message cannot
        # disagree with the code.
        print(f"Error: Cannot find '{csv_path}'")
        return
    print(f"Successfully loaded {len(df)} NHS records\n")

    # Fail early with a readable message rather than a KeyError part-way
    # through the loop.
    required_columns = ('date', 'record_type', 'text')
    missing = [name for name in required_columns if name not in df.columns]
    if missing:
        print(f"Error: '{csv_path}' is missing required column(s): "
              f"{', '.join(missing)}")
        return

    # Create our analysis tool
    analyser = HealthRecordAnalyser()

    # Process each medical record. Number records by position so the label
    # does not depend on the DataFrame index being a 0-based integer range.
    for record_number, (_, row) in enumerate(df.iterrows(), start=1):
        print(f"Record {record_number}:")
        print(f"Date: {row['date']}")
        print(f"Type: {row['record_type']}")

        # Analyse the medical text
        analysis = analyser.analyse_text(row['text'])

        # Display results in a clear format
        print("\nAnalysis Results:")
        print("Entities Found:", ", ".join(analysis['entities']) if analysis['entities'] else "None")
        print("Sentiment Score:", round(analysis['sentiment'], 2),
              "(positive > 0, negative < 0, neutral = 0)")
        if analysis['numbers']:
            print("Numerical Values:", ", ".join(analysis['numbers']))
        print("-" * 50)

if __name__ == "__main__":
    main()


Successfully loaded 10 NHS records

Record 1:
Date: 2024-01-15
Type: GP Note

Analysis Results:
Entities Found: 2 (CARDINAL), Metformin 500 (PERSON), daily (DATE)
Sentiment Score: 0.0 (positive > 0, negative < 0, neutral = 0)
Numerical Values: 2, 500
--------------------------------------------------
Record 2:
Date: 2024-01-30
Type: Follow-up

Analysis Results:
Entities Found: None
Sentiment Score: 0.32 (positive > 0, negative < 0, neutral = 0)
--------------------------------------------------
Record 3:
Date: 2024-02-01
Type: Emergency

Analysis Results:
Entities Found: ECG (ORG), 75mg (TIME)
Sentiment Score: 0.07 (positive > 0, negative < 0, neutral = 0)
Numerical Values: 75
--------------------------------------------------
Record 4:
Date: 2024-02-15
Type: GP Note

Analysis Results:
Entities Found: 140/90 (DATE)
Sentiment Score: 0.0 (positive > 0, negative < 0, neutral = 0)
Numerical Values: 140/90
--------------------------------------------------
Record 5:
Date: 2024-03-01
Type: F