# Group 2 Dashboard Submission

* Vatsalya
* Swathi
* Thejaswi

In [None]:
from google.colab import auth
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import pandas as pd

# Authenticate the current Colab user (the next cell repeats this call before loading credentials)
auth.authenticate_user()

In [None]:
# Install gspread and pandas into the notebook environment
!pip install gspread pandas

from google.colab import auth
from google.auth import default

# Authenticate the Colab user, then fetch the default credentials;
# the second value returned by default() is not used.
auth.authenticate_user()
creds, _ = default()

In [None]:
import gspread

# Authorise a gspread client with the credentials obtained earlier
gc = gspread.authorize(creds)

# Open the source workbook by its URL
spreadsheet_url = "https://docs.google.com/spreadsheets/d/1kOTpKF5z2QpvvhP-UsrBmVKqBfCxUrgRHlvPlCKxEos/edit?gid=1943948906#gid=1943948906"
spreadsheet = gc.open_by_url(spreadsheet_url)

# Work with the first worksheet of the workbook
sheet = spreadsheet.sheet1

# Load every record of the worksheet into a DataFrame
records = sheet.get_all_records()
data = pd.DataFrame(records)

# Preview the first rows
preview = data.head()
print(preview)

In [None]:
import re
import nltk
from nltk.tokenize import sent_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from collections import Counter

# Download the NLTK resources used by this notebook, in a fixed order
for resource_name in ('stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(resource_name)

In [None]:
# English stop words, held in a set for membership tests
stop_words = set(stopwords.words('english'))


def preprocess_text(text):
    """Clean one cell value; non-string values are returned unchanged.

    For strings: remove every character that is not a letter, digit,
    whitespace or hyphen, drop stop words (compared case-insensitively),
    and collapse whitespace runs into single spaces.
    """
    if not isinstance(text, str):
        return text

    # Strip special characters
    cleaned = re.sub(r'[^a-zA-Z0-9\s-]+', '', text)
    # Keep only the tokens that are not stop words
    kept_tokens = [token for token in cleaned.split() if token.lower() not in stop_words]
    # Normalise spacing and trim the ends
    return re.sub(r'\s+', ' ', " ".join(kept_tokens)).strip()

# Add a cleaned 'processed_<name>' column for each raw text column
columns_to_process = ['data', 'conversation']
for column in columns_to_process:
    cleaned_values = data[column].apply(preprocess_text)
    data['processed_' + column] = cleaned_values

In [None]:
# Discard the two raw text columns
data = data.drop(['data', 'conversation'], axis=1)

In [None]:
!pip install scispacypip install scispacy
!pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.0/en_core_sci_lg-0.5.0.tar.gz

In [None]:
import spacy

# Load spaCy model (use SciSpaCy for biomedical text)
nlp = spacy.load("en_core_sci_lg")

# Combine the conversation texts into one string.
# Joining the DataFrame itself would only concatenate its column NAMES, so
# join the values of the processed conversation column instead. Non-string
# cells (passed through unchanged by preprocess_text) are skipped.
combined_text = " ".join(
    text for text in data['processed_conversation'] if isinstance(text, str)
)

# Process the text
# NOTE(review): spaCy rejects texts longer than nlp.max_length; for a large
# sheet consider processing row by row with nlp.pipe — confirm data size.
doc = nlp(combined_text)

# Extract diseases
# NOTE(review): en_core_sci_lg is believed to tag every entity with the generic
# label "ENTITY"; if so this filter yields nothing and a scispaCy NER model
# that emits a DISEASE label is needed instead — confirm against the model docs.
diseases = [ent.text for ent in doc.ents if ent.label_ == "DISEASE"]
print("Extracted Diseases:", set(diseases))

In [None]:
# Lower-case symptom phrases searched for in the processed conversations
symptoms_keyword = [
    "cough", "fever", "pain", "fatigue", "shortness of breath", "allergy", "fungal infection", "sore throat", "numbness", "constipation", 'coughing of blood', "asthenia", "hip pain",
    "wheezing", "headache", "nausea", "vomiting", "depression", "common cold", "pelvic pain", "dizziness", "snoring", "chest pain", 'dyspnea', 'anxiety', "limb pain", "lack of appetite"
]

# Lower-case disease names searched for in the processed conversations.
# "cervical spondylosis" and "diarrhea" are separate entries: without the comma
# between them Python concatenates the adjacent literals into one bogus keyword.
diseases_keyword = [
    "asthma", "diabetes", "cancer", "hypertension", "covid", "jaundice", "typhoid", "gastroesophageal reflux disease", "malaria", "cervical spondylosis", "diarrhea",
    "pneumonia", "flu", "tuberculosis", "allergy", "acne", "arthritis", "kidney disease", "peptic ulcer disease", "chicken pox", "migraine", "urinary tract infection", "varicose veins"
]

In [None]:
import pandas as pd

# Define keyword extraction function
def extract_keywords(text, keywords):
    """Return the entries of *keywords* that occur in *text*.

    Matching is a plain substring test against the lower-cased text, so the
    keywords themselves are expected to be lower-case. The result keeps the
    order of *keywords*.

    Args:
        text: The text to search. Any non-string value (e.g. None, NaN or a
            number coming from an empty or numeric cell) yields an empty list
            instead of raising.
        keywords: Iterable of keyword strings to look for.

    Returns:
        A list of the keywords found in the text.
    """
    if not isinstance(text, str):
        return []
    # Lower-case once rather than once per keyword
    lowered = text.lower()
    return [word for word in keywords if word in lowered]

# Extract symptom and disease keywords from every processed conversation
for target_column, keyword_list in (('Symptoms', symptoms_keyword), ('Diseases', diseases_keyword)):
    data[target_column] = data['processed_conversation'].apply(
        lambda conversation, wanted=keyword_list: extract_keywords(conversation, wanted)
    )

In [None]:
from collections import Counter

# Tally how often each symptom and each disease was extracted
symptom_counts = Counter()
for sublist in data['Symptoms']:
    symptom_counts.update(sublist)

disease_counts = Counter()
for sublist in data['Diseases']:
    disease_counts.update(sublist)

In [None]:
# Preview the first rows, which now include the extracted 'Symptoms' and 'Diseases' columns
data.head()

In [None]:
# Install Dash, which provides the dashboard components used in the final cell
!pip install dash

In [None]:
# Install vaderSentiment, which provides the SentimentIntensityAnalyzer used in the final cell
!pip install vaderSentiment

In [None]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import io
import base64
import numpy as np
from nltk.corpus import stopwords
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt

# Build a symptom x disease co-occurrence table.
# Every (symptom, disease) pair is taken from the SAME conversation. Flattening
# the two columns independently (as was done before) produces two lists of
# different lengths that crosstab pairs by position, which says nothing about
# which symptoms were actually mentioned together with which diseases.
symptom_disease_pairs = pd.DataFrame(
    [
        (symptom, disease)
        for symptoms, diseases in zip(data['Symptoms'], data['Diseases'])
        for symptom in symptoms
        for disease in diseases
    ],
    columns=['Symptom', 'Disease'],
)

# Rows are symptoms, columns are diseases, cells are co-occurrence counts
symptom_disease_corr = pd.crosstab(
    index=symptom_disease_pairs['Symptom'],
    columns=symptom_disease_pairs['Disease'],
)

# Render the heatmap to a base64 PNG string so it can be embedded in the dashboard
def create_heatmap(data):
    """Draw *data* as a seaborn heatmap and return it as base64-encoded PNG text."""
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.heatmap(data, annot=False, cmap="coolwarm", fmt='d', ax=ax)
    ax.set_title("Disease-Symptom Correlation Heatmap")
    fig.tight_layout()

    # Write the PNG into memory, then release the figure
    png_bytes = io.BytesIO()
    fig.savefig(png_bytes, format='png')
    plt.close(fig)
    return base64.b64encode(png_bytes.getvalue()).decode('utf-8')


heatmap_image = create_heatmap(symptom_disease_corr)

# Sentiment Analysis: one shared VADER analyzer for the whole notebook
analyzer = SentimentIntensityAnalyzer()


def get_sentiment_scores(text):
    """Return the polarity scores the shared analyzer computes for *text*."""
    scores = analyzer.polarity_scores(text)
    return scores

# Score every conversation, then expose the 'pos' and 'neg' components as their own columns
data['Sentiment'] = data['processed_conversation'].apply(get_sentiment_scores)
for column_name, score_key in (('Positive', 'pos'), ('Negative', 'neg')):
    data[column_name] = data['Sentiment'].apply(lambda scores, key=score_key: scores[key])

# Totals across all conversations
positive_sentiment = data['Positive'].sum()
negative_sentiment = data['Negative'].sum()

# Conversation Analysis: words per processed conversation plus summary statistics
word_counts = data['processed_conversation'].apply(lambda text: len(text.split()))
data['WordCount'] = word_counts
conversation_stats = dict(
    Average=word_counts.mean(),
    Median=word_counts.median(),
    Maximum=word_counts.max(),
)

# Total conversations, symptoms, and diseases
num_conversations = len(data)


def _valid_list_or_empty(value):
    """Keep *value* only when it is a non-empty list without missing entries."""
    if not isinstance(value, list) or len(value) == 0:
        return []
    if not all(pd.notna(item) for item in value):
        return []
    return value


for list_column in ('Symptoms', 'Diseases'):
    data[list_column] = data[list_column].apply(_valid_list_or_empty)

# Flatten each list column and count how often every entry occurs
all_symptoms = []
for sublist in data['Symptoms']:
    all_symptoms.extend(sublist)
symptom_counts = pd.Series(all_symptoms).value_counts()

all_diseases = []
for sublist in data['Diseases']:
    all_diseases.extend(sublist)
disease_counts = pd.Series(all_diseases).value_counts()

def assign_severity(row):
    """Classify a row as 'Severe', 'Moderate', 'Mild' or 'Unknown'.

    The row's 'Symptoms' entry is compared against three fixed symptom
    groups. The groups are checked in the order severe, moderate, mild, and
    the first one with at least two matching symptoms gives the label. When
    no group reaches two matches the result is 'Unknown'.
    """
    reported = row['Symptoms']
    tiers = (
        ('Severe', ['coughing of blood', 'chest pain', 'shortness of breath', 'fever']),
        ('Moderate', ['cough', 'flu', 'pain', 'fatigue', 'shortness of breath', "vomiting", "sore throat"]),
        ('Mild', ['snoring', 'common cold', 'dry cough', "nausea", "headache", "dizziness", "allergy"]),
    )

    for label, tier_symptoms in tiers:
        matches = 0
        for symptom in tier_symptoms:
            if symptom in reported:
                matches += 1
        if matches >= 2:
            return label
    return 'Unknown'

# Label every row with a severity level
data['Severity'] = data.apply(assign_severity, axis=1)

# Severity Distribution Pie Chart
severity_counts = data['Severity'].value_counts()
severity_pie_options = dict(
    names=severity_counts.index,
    values=severity_counts.values,
    title="Disease Severity Distribution",
    template='plotly_dark',
    color_discrete_sequence=px.colors.sequential.RdBu,
)
severity_fig = px.pie(**severity_pie_options)

# --- Create additional columns if needed ---
# NOTE: 'Age' and 'Gender' are randomly generated here, not read from the sheet.
row_total = len(data)
data['Age'] = np.random.randint(1, 101, size=row_total)  # random integers from 1 to 100
data['Gender'] = np.random.choice(['Male', 'Female'], size=row_total)
# Number of whitespace-separated words in each processed conversation
data['ConversationLength'] = data['processed_conversation'].apply(lambda text: len(text.split()))

# Twenty most common words across all processed conversations
all_conversation_text = ' '.join(data['processed_conversation'])
word = Counter(all_conversation_text.split()).most_common(20)
word_df = pd.DataFrame(word, columns=['word', 'Frequency'])

# Bar chart of those words, coloured by how often each one occurs
concerns_fig = px.bar(
    data_frame=word_df,
    x='word',
    y='Frequency',
    color='Frequency',
    labels={'word': 'word', 'Frequency': 'Frequency'},
    title="Most Frequent Words occurred with frequency",
    template='plotly_dark',
)

# --- Helper functions ---
def create_wordcloud(data, background_color='black'):
    """Render a word cloud from a word-to-frequency mapping as base64-encoded PNG text."""
    cloud = WordCloud(width=800, height=400, background_color=background_color)
    cloud = cloud.generate_from_frequencies(data)

    # Draw the cloud on an axis-free figure
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(cloud, interpolation='bilinear')
    ax.axis('off')
    fig.tight_layout()

    # Write the PNG into memory, then release the figure
    png_bytes = io.BytesIO()
    fig.savefig(png_bytes, format='png')
    plt.close(fig)
    return base64.b64encode(png_bytes.getvalue()).decode('utf-8')

# Most common non-stop-words in a text column (feeds the conversation word cloud)
def generate_frequent_words(data, column, stopwords):
    """Return up to 50 (word, count) pairs for the most common words in data[column].

    The texts are joined with spaces and split on whitespace; words whose
    lower-cased form is in *stopwords* are ignored.
    """
    corpus = ' '.join(data[column])
    kept_tokens = (token for token in corpus.split() if token.lower() not in stopwords)
    tally = Counter(kept_tokens)
    return tally.most_common(50)

# --- Generate visuals ---
# Frequency tables for the extracted symptoms and diseases
flat_symptoms = []
for sublist in data['Symptoms']:
    flat_symptoms.extend(sublist)
symptom_counts = pd.Series(flat_symptoms).value_counts()

flat_diseases = []
for sublist in data['Diseases']:
    flat_diseases.extend(sublist)
disease_counts = pd.Series(flat_diseases).value_counts()

frequent_words = generate_frequent_words(data, 'processed_conversation', stop_words)

# Word clouds, rendered as base64 PNG strings
symptom_frequencies = symptom_counts.to_dict()
symptom_wordcloud = create_wordcloud(symptom_frequencies)
disease_frequencies = disease_counts.to_dict()
disease_wordcloud = create_wordcloud(disease_frequencies)
conversation_frequencies = dict(frequent_words)
wordcloud_conversation = create_wordcloud(conversation_frequencies)

# --- Initialize Dash app ---
app = dash.Dash(__name__)
app.title = "Doctor-Patient Dashboard"


# --- Layout helpers (private; they only remove repetition in the layout below) ---
def _stat_card(title, value_text):
    """Return a 'card' Div with a white heading above a highlighted value."""
    return html.Div([
        html.H4(title, style={'color': 'white'}),
        html.P(value_text, style={"font-size": "24px", "font-weight": "bold", "color": "#FFDDC1"})
    ], className='card')


def _card_row(cards):
    """Return a flex row that spreads the given cards evenly."""
    return html.Div(cards, style={'display': 'flex', 'justify-content': 'space-around', 'margin-bottom': '20px'})


def _embedded_png(encoded_png):
    """Return a full-width Img whose source is the given base64-encoded PNG."""
    return html.Img(src=f'data:image/png;base64,{encoded_png}', style={'width': '100%', 'margin-top': '20px'})


def _frequency_bar(counts, x_label, title):
    """Return a dark-themed bar chart of a value_counts()-style Series."""
    return px.bar(
        x=counts.index,
        y=counts.values,
        labels={'x': x_label, 'y': 'Frequency'},
        title=title,
        template='plotly_dark'
    )


# --- App Layout ---
app.layout = html.Div([
    html.H1("Doctor-Patient Dashboard", style={'text-align': 'center', 'color': 'white'}),

    # Overview Section
    _card_row([
        _stat_card("Total Conversations", f"{len(data)}"),
        _stat_card("Unique Symptoms", f"{len(symptom_counts)}"),
        _stat_card("Unique Diseases", f"{len(disease_counts)}"),
    ]),

    # Symptom Analysis
    html.Div([
        html.H2("Symptom Analysis", style={'color': 'white'}),
        dcc.Graph(
            figure=_frequency_bar(symptom_counts, 'Symptom', "Most Frequently Mentioned Symptoms")
        ),
        html.H3("Symptom Word Cloud", style={'text-align': 'center', 'color': 'white', 'margin-top': '20px'}),
        _embedded_png(symptom_wordcloud)
    ]),

    # Disease Analysis
    html.Div([
        html.H2("Disease Analysis", style={'color': 'white'}),
        dcc.Graph(
            figure=_frequency_bar(disease_counts, 'Disease', "Most Frequently Mentioned Diseases")
        ),
        html.H3("Disease Word Cloud", style={'text-align': 'center', 'color': 'white', 'margin-top': '20px'}),
        _embedded_png(disease_wordcloud)
    ]),

    # Frequent Words in Conversations
    html.Div([
        html.H2("Frequent Words in Conversations", style={'color': 'white'}),
        _embedded_png(wordcloud_conversation),
    ]),

    # Patient Demographics
    html.Div([
        html.H2("Patient Demographics", style={'color': 'white'}),
        dcc.Graph(
            figure=px.histogram(
                data,
                x='Age',
                color='Gender',
                barmode='group',
                title="Age and Gender Distribution",
                template='plotly_dark'
            )
        )
    ]),

    # Disease Severity Analysis
    html.Div([
        html.H2("Disease Severity Analysis", style={'color': 'white'}),
        # Show the severity pie built from data['Severity']; the previous
        # placeholder plotted disease frequencies under this heading.
        dcc.Graph(figure=severity_fig)
    ]),

    # Disease-Symptom Correlation Heatmap
    html.Div([
        html.H2("Disease-Symptom Correlation Heatmap", style={'color': 'white'}),
        _embedded_png(heatmap_image),
    ]),

    # Dropdown menu for severity filter (drives the 'severity-bar-chart' callback)
    html.Div([
        html.H2("Symptom Severity", style={'color': 'white'}),
        html.Label("Filter by Severity:", style={'color': 'white', 'font-weight': 'bold'}),
        dcc.Dropdown(
            id='severity-dropdown',
            options=[
                {'label': 'Mild', 'value': 'Mild'},
                {'label': 'Moderate', 'value': 'Moderate'},
                {'label': 'Severe', 'value': 'Severe'}
            ],
            placeholder="Select Severity",
            value='Mild',
            style={'width': '50%', 'margin': 'auto'}
        )
    ], style={'margin-bottom': '20px', 'text-align': 'center'}),

    # Graph for filtered severity
    html.Div([
        dcc.Graph(id='severity-bar-chart')
    ]),

    # Conversation Analysis
    html.Div([
        html.H2("Conversation Analysis", style={'color': 'white'}),
        _card_row([
            _stat_card("Average Words Per Conversation", f"{conversation_stats['Average']:.2f}"),
            _stat_card("Median Words Per Conversation", f"{conversation_stats['Median']:.0f}"),
            _stat_card("Maximum Words in a Conversation", f"{conversation_stats['Maximum']:.0f}"),
        ])
    ]),

    # Most Frequent word with Frequency
    html.Div([
        html.H2("Most Frequent word with Frequency", style={'color': 'white'}),
        dcc.Graph(figure=concerns_fig),
    ]),

    # Sentiment Analysis
    html.Div([
        html.H2("Sentiment Analysis", style={'color': 'white'}),
        dcc.Graph(
            figure=px.bar(
                x=['Positive Sentiments', 'Negative Sentiments'],
                y=[positive_sentiment, negative_sentiment],
                labels={'x': 'Sentiment Type', 'y': 'Score'},
                title="Sentiment Analysis Overview",
                template='plotly_dark',
                color_discrete_sequence=['#2ecc71', '#e74c3c']
            )
        ),
    ]),

    # Interactive Filters (the slider drives the 'filtered-symptoms' callback)
    html.Div([
        html.H2("Interactive Filters", style={'color': 'white'}),
        html.Label("Filter by Age Range:", style={'color': 'white'}),
        dcc.RangeSlider(
            id='age-slider',
            min=1, max=100, step=1,
            marks={i: str(i) for i in range(1, 101, 10)},
            value=[1, 100]
        ),
        dcc.Graph(id='filtered-symptoms', style={'margin-top': '20px'}),
    ]),
], style={'background-color': '#1E1E1E', 'padding': '20px', 'font-family': 'Arial'})



# --- Callbacks for Interactivity ---
@app.callback(
    Output('filtered-symptoms', 'figure'),
    [Input('age-slider', 'value')]
)
def update_symptom_chart(age_range):
    """Redraw the symptom-frequency chart for rows whose 'Age' lies within *age_range* (both ends included)."""
    youngest, oldest = age_range[0], age_range[1]
    in_range = data['Age'].between(youngest, oldest)

    # Gather every symptom mentioned by the selected rows
    symptoms_in_range = []
    for symptom_list in data[in_range]['Symptoms']:
        symptoms_in_range.extend(symptom_list)
    filtered_symptom_counts = pd.Series(symptoms_in_range).value_counts()

    return px.bar(
        x=filtered_symptom_counts.index,
        y=filtered_symptom_counts.values,
        labels={'x': 'Symptom', 'y': 'Frequency'},
        title="Filtered Symptom Frequencies",
        template='plotly_dark'
    )

# Callback that redraws the severity bar chart when the dropdown changes
@app.callback(
    Output('severity-bar-chart', 'figure'),
    Input('severity-dropdown', 'value')
)
def update_bar_chart(selected_severity):
    """Return a symptom-frequency bar chart for rows with the selected severity.

    When no row carries that severity, an empty placeholder figure titled
    "No Data for <severity> Severity" is returned instead.
    """
    matching_rows = data[data['Severity'] == selected_severity]

    # Nothing to plot: hand back a titled placeholder
    if matching_rows.empty:
        return px.bar(
            title=f"No Data for {selected_severity} Severity",
            template='plotly_dark'
        )

    # Flatten the per-row symptom lists and count each symptom
    collected_symptoms = []
    for symptoms_list in matching_rows['Symptoms']:
        collected_symptoms.extend(symptoms_list)
    filtered_symptoms = pd.Series(collected_symptoms).value_counts()

    return px.bar(
        x=filtered_symptoms.index,
        y=filtered_symptoms.values,
        labels={'x': 'Symptom', 'y': 'Frequency'},
        title=f"Symptom Frequency for {selected_severity} Severity",
        template='plotly_dark'
    )

# Run the app
if __name__ == '__main__':
    # Use app.run: run_server is obsolete in current Dash releases, and the
    # unpinned `pip install dash` in this notebook installs the latest one.
    app.run(debug=True)