In [2]:
import time
import numpy as np
import pandas as pd
import pycountry as pc

from datetime import datetime, timedelta


import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go


# Build today's date as a DD/MM/YYYY string.
# `day` and `month` stay ints when they already have two digits and become
# zero-padded strings otherwise.
today = datetime.today()
day = str(today.day).zfill(2) if today.day < 10 else today.day
month = str(today.month).zfill(2) if today.month < 10 else today.month
today_str = '/'.join(str(part) for part in (day, month, today.year))

# Download the current monkeypox CSV from the globaldothealth GitHub repository
monkeypox_df = pd.read_csv(
    "https://raw.githubusercontent.com/globaldothealth/monkeypox/main/latest.csv"
)

# Tidy two text columns: trim and capitalise 'Gender', and drop double-quote
# characters from 'Confirmation_method'
monkeypox_df = monkeypox_df.assign(
    Gender=monkeypox_df['Gender'].str.strip().str.capitalize(),
    Confirmation_method=monkeypox_df['Confirmation_method'].str.replace('"', ''),
)

# Save the tidied frame to a local CSV, leaving out the index column
monkeypox_df.to_csv('monkeypox_df.csv', index=False)

  monkeypox_df = pd.read_csv("https://raw.githubusercontent.com/globaldothealth/monkeypox/main/latest.csv")


In [3]:
### Helper functions used by symp2dict to simplify raw symptoms into umbrella symptom terms
def lesion_class(item, dictionary, count, status):
    """Add ``count`` to the 'lesion' tally when ``item`` describes a lesion.

    A symptom counts as a lesion when its lower-cased text contains
    'lesion', 'ulcer' or 'scab'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'lesion' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    if not any(keyword in text for keyword in ('lesion', 'ulcer', 'scab')):
        return dictionary, status
    dictionary['lesion'] += count
    return dictionary, True
def rash_class(item, dictionary, count, status):
    """Add ``count`` to the 'rash' tally when ``item`` describes a rash.

    A symptom counts as a rash when its lower-cased text contains 'rash'
    or 'spots on skin'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'rash' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    matched = any(phrase in text for phrase in ('rash', 'spots on skin'))
    if matched:
        dictionary['rash'] += count
    return dictionary, matched or status
def fever_class(item, dictionary, count, status):
    """Add ``count`` to the 'fever' tally when ``item`` mentions fever.

    The check is a case-insensitive substring search for 'fever'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'fever' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    if 'fever' not in item.lower():
        return dictionary, status
    dictionary['fever'] += count
    return dictionary, True
def blister_class(item, dictionary, count, status):
    """Add ``count`` to the 'blister' tally when ``item`` describes a blister.

    A symptom counts as a blister when its lower-cased text contains
    'blister' or 'vesicle'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'blister' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    for keyword in ('blister', 'vesicle'):
        if keyword in text:
            dictionary['blister'] += count
            return dictionary, True
    return dictionary, status
def headache_class(item, dictionary, count, status):
    """Add ``count`` to the 'headache' tally when ``item`` mentions headache.

    The check is a case-insensitive substring search for 'headache'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'headache' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    matched = 'headache' in item.lower()
    if matched:
        dictionary['headache'] += count
    return dictionary, matched or status
def papule_class(item, dictionary, count, status):
    """Add ``count`` to the 'papule' tally when ``item`` mentions a papule.

    The check is a case-insensitive substring search for 'papule'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'papule' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    if 'papule' not in item.lower():
        return dictionary, status
    dictionary['papule'] += count
    return dictionary, True
def pustule_class(item, dictionary, count, status):
    """Add ``count`` to the 'pustule' tally when ``item`` mentions a pustule.

    A symptom matches when its lower-cased text contains 'pustule' or the
    alternate spelling 'postule'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'pustule' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    matched = any(spelling in text for spelling in ('pustule', 'postule'))
    if matched:
        dictionary['pustule'] += count
    return dictionary, matched or status
def muscle_ache_class(item, dictionary, count, status):
    """Add ``count`` to the 'muscle ache' tally when ``item`` describes one.

    A symptom matches when its lower-cased text contains 'myalgia' or
    'muscle', or when it contains 'pain' together with 'back', 'joint'
    or 'body'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'muscle ache' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    named_directly = 'myalgia' in text or 'muscle' in text
    located_pain = 'pain' in text and any(
        part in text for part in ('back', 'joint', 'body')
    )
    if not (named_directly or located_pain):
        return dictionary, status
    dictionary['muscle ache'] += count
    return dictionary, True
def fatigue_class(item, dictionary, count, status):
    """Add ``count`` to the 'fatigue' tally when ``item`` describes fatigue.

    A symptom matches when its lower-cased text contains 'fatigue' or
    'malaise'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'fatigue' entry;
            it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    if 'fatigue' not in text and 'malaise' not in text:
        return dictionary, status
    dictionary['fatigue'] += count
    return dictionary, True
def lymph_class(item, dictionary, count, status):
    """Add ``count`` to the 'swollen lymph node' tally when ``item`` describes one.

    A symptom matches when its lower-cased text contains 'lymphadenopathy',
    'adenomegaly' or 'inguinal adenopathy', or when it contains 'lymph'
    together with 'swollen', 'swell' or 'enlarge'.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds a 'swollen lymph node'
            entry; it is updated in place.
        count: Amount added to the tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    clinical_terms = ('lymphadenopathy', 'adenomegaly', 'inguinal adenopathy')
    swelling_words = ('swollen', 'swell', 'enlarge')
    matched = any(term in text for term in clinical_terms) or (
        'lymph' in text and any(word in text for word in swelling_words)
    )
    if matched:
        dictionary['swollen lymph node'] += count
    return dictionary, matched or status
def mp_class(item, dictionary, count, status):
    """Add ``count`` to every umbrella tally for a generic monkeypox entry.

    When the lower-cased text contains both 'symptoms' and 'monkeypox',
    ``count`` is added to each of the ten umbrella tallies: lesion, rash,
    fever, blister, headache, papule, pustule, muscle ache, fatigue and
    swollen lymph node.

    Args:
        item: Symptom text to inspect.
        dictionary: Tally mapping that already holds all ten umbrella
            entries; it is updated in place.
        count: Amount added to each tally on a match.
        status: Flag carried over from earlier checks; returned unchanged
            when nothing matches.

    Returns:
        ``(dictionary, status)``, with ``status`` set to True on a match.
    """
    text = item.lower()
    if 'symptoms' not in text or 'monkeypox' not in text:
        return dictionary, status
    umbrella_terms = (
        'lesion', 'rash', 'fever', 'blister', 'headache',
        'papule', 'pustule', 'muscle ache', 'fatigue', 'swollen lymph node',
    )
    for term in umbrella_terms:
        dictionary[term] += count
    return dictionary, True

In [4]:
def symp2dict(df):
    """Tally case counts per symptom, grouping variants under umbrella terms.

    Each 'Symptoms' value is split on commas. Every non-empty fragment is
    passed through the ``*_class`` classifiers, which add the row's 'case'
    count to each umbrella tally they match. A fragment that no classifier
    matches is tallied under its own stripped, lower-cased text.

    Args:
        df: DataFrame with a 'Symptoms' column of comma-separated strings
            and a 'case' column of numeric counts.

    Returns:
        dict mapping symptom term to summed case count. The ten umbrella
        terms are always present, possibly with a count of 0.

    NOTE(review): a row adds its count once per matching fragment, so two
    fragments of the same row that hit the same umbrella term count that row
    twice -- confirm whether that is intended.
    """
    # Umbrella symptoms found via data exploration of raw data
    symp_dict = {'lesion': 0, 'rash': 0, 'fever':0, 'blister': 0, 'headache':0, 'papule':0, 'pustule':0, 'muscle ache':0, 'fatigue':0,'swollen lymph node':0}
    # Classifiers are applied to every fragment, in this order
    classifiers = (
        lesion_class, rash_class, fever_class, blister_class, headache_class,
        papule_class, pustule_class, muscle_ache_class, fatigue_class,
        lymph_class, mp_class,
    )
    # Iterate over the column values instead of df.loc[0..n-1] so the function
    # also works on frames whose index is not the default 0..n-1 range
    for symptoms, case in zip(df['Symptoms'], df['case']):
        for symp in symptoms.split(','):
            key = symp.strip().lower()
            if not key:
                # Blank fragment from a trailing or doubled comma: not a symptom
                continue
            status = False
            # Categorises symptoms based off common umbrella symptoms
            for classify in classifiers:
                symp_dict, status = classify(symp, symp_dict, case, status)
            # Adds any cases which are not classified under umbrella symptoms as their own case
            if not status:
                symp_dict[key] = symp_dict.get(key, 0) + case
    return symp_dict
    

In [7]:
# Finds symptoms experienced globally: one row per distinct 'Symptoms' text,
# with 'case' holding how many records carry that text
mp_symptoms = pd.DataFrame({'case' : monkeypox_df.groupby(['Symptoms']).size()}).reset_index()
# Lower-cases the symptom text so the same symptom in different letter case shares one key
mp_symptoms['Symptoms'] = mp_symptoms['Symptoms'].str.lower()
# Total number of cases with symptoms
total_symptom_case = mp_symptoms['case'].sum()
# gets total number of cases for each symptom
symp_by_case = symp2dict(mp_symptoms)
# Drop 'hands' and 'and chest' (presumably left-over pieces of a description
# that itself contains commas -- verify against the raw data). pop() with a
# default is used because the data is re-downloaded on every run and these
# keys may no longer be present; `del` would then raise KeyError.
symp_by_case.pop('hands', None)
symp_by_case.pop('and chest', None)
symp_df = pd.DataFrame(symp_by_case.items(), columns=['Symptoms', 'Cases'])
symp_df['percentage'] = symp_df['Cases']/total_symptom_case*100
# sort_values returns a new frame, so keep the result; otherwise the sort is
# discarded and the CSV is written in unsorted order
symp_df = symp_df.sort_values('percentage')
symp_df.to_csv('cleaned_symptoms.csv')

In [8]:
# Display the symptom table ordered from lowest to highest percentage
symp_df.sort_values(by='percentage', ascending=True)

Unnamed: 0,Symptoms,Cases,percentage
14,pain urinating,1,0.492611
27,severe anemia,1,0.492611
25,mild symptoms,1,0.492611
24,general weakness,1,0.492611
23,outbreak on the skin,1,0.492611
22,asthenia,1,0.492611
21,general discomfort,1,0.492611
20,difficulty breathing,1,0.492611
16,swelling,1,0.492611
15,slight swallowing difficulties and an elevated...,1,0.492611
