Which health features most closely correlate with Alzheimer's disease?

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

from pathlib import Path
from pyspark.shell import spark
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Show every column when a DataFrame is displayed (no column truncation).
pd.options.display.max_columns = None

In [None]:
# Relative path to the Alzheimer's disease dataset (CSV file) that is loaded below.
alzheimer_data_path = 'Data/alzheimers_disease_data.csv'

In [None]:
# Read the CSV through the Spark session, treating the first row as the header
# and letting Spark infer the column types, then convert the result to a pandas
# DataFrame, which is what the rest of the notebook works with.
# NOTE: no Spark temp view is registered here; only the pandas copy is used.
spark_test_df = spark.read.csv(alzheimer_data_path, header=True, inferSchema=True)
data = spark_test_df.toPandas()

In [None]:
# DoctorInCharge is confidential: the only value it holds is 'XXXX', so the
# column carries no information and is removed.
data.drop(columns='DoctorInCharge', inplace=True)

In [None]:
# Readable labels for the coded demographic, lifestyle and medical-history
# columns, laid out as {column name: {numeric code: label}}.
medical_history_dicts = {
    'Gender': {0: 'Male', 1: 'Female'},
    'Ethnicity': {0: 'Caucasian', 1: 'African American', 2: 'Asian', 3: 'Other'},
    'EducationLevel': {0: 'None', 1: 'High School', 2: "Bachelor's", 3: 'Higher'},
    'Smoking': {0: 'Non-Smoker', 1: 'Smoker'},
    'FamilyHistoryAlzheimers': {0: 'No Alzheimers in Family', 1: 'Alzheimers in Family'},
    'CardiovascularDisease': {0: 'No Cardiovascular', 1: 'Cardiovascular'},
    'Diabetes': {0: 'No Diabetes', 1: 'Diabetes'},
    'Depression': {0: 'No Depression', 1: 'Depression'},
    'HeadInjury': {0: 'No History of Head Injury', 1: 'History of Head Injury'},
    'Hypertension': {0: 'Non-hypertensive', 1: 'Hypertensive'},
}

# DataFrame.replace accepts the nested {column: {old: new}} mapping directly,
# so every listed column is relabelled in one call.
data.replace(medical_history_dicts, inplace=True)
data.head()

In [None]:
# Code -> label lookups for the cognitive and functional assessment flags.
MemoryComplaints_Dict = {
    0: 'No Memory Issues',
    1: 'Memory Complains',
}
BehavioralProblems_Dict = {
    0: 'No Behavioral Issues',
    1: 'Behavioral Issues',
}

In [None]:
# Relabel the cognitive and functional assessment flags with readable text.
# The result is assigned back to the column: calling replace(inplace=True) on
# data[...] modifies a temporary Series, which leaves the DataFrame unchanged
# when pandas Copy-on-Write is active (and triggers a FutureWarning before that).
data['MemoryComplaints'] = data['MemoryComplaints'].replace(to_replace=MemoryComplaints_Dict)
data['BehavioralProblems'] = data['BehavioralProblems'].replace(to_replace=BehavioralProblems_Dict)

data.head()

In [None]:
# Code -> label lookups for the symptom flags (0 = absent, 1 = present).
Confusion_Dict = {
    0: 'Not Confused',
    1: 'Confused',
}
Disorientation_Dict = {
    0: 'Not Disoriented',
    1: 'Disoriented',
}
PersonalityChanges_Dict = {
    0: 'No Personality Changes',
    1: 'Personality Changes',
}
DifficultyCompletingTasks_Dict = {
    0: 'No Difficulty',
    1: 'Difficulty',
}
Forgetfulness_Dict = {
    0: 'Not forgetful',
    1: 'Forgetful',
}

In [None]:
# Relabel the symptom flags with readable text.
# Each result is assigned back to its column: calling replace(inplace=True) on
# data[...] modifies a temporary Series, which leaves the DataFrame unchanged
# when pandas Copy-on-Write is active (and triggers a FutureWarning before that).
data['Confusion'] = data['Confusion'].replace(to_replace=Confusion_Dict)
data['Disorientation'] = data['Disorientation'].replace(to_replace=Disorientation_Dict)
data['PersonalityChanges'] = data['PersonalityChanges'].replace(to_replace=PersonalityChanges_Dict)
data['DifficultyCompletingTasks'] = data['DifficultyCompletingTasks'].replace(to_replace=DifficultyCompletingTasks_Dict)
data['Forgetfulness'] = data['Forgetfulness'].replace(to_replace=Forgetfulness_Dict)

data.head()

In [None]:
# Code -> label lookup for the Diagnosis column.
Diagnosis_Dict = {0: "No Alzheimer's Disease", 1: "Alzheimer's Disease"}

# Assign the relabelled Series back to the column: replace(inplace=True) on
# data['Diagnosis'] would modify a temporary Series, which leaves the DataFrame
# unchanged when pandas Copy-on-Write is active.
data['Diagnosis'] = data['Diagnosis'].replace(to_replace=Diagnosis_Dict)

data.head()

In [None]:
# Truncate BMI to whole numbers (the column keeps the integer values from here on).
data['BMI'] = data['BMI'].astype('int')

# Bar chart of the number of patients per diagnosis.
diagnosis_labels = ["No Alzheimer's Disease", "Alzheimer's Disease"]
# Plot numeric codes rather than the label strings: on a string axis matplotlib
# orders categories by first appearance in the data, so fixed tick labels could
# end up under the wrong bar. Values that are already 0/1 pass through unchanged.
label_to_code = {label: code for code, label in enumerate(diagnosis_labels)}
diagnosis_codes = data['Diagnosis'].map(lambda value: label_to_code.get(value, value)).astype('int')
# range=(0, 1) pins the two bins to [0, .5) and [.5, 1], so the bars sit at
# .25 (code 0) and .75 (code 1) even if only one diagnosis is present.
values, bins, bars = plt.hist(diagnosis_codes, bins=2, range=(0, 1), rwidth=.5, color='purple')
plt.title('Patients in the Study')
plt.xticks(ticks=[.25, .75], labels=diagnosis_labels)
plt.xlabel('Diagnosis')
plt.ylabel('Number of Patients')
plt.bar_label(bars)
plt.show()

In [None]:
# Histogram of BMI with one bar per whole-number value.
# Truncate locally so this cell does not depend on BMI having been converted elsewhere.
bmi_whole = data['BMI'].astype('int')
bins_BMI = bmi_whole.max() - bmi_whole.min() + 1
# Use explicit edges at (value - 0.5): an integer bin count over [min, max]
# yields bins narrower than 1, so bars drift away from the values they count.
bmi_edges = [int(bmi_whole.min()) - 0.5 + offset for offset in range(int(bins_BMI) + 1)]
bmi_whole.hist(bins=bmi_edges, histtype='bar', rwidth=.5, color='yellowgreen')
plt.xlabel('BMI(Whole Numbers)')
plt.ylabel('Number of Participents')
plt.title('Participent BMI')
plt.show()

In [None]:
# Histogram of participant age with one bar per whole year.
age_whole = data['Age'].astype('int')
bins_Age = age_whole.max() - age_whole.min() + 1
# Use explicit edges at (age - 0.5): an integer bin count over [min, max]
# yields bins narrower than 1, so bars drift away from the ages they count.
age_edges = [int(age_whole.min()) - 0.5 + offset for offset in range(int(bins_Age) + 1)]
age_whole.hist(bins=age_edges, rwidth=.5, color='darkorange')
plt.xlabel('Age')
plt.ylabel('Number of Participents')
plt.title('Age of Participents')
plt.show()

In [None]:
# Pie chart: share of patients per Gender value (non-null PatientID count per group).
gender_counts = data.groupby('Gender')['PatientID'].count()
gender_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by Gender', ylabel="")

In [None]:
# Pie chart: share of patients per Ethnicity value (non-null PatientID count per group).
ethnicity_counts = data.groupby('Ethnicity')['PatientID'].count()
ethnicity_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by Ethnicity', ylabel="")

In [None]:
# Pie chart: share of patients per HeadInjury value (non-null PatientID count per group).
head_injury_counts = data.groupby('HeadInjury')['PatientID'].count()
head_injury_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by History', ylabel="")

In [None]:
# Pie chart: share of patients per FamilyHistoryAlzheimers value
# (non-null PatientID count per group).
family_history_counts = data.groupby('FamilyHistoryAlzheimers')['PatientID'].count()
family_history_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by History', ylabel="")

In [None]:
# Pie chart: share of patients per (Diagnosis, PersonalityChanges) combination
# (non-null PatientID count per group).
diagnosis_personality_counts = data.groupby(['Diagnosis', 'PersonalityChanges'])['PatientID'].count()
diagnosis_personality_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by Diagnosis', ylabel="")

In [None]:
# Pie chart: share of patients per Diagnosis value (non-null PatientID count per group).
diagnosis_counts = data.groupby('Diagnosis')['PatientID'].count()
diagnosis_counts.plot.pie(autopct='%1.1f%%', title='Study Paticipents by Diagnosis', ylabel="")

In [None]:
# Scatter plot of BMI (y axis) against DietQuality (x axis).
data.plot.scatter(x='DietQuality', y='BMI')
plt.show()