In [1]:
# Import the dependencies
import os
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Read the three raw CSV datasets from the Resources folder, in the same order
# as the names on the left-hand side.
heart_attack, heart_disease, fast_food = (
    pd.read_csv(Path(csv_file))
    for csv_file in (
        './Resources/heart_attacks.csv',
        './Resources/hd_by_state.csv',
        './Resources/FastFoodRestaurants.csv',
    )
)

In [3]:
# Inspect the dtype pandas inferred for each Heart Attack column
heart_attack.dtypes

age           int64
sex           int64
cp            int64
trtbps        int64
chol          int64
fbs           int64
restecg       int64
thalachh      int64
exng          int64
oldpeak     float64
slp           int64
caa           int64
thall         int64
output        int64
dtype: object

In [4]:
# List the original Heart Attack column names (used as the keys of the rename below)
heart_attack.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [5]:
# Replace the terse Heart Attack column codes with descriptive names.
# The mapping is kept in its own dict (one entry per line) so it is easy to scan.
heart_attack_renames = {
    'age': 'Age',
    'sex': 'Sex',
    'cp': 'Chest_Pain_Type',
    'trtbps': 'Resting_Blood_Pressure',
    'chol': 'Cholestoral',
    'fbs': 'Fasting_Blood_Sugar',
    'restecg': 'Resting_Electrocardiographic',
    'thalachh': 'Maximum_Heart_Rate',
    'exng': 'Exercise_Induced_Angina',
    'oldpeak': 'Previous_Peak',
    'slp': 'Slope',
    'caa': 'Number_of_Major_Vessels',
    'thall': 'Thalium_Stress_Test',
    'output': 'Likelihood_of_Heart_Attack',
}
# Rename in place so the existing heart_attack object carries the new labels.
heart_attack.rename(columns=heart_attack_renames, inplace=True)

In [6]:
# Confirm the Heart Attack columns now carry the descriptive names
heart_attack.columns

Index(['Age', 'Sex', 'Chest_Pain_Type', 'Resting_Blood_Pressure',
       'Cholestoral', 'Fasting_Blood_Sugar', 'Resting_Electrocardiographic',
       'Maximum_Heart_Rate', 'Exercise_Induced_Angina', 'Previous_Peak',
       'Slope', 'Number_of_Major_Vessels', 'Thalium_Stress_Test',
       'Likelihood_of_Heart_Attack'],
      dtype='object')

In [7]:
# Preview the first rows of the Heart Attack DataFrame as a sanity check
heart_attack.head()

Unnamed: 0,Age,Sex,Chest_Pain_Type,Resting_Blood_Pressure,Cholestoral,Fasting_Blood_Sugar,Resting_Electrocardiographic,Maximum_Heart_Rate,Exercise_Induced_Angina,Previous_Peak,Slope,Number_of_Major_Vessels,Thalium_Stress_Test,Likelihood_of_Heart_Attack
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [8]:
# Inspect the dtype pandas inferred for each Heart Disease column
heart_disease.dtypes

YEAR        int64
STATE      object
RATE      float64
DEATHS     object
URL        object
dtype: object

In [9]:
# List the original Heart Disease column names (used as the keys of the rename below)
heart_disease.columns

Index(['YEAR', 'STATE', 'RATE', 'DEATHS', 'URL'], dtype='object')

In [10]:
# Replace the upper-case Heart Disease headers with descriptive names.
# URL is intentionally absent from the mapping, so it keeps its original label.
heart_disease_renames = {
    'YEAR': 'Year',
    'STATE': 'State',
    'RATE': 'Rate_of_Heart_Disease',
    'DEATHS': 'Deaths',
}
# Rename in place so the existing heart_disease object carries the new labels.
heart_disease.rename(columns=heart_disease_renames, inplace=True)

In [11]:
# Check Heart Disease column names have been updated
heart_disease.columns

Index(['Year', 'State', 'Rate_of_Heart_Disease', 'Deaths', 'URL'], dtype='object')

In [12]:
# Drop URL column from DataFrame - it is unnecessary for our purposes.
# DataFrame.drop returns a new DataFrame instead of modifying the original, so
# the result must be assigned back; otherwise URL is still present when
# heart_disease is written to CSV later in the notebook.
# errors='ignore' keeps this cell safe to re-run once URL is already gone.
heart_disease = heart_disease.drop(columns=['URL'], errors='ignore')

# Display the result so the cell still shows the frame without URL.
heart_disease

Unnamed: 0,Year,State,Rate_of_Heart_Disease,Deaths
0,2020,AL,237.5,14739
1,2020,AK,139.8,915
2,2020,AZ,144.8,14196
3,2020,AR,222.5,8621
4,2020,CA,144.0,66538
...,...,...,...,...
395,2005,VA,203.0,14192
396,2005,WA,180.5,10985
397,2005,WV,253.6,5538
398,2005,WI,190.6,11842


In [13]:
# Inspect the dtype pandas inferred for each Fast Food column
fast_food.dtypes

index           int64
address        object
city           object
country        object
keys           object
latitude      float64
longitude     float64
name           object
postalCode     object
province       object
websites       object
dtype: object

In [14]:
# List the original Fast Food column names (used as the keys of the rename below)
fast_food.columns

Index(['index', 'address', 'city', 'country', 'keys', 'latitude', 'longitude',
       'name', 'postalCode', 'province', 'websites'],
      dtype='object')

In [15]:
# Replace the raw Fast Food headers with capitalised, descriptive names.
# 'websites' is not in the mapping, so it keeps its lower-case label.
fast_food_renames = {
    'index': 'Index',
    'address': 'Address',
    'city': 'City',
    'country': 'Country',
    'keys': 'Keys',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'name': 'Name_of_Restaurant',
    'postalCode': 'Zip_Code',
    'province': 'State',
}
# Rename in place so the existing fast_food object carries the new labels.
fast_food.rename(columns=fast_food_renames, inplace=True)

In [16]:
# Confirm the Fast Food columns now carry the new names
fast_food.columns

Index(['Index', 'Address', 'City', 'Country', 'Keys', 'Latitude', 'Longitude',
       'Name_of_Restaurant', 'Zip_Code', 'State', 'websites'],
      dtype='object')

In [17]:
# Count the distinct values in every Fast Food column. If Country holds only a
# single distinct value it adds no information and can be dropped along with
# the other unneeded columns.
fast_food.nunique()

Index                 10000
Address                9934
City                   2775
Country                   1
Keys                  10000
Latitude               9935
Longitude              9956
Name_of_Restaurant      548
Zip_Code               5289
State                    52
websites               3821
dtype: int64

In [18]:
# Drop 'websites' and 'Keys' columns - unnecessary information for our purposes
# Drop 'Country' column as there is only one country (US), so it is unnecessary
# DataFrame.drop returns a new DataFrame instead of modifying the original, so
# the result must be assigned back; otherwise these columns are still present
# when fast_food is written to CSV later in the notebook.
# errors='ignore' keeps this cell safe to re-run once the columns are gone.
fast_food = fast_food.drop(columns=['websites', 'Keys', 'Country'], errors='ignore')

# Display the result so the cell still shows the trimmed frame.
fast_food

Unnamed: 0,Index,Address,City,Latitude,Longitude,Name_of_Restaurant,Zip_Code,State
0,0,324 Main St,Massena,44.921300,-74.890210,McDonald's,13662,NY
1,1,530 Clinton Ave,Washington Court House,39.532550,-83.445260,Wendy's,43160,OH
2,2,408 Market Square Dr,Maysville,38.627360,-83.791410,Frisch's Big Boy,41056,KY
3,3,6098 State Highway 37,Massena,44.950080,-74.845530,McDonald's,13662,NY
4,4,139 Columbus Rd,Athens,39.351550,-82.097280,OMG! Rotisserie,45701,OH
...,...,...,...,...,...,...,...,...
9995,9995,3013 Peach Orchard Rd,Augusta,33.415257,-82.024531,Wendy's,30906,GA
9996,9996,678 Northwest Hwy,Cary,42.217300,-88.255800,Lee's Oriental Martial Arts,60013,IL
9997,9997,1708 Main St,Longmont,40.189190,-105.101720,Five Guys,80501,CO
9998,9998,67740 Highway 111,Cathedral City,33.788640,-116.482150,El Pollo Loco,92234,CA


In [19]:
# Write the Fast Food DataFrame to CSV; index=False leaves the row index out of the file
fast_food.to_csv(Path('./Resources/clean_fast_food.csv'), index=False)

In [20]:
# Write the Heart Disease DataFrame to CSV. index=False leaves the row index out
# of the file, consistent with how the Fast Food CSV is written; without it the
# file gains an unnamed leading column that shows up as 'Unnamed: 0' on re-read.
heart_disease.to_csv(Path('./Resources/clean_heart_disease.csv'), index=False)

In [21]:
# Write the Heart Attack DataFrame to CSV. index=False leaves the row index out
# of the file, consistent with how the Fast Food CSV is written; without it the
# file gains an unnamed leading column that shows up as 'Unnamed: 0' on re-read.
heart_attack.to_csv(Path('./Resources/clean_heart_attack.csv'), index=False)