In [1]:
import pandas as pd
import numpy as np

# Loading the Data:

### NOAA US Temps

In [2]:
noaa = pd.read_csv('noaa_temp_us.csv', header = 4)
noaa['Date'] =  pd.to_datetime(noaa['Date'], format = '%Y%m')
noaa.head()

Unnamed: 0,Date,Value,Anomaly
0,1900-12-01,52.77,0.75
1,1901-12-01,51.87,-0.15
2,1902-12-01,51.59,-0.43
3,1903-12-01,50.62,-1.4
4,1904-12-01,51.16,-0.86


### CDC Mortality Data (1979 - 2016)

The CDC uses ICD (International Classification of Diseases) codes to categorize causes of death. The ICD codes that include heat-related deaths are ICD-9 and ICD-10. The subsections of ICD-9 and ICD-10 we want to look at are E900.0, E900.1, E900.9, and X30. X30 was recently added as a cause of death is 1999. 
https://www.epa.gov/sites/default/files/2017-01/documents/heat-deaths_documentation.pdf

CDC Query System: https://wonder.cdc.gov/mortSQL.html 

ICD-9:
E900 (Excessive Heat - hyperthermia): 
- E900.0: Due to weather conditions
- E900.1: Of man-made origins
- E900.9: Of unspecified origin

ICD-10:
- X30: (Exposure to excessive natural heat (hyperthermia))

In [3]:
e900_0 = pd.read_csv('E900.0_1979-1998.txt', sep='\t').drop(columns = ['Notes', 'Year Code']).dropna()
e900_1 = pd.read_csv('E900.1_1979-1998.txt', sep='\t').drop(columns = ['Notes', 'Year Code']).dropna()
e900_9 = pd.read_csv('E900.9_1979-1998.txt', sep ='\t').drop(columns = ['Notes', 'Year Code']).dropna()
x30 = pd.read_csv('x30_1999-2016.txt', sep="\t").drop(columns = ['Notes', 'Year Code']).dropna()

e900_0['Cause of Death'] = 'E900 (Excessive Heat)'
e900_1['Cause of Death'] = 'E900.1 (Of man-made origin)'
e900_9['Cause of Death'] = 'E900.9 (Of unspecified origin)'
x30['Cause of Death'] = 'X30 (Exposure to excessive natural heat)'

all_cdc = [e900_0, e900_1, e900_9, x30]
all_deaths = pd.concat(all_cdc).groupby(['Year', 'Cause of Death']).sum()
all_deaths

Unnamed: 0_level_0,Unnamed: 1_level_0,Crude Rate Lower 95% Confidence Interval,Crude Rate Upper 95% Confidence Interval,Deaths,Population
Year,Cause of Death,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1979.0,E900 (Excessive Heat),0.0,0.0,54.0,224635398.0
1979.0,E900.1 (Of man-made origin),0.0,0.0,15.0,224635398.0
1979.0,E900.9 (Of unspecified origin),0.0,0.0,79.0,224635398.0
1980.0,E900 (Excessive Heat),0.3,0.3,651.0,226624371.0
1980.0,E900.1 (Of man-made origin),0.0,0.0,22.0,226624371.0
1980.0,E900.9 (Of unspecified origin),0.4,0.5,1027.0,226624371.0
1981.0,E900 (Excessive Heat),0.0,0.1,112.0,229487512.0
1981.0,E900.1 (Of man-made origin),0.0,0.0,18.0,229487512.0
1981.0,E900.9 (Of unspecified origin),0.1,0.1,193.0,229487512.0
1982.0,E900 (Excessive Heat),0.0,0.0,75.0,231701425.0


### World Bank State Temps (1901-2020)

In [4]:
wb = pd.read_csv('wb_state_temps.csv', header = 1)
wb = wb.rename(columns = {wb.columns[0]:'Year'})
wb

Unnamed: 0,Year,United States,Alabama,Alaska,Arizona,Arkansas,California,Colorado,Connecticut,Delaware,...,South Dakota,Tennessee,Texas,Utah,Vermont,Virginia,Washington,West Virginia,Wisconsin,Wyoming
0,1901,8.58,16.58,-5.18,14.98,16.29,13.56,6.68,8.47,12.12,...,7.77,13.63,18.75,8.58,4.77,12.31,7.79,10.30,6.05,5.64
1,1902,8.40,17.77,-5.55,14.49,16.10,13.00,6.39,8.70,12.64,...,6.62,14.45,19.04,8.14,4.73,12.92,7.44,10.89,6.19,5.06
2,1903,7.95,17.07,-5.52,14.32,15.50,13.11,5.49,8.68,12.39,...,6.18,13.97,17.58,7.24,4.84,12.62,7.31,10.81,5.31,4.16
3,1904,8.13,17.50,-5.92,15.09,16.02,13.70,6.53,7.17,11.02,...,6.38,13.92,18.72,8.31,3.10,11.78,8.20,10.08,4.52,5.42
4,1905,8.40,17.35,-3.84,14.32,15.66,13.35,5.79,8.22,12.14,...,6.41,14.15,17.84,7.83,4.16,12.55,7.87,10.60,5.42,4.60
5,1906,8.46,17.74,-5.50,14.47,16.02,13.46,6.15,9.03,12.99,...,6.80,14.60,18.13,7.81,4.84,13.29,8.32,11.36,6.21,4.87
6,1907,8.33,18.05,-5.68,14.78,16.79,13.07,6.84,7.84,11.75,...,6.07,14.69,19.25,8.51,3.73,12.47,7.52,10.40,5.00,4.96
7,1908,8.66,18.11,-5.12,14.38,16.98,12.96,5.98,9.25,12.87,...,7.38,15.08,18.88,7.19,4.96,13.04,7.82,11.24,6.60,4.51
8,1909,8.11,17.92,-6.80,14.47,16.92,12.83,5.84,8.75,12.53,...,6.15,14.79,19.16,7.56,4.60,12.97,6.96,11.03,5.63,4.31
9,1910,8.67,17.35,-6.25,15.85,16.30,13.61,7.30,8.98,12.51,...,7.63,14.07,19.26,8.90,4.88,12.67,7.89,10.62,6.23,6.02


### Kaggle Dataset: World City Temperatures

In [5]:
cities = pd.read_csv('city_temperature.csv')
cities

Unnamed: 0,Region,Country,State,City,Month,Day,Year,AvgTemperature
0,Africa,Algeria,,Algiers,1,1,1995,64.2
1,Africa,Algeria,,Algiers,1,2,1995,49.4
2,Africa,Algeria,,Algiers,1,3,1995,48.8
3,Africa,Algeria,,Algiers,1,4,1995,46.4
4,Africa,Algeria,,Algiers,1,5,1995,47.9
5,Africa,Algeria,,Algiers,1,6,1995,48.7
6,Africa,Algeria,,Algiers,1,7,1995,48.9
7,Africa,Algeria,,Algiers,1,8,1995,49.1
8,Africa,Algeria,,Algiers,1,9,1995,49.0
9,Africa,Algeria,,Algiers,1,10,1995,51.9


# Project Questions:

1. How has the average temperature changed in the US over the past 100 years?
2. How has heat-related mortality changed in the US over the past 50 years?
3. Has the number of heatwaves in the US increased?
4. What areas of the US are most affected by heatwaves?
5. What areas of the US are most affected by heat-related mortality?
6. How does the US compare to other countries in terms of heatwave prevalence and heat-related mortality?  


# Visualizations:

### Chloropleth Map

### Bar Chart

### Bubble Chart

### Line Chart