# Climate Change and Deaths from Cancer Analysis

In this analysis, we examine the correlation between climate change and deaths from cancer, focusing on the five most populous countries in the world:
1. China
2. India
3. United States
4. Indonesia
5. Brazil

In [61]:
# Dependencies and Set Up
import pandas as pd
import numpy as np
import requests
import json
import matplotlib.pyplot as plt


In [62]:
# Load the 1991-2016 temperature CSV for each country, one DataFrame per file
temp_china, temp_india, temp_usa, temp_indonesia, temp_brazil = [
    pd.read_csv(csv_path)
    for csv_path in (
        "./Resources/temperature_1991_2016_China.csv",
        "./Resources/temperature_1991_2016_India.csv",
        "./Resources/temperature_1991_2016_USA.csv",
        "./Resources/temperature_1991_2016_Indonesia.csv",
        "./Resources/temperature_1991_2016_Brazil.csv",
    )
]


In [63]:
# Preview the first five rows of the China temperature data
temp_china.head(n=5)

Unnamed: 0,Temperature - (Celsius),Year,Statistics,Country,ISO3
0,-8.0581,1991,Jan Average,China,CHN
1,-4.6373,1991,Feb Average,China,CHN
2,0.59422,1991,Mar Average,China,CHN
3,7.42692,1991,Apr Average,China,CHN
4,13.0076,1991,May Average,China,CHN


In [64]:
# Preview the first five rows of the India temperature data
temp_india.head(n=5)

Unnamed: 0,Temperature - (Celsius),Year,Statistics,Country,ISO3
0,16.6354,1991,Jan Average,India,IND
1,19.988,1991,Feb Average,India,IND
2,24.1397,1991,Mar Average,India,IND
3,27.3468,1991,Apr Average,India,IND
4,30.196,1991,May Average,India,IND


In [65]:
# Preview the first five rows of the USA temperature data
temp_usa.head(n=5)

Unnamed: 0,Temperature - (Celsius),Year,Statistics,Country,ISO3
0,-5.9938,1991,Jan Average,United States,USA
1,-1.4523,1991,Feb Average,United States,USA
2,0.93311,1991,Mar Average,United States,USA
3,7.22711,1991,Apr Average,United States,USA
4,13.457,1991,May Average,United States,USA


In [66]:
# Preview the first five rows of the Indonesia temperature data
temp_indonesia.head(n=5)

Unnamed: 0,Temperature - (Celsius),Year,Statistics,Country,ISO3
0,25.922,1991,Jan Average,Indonesia,IDN
1,25.822,1991,Feb Average,Indonesia,IDN
2,26.2723,1991,Mar Average,Indonesia,IDN
3,26.4782,1991,Apr Average,Indonesia,IDN
4,26.2828,1991,May Average,Indonesia,IDN


In [67]:
# Preview the first five rows of the Brazil temperature data
temp_brazil.head(n=5)

Unnamed: 0,Temperature - (Celsius),Year,Statistics,Country,ISO3
0,25.6309,1991,Jan Average,Brazil,BRA
1,25.9331,1991,Feb Average,Brazil,BRA
2,25.6195,1991,Mar Average,Brazil,BRA
3,25.3122,1991,Apr Average,Brazil,BRA
4,24.6685,1991,May Average,Brazil,BRA


In [68]:
# Load the annual CO2 emissions CSV and preview its first five rows
CO2_emission = pd.read_csv(
    filepath_or_buffer="./Resources/annual_co2_emissions_by_region.csv"
)
CO2_emission.head(n=5)

Unnamed: 0,Entity,Code,Year,Annual CO2 emissions (tonnes )
0,Afghanistan,AFG,1949,14656.0
1,Afghanistan,AFG,1950,84272.0
2,Afghanistan,AFG,1951,91600.0
3,Afghanistan,AFG,1952,91600.0
4,Afghanistan,AFG,1953,106256.0


In [69]:
# Give the entity and emissions columns shorter, cleaner labels
# (the source emissions header carries a stray space before the ")")
CO2_emission = CO2_emission.rename(
    {
        "Entity": "Country",
        "Annual CO2 emissions (tonnes )": "CO2 emissions (tonnes)",
    },
    axis="columns",
)
CO2_emission.head(n=5)

Unnamed: 0,Country,Code,Year,CO2 emissions (tonnes)
0,Afghanistan,AFG,1949,14656.0
1,Afghanistan,AFG,1950,84272.0
2,Afghanistan,AFG,1951,91600.0
3,Afghanistan,AFG,1952,91600.0
4,Afghanistan,AFG,1953,106256.0


In [70]:
# Keep only the rows for China, restricted to the columns of interest
columns = ["Country", "Year", "CO2 emissions (tonnes)"]

CO2_emission_china = CO2_emission.loc[
    CO2_emission["Country"].eq("China"), columns
]
CO2_emission_china.head(n=5)

Unnamed: 0,Country,Year,CO2 emissions (tonnes)
4231,China,1751,0.0
4232,China,1752,0.0
4233,China,1753,0.0
4234,China,1754,0.0
4235,China,1755,0.0


In [71]:
# Restrict the China emissions to 1991-2016, using the year as the row index
CO2_emission_china = CO2_emission_china.set_index("Year")

# Year labels 1991..2016 inclusive (np.arange excludes the stop value 2017).
# `years_91_16` is the plain-list form of the same labels, used for
# label-based selection with .loc here.
years = np.arange(1991, 2017, 1)
years_91_16 = list(years)

# Selecting with a list of labels keeps exactly those years, in that order
CO2_emission_china = CO2_emission_china.loc[years_91_16]
CO2_emission_china.head(15)

Unnamed: 0_level_0,Country,CO2 emissions (tonnes)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1991,China,2538924000.0
1992,China,2657112000.0
1993,China,2835795000.0
1994,China,3010242000.0
1995,China,3265057000.0
1996,China,3408347000.0
1997,China,3414549000.0
1998,China,3265903000.0
1999,China,3258135000.0
2000,China,3349295000.0


In [72]:
# Keep only the rows for India, restricted to the columns of interest
columns = ["Country", "Year", "CO2 emissions (tonnes)"]

CO2_emission_india = CO2_emission.loc[
    CO2_emission["Country"].eq("India"), columns
]
CO2_emission_india.head(n=5)

Unnamed: 0,Country,Year,CO2 emissions (tonnes)
9204,India,1751,0.0
9205,India,1752,0.0
9206,India,1753,0.0
9207,India,1754,0.0
9208,India,1755,0.0


In [73]:
# Index the India emissions by year and keep only the years listed in
# years_91_16 (1991 to 2016)
CO2_emission_india = CO2_emission_india.set_index("Year").loc[years_91_16]
CO2_emission_india.head(n=15)

Unnamed: 0_level_0,Country,CO2 emissions (tonnes)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1991,India,656740900.0
1992,India,698756900.0
1993,India,722894300.0
1994,India,764242000.0
1995,India,811460800.0
1996,India,878827000.0
1997,India,915049300.0
1998,India,933549300.0
1999,India,995233100.0
2000,India,1029638000.0


In [74]:
# Keep only the rows for the United States, restricted to the columns of interest
columns = ["Country", "Year", "CO2 emissions (tonnes)"]

CO2_emission_usa = CO2_emission.loc[
    CO2_emission["Country"].eq("United States"), columns
]
CO2_emission_usa.head(n=5)

Unnamed: 0,Country,Year,CO2 emissions (tonnes)
19661,United States,1751,0.0
19662,United States,1752,0.0
19663,United States,1753,0.0
19664,United States,1754,0.0
19665,United States,1755,0.0


In [75]:
# Index the United States emissions by year and keep only the years listed in
# years_91_16 (1991 to 2016)
CO2_emission_usa = CO2_emission_usa.set_index("Year").loc[years_91_16]
CO2_emission_usa.head(n=15)

Unnamed: 0_level_0,Country,CO2 emissions (tonnes)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1991,United States,5070839000.0
1992,United States,5174059000.0
1993,United States,5284688000.0
1994,United States,5377987000.0
1995,United States,5439213000.0
1996,United States,5626187000.0
1997,United States,5703891000.0
1998,United States,5752324000.0
1999,United States,5832250000.0
2000,United States,6000606000.0


In [76]:
# Keep only the rows for Indonesia, restricted to the columns of interest
columns = ["Country", "Year", "CO2 emissions (tonnes)"]

CO2_emission_indonesia = CO2_emission.loc[
    CO2_emission["Country"].eq("Indonesia"), columns
]
CO2_emission_indonesia.head(n=5)

Unnamed: 0,Country,Year,CO2 emissions (tonnes)
9460,Indonesia,1889,3664.0
9461,Indonesia,1890,14656.0
9462,Indonesia,1891,21984.0
9463,Indonesia,1892,179536.0
9464,Indonesia,1893,403040.0


In [77]:
# Index the Indonesia emissions by year and keep only the years listed in
# years_91_16 (1991 to 2016)
CO2_emission_indonesia = CO2_emission_indonesia.set_index("Year").loc[years_91_16]
CO2_emission_indonesia.head(n=15)

Unnamed: 0_level_0,Country,CO2 emissions (tonnes)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1991,Indonesia,180695289.5
1992,Indonesia,203659373.7
1993,Indonesia,219856328.5
1994,Indonesia,222967641.4
1995,Indonesia,226669943.6
1996,Indonesia,255207090.1
1997,Indonesia,280896520.2
1998,Indonesia,216104916.5
1999,Indonesia,244101581.6
2000,Indonesia,265983184.0


In [78]:
# Keep only the rows for Brazil, restricted to the columns of interest
columns = ["Country", "Year", "CO2 emissions (tonnes)"]

CO2_emission_brazil = CO2_emission.loc[
    CO2_emission["Country"].eq("Brazil"), columns
]
CO2_emission_brazil.head(n=5)

Unnamed: 0,Country,Year,CO2 emissions (tonnes)
2980,Brazil,1901,2103136.0
2981,Brazil,1902,2506176.0
2982,Brazil,1903,2440224.0
2983,Brazil,1904,2619760.0
2984,Brazil,1905,2799296.0


In [79]:
# Index the Brazil emissions by year and keep only the years listed in
# years_91_16 (1991 to 2016)
CO2_emission_brazil = CO2_emission_brazil.set_index("Year").loc[years_91_16]
CO2_emission_brazil.head(n=15)

Unnamed: 0_level_0,Country,CO2 emissions (tonnes)
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1991,Brazil,217227472.0
1992,Brazil,218383504.0
1993,Brazil,228333216.0
1994,Brazil,239470720.0
1995,Brazil,255583376.0
1996,Brazil,281194784.0
1997,Brazil,296585256.0
1998,Brazil,308305720.0
1999,Brazil,316282440.0
2000,Brazil,324226040.0
