# Seeking the relationship between 7 awareness variables and carbon dioxide emissions (and GDP per capita) in each of the US states

In this file, we use the states' carbon emission data for 2014 and 2016 and the climate change awareness data from Yale's study for 2014, 2016 and 2018. We analyze the correlation between carbon dioxide emissions and several awareness variables to see whether awareness and emissions are related in these two years. If there is a correlation, we will build a model that predicts the 2018 carbon dioxide emissions from the awareness data; if not, we will skip the prediction.

## Import the Python packages we may need

In [None]:
import pandas as pd
import matplotlib.pyplot as plt; plt.rcdefaults()
import numpy as np
import matplotlib.pyplot as plt

import patsy
import statsmodels.api as sm
import scipy.stats as stats
from scipy.stats import ttest_ind, chisquare, normaltest

## Read in data about population sentiment per state in 2014

In [None]:
# Load the 2014 state-level YPCCC survey file and display it.
df_YPCCC_2014_State = pd.read_csv(
    filepath_or_buffer="Datasets/YPCCC_2014_State.csv",
)
df_YPCCC_2014_State

### Specify which columns we would like to keep in the dataframe for our analysis:

In [None]:
# Show every column name of the 2014 data as a plain list.
df_YPCCC_2014_State.columns.tolist()

In [None]:
# Columns retained for the analysis: state name, population and the seven
# awareness variables.
columns_keep = [
    'Statename',
    'TotalPop',
    'happening',
    'human',
    'worried',
    'personal',
    'CO2limits',
    'regulate',
    'fundrenewables',
]
df_YPCCC_2014_State_new = df_YPCCC_2014_State.loc[:, columns_keep]
df_YPCCC_2014_State_new

## Read in data about population sentiment per state in 2016

In [None]:
# Load the 2016 YCOM survey file and display it.
df_YCOM_2016 = pd.read_csv(
    filepath_or_buffer="Datasets/YCOM_2016.csv",
)
df_YCOM_2016

### Specify which columns in 2016 data we would like to keep in the dataframe for our analysis:

In [None]:
# Show every column name of the 2016 data as a plain list.
df_YCOM_2016.columns.tolist()

In [None]:
# Columns retained for the analysis: region name plus the seven awareness
# variables.
columns_2016_keep = ['GeoName', 'happening', 'human', 'worried', 'personal', 'CO2limits',
                     'regulate', 'fundrenewables']
df_YCOM_2016_new = df_YCOM_2016[columns_2016_keep]

# Keep the rows labelled 0..51 and then discard row 0.
# NOTE(review): presumably row 0 is a national aggregate and rows 1-51 are the
# states plus DC -- confirm against the CSV.
df_YCOM_2016_state = df_YCOM_2016_new.loc[0:51]
df_YCOM_2016_state = df_YCOM_2016_state.drop(index=0)

# dropna() returns a new frame; the result must be assigned, otherwise the
# call has no effect.
df_YCOM_2016_state = df_YCOM_2016_state.dropna()
df_YCOM_2016_state


## Read in data about population sentiment per state in 2018

In [None]:
# Load the 2018 YCOM survey file (read as ISO-8859-1) and display it.
df_YCOM_2018 = pd.read_csv(
    filepath_or_buffer="Datasets/YCOM_2018.csv",
    encoding='ISO-8859-1',
)
df_YCOM_2018

### Specify which columns in 2018 data we would like to keep in the dataframe for our analysis:

In [None]:
# Show every column name of the 2018 data as a plain list.
df_YCOM_2018.columns.tolist()

In [None]:
# Columns retained for the analysis: region name plus the seven awareness
# variables.
columns_2018_keep = ['GeoName', 'happening', 'human', 'worried', 'personal', 'CO2limits', 'regulate',
                     'fundrenewables']
df_YCOM_2018_new = df_YCOM_2018[columns_2018_keep]

# Slice the column-filtered frame (not the raw one) so the state table only
# carries the columns selected above, matching the 2016 processing.
# NOTE(review): presumably row 0 is a national aggregate and rows 1-51 are the
# states plus DC -- confirm against the CSV.
df_YCOM_2018_state = df_YCOM_2018_new.loc[0:51]
df_YCOM_2018_state = df_YCOM_2018_state.drop(index=0)
df_YCOM_2018_state

## Read in data about carbon dioxide emissions released per state: 

In [None]:
# Load the per-state CO2 emission workbook; the column names are taken from
# the row at zero-based position 4.
df_CO2_state = pd.read_excel(
    "Datasets/Carbon_dioxide_emission_by_state.xlsx",
    header=4,
)
df_CO2_state

### Drop the null values in CO2 states data

In [None]:
# Remove every row that contains at least one missing value.
df_CO2_state_dropped = df_CO2_state.dropna(axis=0, how='any')
df_CO2_state_dropped

### Specify columns 2012, 2014, 2016 in CO2 data we would like to keep

In [None]:
# List all the columns of the CO2 data.
df_CO2_state.columns.tolist()

In [None]:
# Keep only the state name and the 2012, 2014 and 2016 emission columns,
# then remove the row labelled 51.
# NOTE(review): row 51 is presumably a non-state total row -- confirm.
columns_CO2 = ['State', 2012, 2014, 2016]
df_CO2_state_keep = df_CO2_state_dropped.loc[:, columns_CO2].drop(index=51)
df_CO2_state_keep

## Rank each variable data of awareness and CO2 emission in 2014 and 2016, respectively

### The descending CO2 emission quantity of year 2012

In [None]:
# Order the states by their 2012 emissions (ascending) and show the first ten.
df_CO2_state_12sorted = df_CO2_state_keep.sort_values(by=2012, ascending=True)
df_CO2_state_12sorted.loc[:, ['State', 2012]].head(n=10)

In [None]:
# Last ten rows of the ascending 2012 ordering.
df_CO2_state_12sorted.loc[:, ['State', 2012]].tail(n=10)

In [None]:
# Horizontal bar chart of the 2012 emissions, one bar per state, following
# the ascending order of df_CO2_state_12sorted.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_CO2_state_12sorted['State']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_CO2_state_12sorted[2012], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel(2012)
plt.title('CO2 Rank 2012')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

### The descending CO2 emission quantity of year 2014

In [None]:
# Order the states by their 2014 emissions (ascending) and show the first ten.
df_CO2_state_14sorted = df_CO2_state_keep.sort_values(by=2014, ascending=True)
df_CO2_state_14sorted.loc[:, ['State', 2014]].head(n=10)

In [None]:
# Last ten rows of the ascending 2014 ordering.
df_CO2_state_14sorted.loc[:, ['State', 2014]].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 emissions, one bar per state, following
# the ascending order of df_CO2_state_14sorted. (The cell previously re-plotted
# the 2012 data under a 2012 title.)
fig_size = plt.rcParams["figure.figsize"]

fig_size[0] = 12
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size
y_pos = np.arange(len(df_CO2_state_14sorted['State']))

plt.barh(y_pos, df_CO2_state_14sorted[2014], align='center', alpha=0.5)
plt.yticks(y_pos, df_CO2_state_14sorted['State'])
plt.ylabel(2014)
plt.title('CO2 Rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

### The descending CO2 emission quantity of year 2016

In [None]:
# Order the states by their 2016 emissions (ascending) and show the first ten.
df_CO2_state_16sorted = df_CO2_state_keep.sort_values(by=2016, ascending=True)
df_CO2_state_16sorted.loc[:, ['State', 2016]].head(n=10)

In [None]:
# Last ten rows of the ascending 2016 ordering.
df_CO2_state_16sorted.loc[:, ['State', 2016]].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 emissions, one bar per state, following
# the ascending order of df_CO2_state_16sorted. (The cell previously used the
# frame sorted by 2012, so the bars were not ranked by 2016, and the title
# said 2014.)
fig_size = plt.rcParams["figure.figsize"]

fig_size[0] = 12
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size
y_pos = np.arange(len(df_CO2_state_16sorted['State']))

plt.barh(y_pos, df_CO2_state_16sorted[2016], align='center', alpha=0.5)
plt.yticks(y_pos, df_CO2_state_16sorted['State'])
plt.ylabel(2016)
plt.title('CO2 Rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

### The descending rank of awareness data in 2014

#### The descending rank of "happening" in 2014

In [None]:
# Order the 2014 awareness table by 'happening' (ascending); show the first ten.
df_a14sort_happening = df_YPCCC_2014_State_new.sort_values(by='happening', ascending=True)
df_a14sort_happening.loc[:, ['Statename', 'happening']].head(n=10)

In [None]:
# Last ten rows of the ascending 'happening' ordering for 2014.
df_a14sort_happening.loc[:, ['Statename', 'happening']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'happening' values, one bar per state,
# following the ascending order of df_a14sort_happening.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_happening['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_happening['happening'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('happening rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "human" 2014

In [None]:
# Order the 2014 awareness table by 'human' (ascending); show the first ten.
df_a14sort_human = df_YPCCC_2014_State_new.sort_values(by='human', ascending=True)
df_a14sort_human.loc[:, ['Statename', 'human']].head(n=10)

In [None]:
# Last ten rows of the ascending 'human' ordering for 2014.
df_a14sort_human.loc[:, ['Statename', 'human']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'human' values, one bar per state,
# following the ascending order of df_a14sort_human.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_human['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_human['human'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('human rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "worried" 2014

In [None]:
# Order the 2014 awareness table by 'worried' (ascending); show the first ten.
df_a14sort_worried = df_YPCCC_2014_State_new.sort_values(by='worried', ascending=True)
df_a14sort_worried.loc[:, ['Statename', 'worried']].head(n=10)

In [None]:
# Last ten rows of the ascending 'worried' ordering for 2014.
df_a14sort_worried.loc[:, ['Statename', 'worried']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'worried' values, one bar per state,
# following the ascending order of df_a14sort_worried.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_worried['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_worried['worried'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('worried rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "personal" 2014

In [None]:
# Order the 2014 awareness table by 'personal' (ascending); show the first ten.
df_a14sort_personal = df_YPCCC_2014_State_new.sort_values(by='personal', ascending=True)
df_a14sort_personal.loc[:, ['Statename', 'personal']].head(n=10)

In [None]:
# Last ten rows of the ascending 'personal' ordering for 2014.
df_a14sort_personal.loc[:, ['Statename', 'personal']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'personal' values, one bar per state,
# following the ascending order of df_a14sort_personal.
fig_size = plt.rcParams["figure.figsize"]

fig_size[0] = 12
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size
# Bar positions come from the frame that is actually plotted (the cell used
# to take the length from df_a14sort_human).
y_pos = np.arange(len(df_a14sort_personal['Statename']))

plt.barh(y_pos, df_a14sort_personal['personal'], align='center', alpha=0.5)
plt.yticks(y_pos, df_a14sort_personal['Statename'])
plt.ylabel('states')
plt.title('personal rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "CO2limits" 2014

In [None]:
# Order the 2014 awareness table by 'CO2limits' (ascending); show the first ten.
df_a14sort_CO2limits = df_YPCCC_2014_State_new.sort_values(by='CO2limits', ascending=True)
df_a14sort_CO2limits.loc[:, ['Statename', 'CO2limits']].head(n=10)

In [None]:
# Last ten rows of the ascending 'CO2limits' ordering for 2014.
df_a14sort_CO2limits.loc[:, ['Statename', 'CO2limits']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'CO2limits' values, one bar per state,
# following the ascending order of df_a14sort_CO2limits.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_CO2limits['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_CO2limits['CO2limits'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('CO2limits rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "regulate" 2014

In [None]:
# Order the 2014 awareness table by 'regulate' (ascending); show the first ten.
df_a14sort_regulate = df_YPCCC_2014_State_new.sort_values(by='regulate', ascending=True)
df_a14sort_regulate.loc[:, ['Statename', 'regulate']].head(n=10)

In [None]:
# Last ten rows of the ascending 'regulate' ordering for 2014.
df_a14sort_regulate.loc[:, ['Statename', 'regulate']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'regulate' values, one bar per state,
# following the ascending order of df_a14sort_regulate.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_regulate['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_regulate['regulate'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('regulate rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "fundrenewable" 2014

In [None]:
# Order the 2014 awareness table by 'fundrenewables' (ascending); show the
# first ten.
df_a14sort_fundrenewables = df_YPCCC_2014_State_new.sort_values(by='fundrenewables', ascending=True)
df_a14sort_fundrenewables.loc[:, ['Statename', 'fundrenewables']].head(n=10)

In [None]:
# Last ten rows of the ascending 'fundrenewables' ordering for 2014.
df_a14sort_fundrenewables.loc[:, ['Statename', 'fundrenewables']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2014 'fundrenewables' values, one bar per state,
# following the ascending order of df_a14sort_fundrenewables.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a14sort_fundrenewables['Statename']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a14sort_fundrenewables['fundrenewables'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('fundrenewables rank 2014')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

### The descending rank of awareness data in 2016

#### The descending rank of "happening" in 2016

In [None]:
# Order the 2016 awareness table by 'happening' (ascending); show the first ten.
df_a16sort_happening = df_YCOM_2016_state.sort_values(by='happening', ascending=True)
df_a16sort_happening.loc[:, ['GeoName', 'happening']].head(n=10)

In [None]:
# Last ten rows of the ascending 'happening' ordering for 2016.
df_a16sort_happening.loc[:, ['GeoName', 'happening']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'happening' values, one bar per state,
# following the ascending order of df_a16sort_happening.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a16sort_happening['GeoName']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a16sort_happening['happening'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('happening rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "human" 2016

In [None]:
# Order the 2016 awareness table by 'human' (ascending); show the first ten.
df_a16sort_human = df_YCOM_2016_state.sort_values(by='human', ascending=True)
df_a16sort_human.loc[:, ['GeoName', 'human']].head(n=10)

In [None]:
# Last ten rows of the ascending 'human' ordering for 2016.
df_a16sort_human.loc[:, ['GeoName', 'human']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'human' values, one bar per state,
# following the ascending order of df_a16sort_human.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a16sort_human['GeoName']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a16sort_human['human'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('human rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "worried" 2016

In [None]:
# Order the 2016 awareness table by 'worried' (ascending); show the first ten.
df_a16sort_worried = df_YCOM_2016_state.sort_values(by='worried', ascending=True)
df_a16sort_worried.loc[:, ['GeoName', 'worried']].head(n=10)

In [None]:
# Last ten rows of the ascending 'worried' ordering for 2016.
df_a16sort_worried.loc[:, ['GeoName', 'worried']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'worried' values, one bar per state,
# following the ascending order of df_a16sort_worried.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a16sort_worried['GeoName']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a16sort_worried['worried'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('worried rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "personal" 2016

In [None]:
# Order the 2016 awareness table by 'personal' (ascending); show the first ten.
df_a16sort_personal = df_YCOM_2016_state.sort_values(by='personal', ascending=True)
df_a16sort_personal.loc[:, ['GeoName', 'personal']].head(n=10)

In [None]:
# Last ten rows of the ascending 'personal' ordering for 2016.
df_a16sort_personal.loc[:, ['GeoName', 'personal']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'personal' values, one bar per state,
# following the ascending order of df_a16sort_personal.
fig_size = plt.rcParams["figure.figsize"]

fig_size[0] = 12
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size
y_pos = np.arange(len(df_a16sort_personal['GeoName']))

plt.barh(y_pos, df_a16sort_personal['personal'], align='center', alpha=0.5)
plt.yticks(y_pos, df_a16sort_personal['GeoName'])
plt.ylabel('states')
# The title names the plotted variable (it previously said 'human').
plt.title('personal rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "CO2limits" 2016

In [None]:
# Order the 2016 awareness table by 'CO2limits' (ascending); show the first ten.
df_a16sort_CO2limits = df_YCOM_2016_state.sort_values(by='CO2limits', ascending=True)
df_a16sort_CO2limits.loc[:, ['GeoName', 'CO2limits']].head(n=10)

In [None]:
# Last ten rows of the ascending 'CO2limits' ordering for 2016.
df_a16sort_CO2limits.loc[:, ['GeoName', 'CO2limits']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'CO2limits' values, one bar per state,
# following the ascending order of df_a16sort_CO2limits.
fig_size = plt.rcParams["figure.figsize"]

fig_size[0] = 12
fig_size[1] = 10
plt.rcParams["figure.figsize"] = fig_size
y_pos = np.arange(len(df_a16sort_CO2limits['GeoName']))

plt.barh(y_pos, df_a16sort_CO2limits['CO2limits'], align='center', alpha=0.5)
plt.yticks(y_pos, df_a16sort_CO2limits['GeoName'])
plt.ylabel('states')
# The title names the plotted variable (it previously said 'human').
plt.title('CO2limits rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "regulate" 2016

In [None]:
# Order the 2016 awareness table by 'regulate' (ascending); show the first ten.
df_a16sort_regulate = df_YCOM_2016_state.sort_values(by='regulate', ascending=True)
df_a16sort_regulate.loc[:, ['GeoName', 'regulate']].head(n=10)

In [None]:
# Last ten rows of the ascending 'regulate' ordering for 2016.
df_a16sort_regulate.loc[:, ['GeoName', 'regulate']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'regulate' values, one bar per state,
# following the ascending order of df_a16sort_regulate.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a16sort_regulate['GeoName']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a16sort_regulate['regulate'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('regulate rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

#### The descending rank of "fundrenewable" 2016

In [None]:
# Order the 2016 awareness table by 'fundrenewables' (ascending); show the
# first ten.
df_a16sort_fundrenewables = df_YCOM_2016_state.sort_values(by='fundrenewables', ascending=True)
df_a16sort_fundrenewables.loc[:, ['GeoName', 'fundrenewables']].head(n=10)

In [None]:
# Last ten rows of the ascending 'fundrenewables' ordering for 2016.
df_a16sort_fundrenewables.loc[:, ['GeoName', 'fundrenewables']].tail(n=10)

In [None]:
# Horizontal bar chart of the 2016 'fundrenewables' values, one bar per state,
# following the ascending order of df_a16sort_fundrenewables.
fig_size = plt.rcParams["figure.figsize"]
fig_size[0], fig_size[1] = 12, 10
plt.rcParams["figure.figsize"] = fig_size

state_labels = df_a16sort_fundrenewables['GeoName']
y_pos = np.arange(len(state_labels))

plt.barh(y_pos, df_a16sort_fundrenewables['fundrenewables'], align='center', alpha=0.5)
plt.yticks(y_pos, state_labels)
plt.ylabel('states')
plt.title('fundrenewables rank 2016')

# Enlarge the current figure (presumably so every state label is readable).
fig = plt.gcf()
fig.set_size_inches(18.5, 32)
plt.show()

## Find the correlation between each awareness variable and carbon dioxide emission quantity in each state

### Merge the carbon dioxide emission data and awareness variables data by state in 2014

In [None]:
# Give the 2014 awareness table the same key column name as the CO2 table,
# then join the two on 'State' (pd.merge defaults to an inner join).
df_YPCCC_2014_State_renamed = df_YPCCC_2014_State_new.rename(
    columns={'Statename': 'State'},
)
State_Awareness_CO2 = pd.merge(
    left=df_YPCCC_2014_State_renamed,
    right=df_CO2_state_keep,
    on='State',
)
State_Awareness_CO2

In [None]:
# Show every column name of the merged 2014 table as a plain list.
State_Awareness_CO2.columns.tolist()

In [None]:
# From the merged table keep the state, population, the seven awareness
# variables and only the 2014 emission column.
columns2014 = [
    'State',
    'TotalPop',
    'happening',
    'human',
    'worried',
    'personal',
    'CO2limits',
    'regulate',
    'fundrenewables',
    2014,
]
State_Awareness_CO2_2014 = State_Awareness_CO2.loc[:, columns2014]
State_Awareness_CO2_2014

### Merge the carbon dioxide emission data and awareness variables data by state in 2016

In [None]:
# Give the 2016 awareness table the same key column name as the CO2 table,
# then join the two on 'State' (pd.merge defaults to an inner join).
df_YCOM_2016_State_renamed = df_YCOM_2016_state.rename(
    columns={'GeoName': 'State'},
)
State_Awareness_CO2_2016 = pd.merge(
    left=df_YCOM_2016_State_renamed,
    right=df_CO2_state_keep,
    on='State',
)
State_Awareness_CO2_2016

In [None]:
# Show every column name of the merged 2016 table as a plain list.
State_Awareness_CO2_2016.columns.tolist()

In [None]:
# From the merged table keep the state, the seven awareness variables and
# only the 2016 emission column.
State_Awareness_CO2_2016col = [
    'State',
    'happening',
    'human',
    'worried',
    'personal',
    'CO2limits',
    'regulate',
    'fundrenewables',
    2016,
]
State_Awareness_CO2_2016 = State_Awareness_CO2_2016.loc[:, State_Awareness_CO2_2016col]
State_Awareness_CO2_2016

## Correlations

## Correlation of awareness variables and  carbon dioxide emission in 2014

The correlation of each awareness variable with the carbon dioxide emission in 2014

In [None]:
# Pairwise correlations between the numeric columns. The text column 'State'
# is filtered out explicitly: recent pandas versions raise on non-numeric
# columns in corr() instead of silently skipping them.
corrs_2014 = State_Awareness_CO2_2014.select_dtypes(include='number').corr()
# Show only the correlations against the 2014 emission column.
corrs_2014[[2014]]

### Visualizing the correlation in 2014
1. A scatter plot of the relationship between "the awareness of whether climate change is happening" and "the carbon dioxide emission" in 2014

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'happening' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['happening']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: happening', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

2. A scatter plot of the relationship between "the awareness of whether humans cause climate change" and "the carbon dioxide emission" in 2014

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'human' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['human']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: human', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

3. A scatter plot of the relationship between "the awareness of whether people worry about climate change" and "the carbon dioxide emission" in 2014

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'worried' awareness
# value (y), with one labelled point per state. (A markdown caption had been
# pasted in front of the first assignment, which made the cell a syntax error.)
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['worried']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: worried', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

4. A scatter plot of the relationship between "the awareness of whether climate change will harm people personally" and "the carbon dioxide emission" in 2014
(description of the "personal" variable to be confirmed)

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'personal' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['personal']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: personal', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

5. A scatter plot of the relationship between "the awareness of whether we should limit the carbon dioxide emission" and "the carbon dioxide emission" in 2014
(to be confirmed)

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'CO2limits' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['CO2limits']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: CO2limits', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

6. A scatter plot of the relationship between "the awareness of whether we should regulate the carbon emission" and "the carbon dioxide emission" in 2014

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'regulate' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['regulate']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: regulate', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

7. A scatter plot of the relationship between "the awareness of whether we should fund renewable resources" and "the carbon dioxide emission" in 2014

In [None]:
# Scatter plot of 2014 CO2 emissions (x) against the 'fundrenewables'
# awareness value (y), with one labelled point per state.
x = State_Awareness_CO2_2014[2014]
y = State_Awareness_CO2_2014['fundrenewables']
types = State_Awareness_CO2_2014['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once; the x axis shows the
# 2014 data (the label used to say 2016).
plt.xlabel('2014 year emission in state', fontsize=30)
plt.ylabel('Awareness: fundrenewables', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

## Correlation of awareness variables and  carbon dioxide emission in 2016

The correlation of each awareness variable with the carbon dioxide emission in 2016

In [None]:
# Pairwise correlations between the numeric columns. The text column 'State'
# is filtered out explicitly: recent pandas versions raise on non-numeric
# columns in corr() instead of silently skipping them.
corrs_2016 = State_Awareness_CO2_2016.select_dtypes(include='number').corr()
# Show only the correlations against the 2016 emission column.
corrs_2016[[2016]]

### Visualizing the correlation in 2016

1. A scatter plot of the relationship between "the awareness of whether climate change is happening" and "the carbon dioxide emission" in 2016

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'happening' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['happening']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: happening', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

2. A scatter plot of the relationship between "the awareness of whether humans cause climate change" and "the carbon dioxide emission" in 2016

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'human' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['human']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: human', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

3. A scatter plot of the relationship between "the awareness of whether people worry about climate change" and "the carbon dioxide emission" in 2016

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'worried' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['worried']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: worried', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

4. A scatter plot of the relationship between "the awareness of whether climate change will harm people personally" and "the carbon dioxide emission" in 2016
(description of the "personal" variable to be confirmed)

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'personal' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['personal']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: personal', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

5. A scatter plot of the relationship between "the awareness of whether we should limit the carbon dioxide emission" and "the carbon dioxide emission" in 2016
(to be confirmed)

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'CO2limits' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['CO2limits']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: CO2limits', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

6. A scatter plot of the relationship between "the awareness of whether we should regulate the carbon emission" and "the carbon dioxide emission" in 2016

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'regulate' awareness
# value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['regulate']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: regulate', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

7. A scatter plot of the relationship between "the awareness of whether we should fund renewable resources" and "the carbon dioxide emission" in 2016

In [None]:
# Scatter plot of 2016 CO2 emissions (x) against the 'fundrenewables'
# awareness value (y), with one labelled point per state.
x = State_Awareness_CO2_2016[2016]
y = State_Awareness_CO2_2016['fundrenewables']
types = State_Awareness_CO2_2016['State']

# Walk the three columns in lockstep so the plot does not depend on the
# frame's index labels being 0..n-1.
for x_coor, y_coor, state in zip(x, y, types):
    plt.plot(x_coor, y_coor, marker='.', color='red', markersize=20)
    plt.text(x_coor + 0.5, y_coor + 0.5, state, fontsize=20)

# Axis labels do not change per point, so set them once after the loop.
plt.xlabel('2016 year emission in state', fontsize=30)
plt.ylabel('Awareness: fundrenewables', fontsize=30)

# Adjust the size of the figure.
plt.gcf().set_size_inches((30, 20))
plt.show()

## Data cleaning for GDP per capita of states in 2014 and 2016

In [None]:
## todo

## Using geographics to visualize the carbon emission and awareness data

In [None]:
conda install geopandas

In [None]:
!pip install descartes
!pip install PySAL
!pip install mapclassify