In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress

# Import API key
#from config import weather_api_key

In [26]:
# Define function to run linear regression.
def lin_regress(independent, dependent):
    """Scatter-plot two variables and overlay their least-squares regression line.

    Draws on the current matplotlib figure (titles, axis labels and
    ``plt.show()`` are left to the caller) and prints the correlation
    coefficient and the r-squared value.

    Parameters
    ----------
    independent : array-like of numbers
        Values plotted on the x-axis.
    dependent : array-like of numbers
        Values plotted on the y-axis; same length as ``independent``.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If fewer than two finite (x, y) pairs are available to fit.
    """
    # Coerce to float arrays so lists, tuples and pandas Series behave alike.
    x_vals = np.asarray(independent, dtype=float)
    y_vals = np.asarray(dependent, dtype=float)

    # A single NaN makes linregress return NaN for every statistic, so only
    # pairs where both values are finite take part in the fit and the plot.
    finite = np.isfinite(x_vals) & np.isfinite(y_vals)
    x_vals = x_vals[finite]
    y_vals = y_vals[finite]
    if x_vals.size < 2:
        raise ValueError("lin_regress needs at least two finite (x, y) pairs")

    # Use scipy to calculate values for regression
    fit = linregress(x_vals, y_vals)

    # Calculate the regression values
    regress_vals = x_vals * fit.slope + fit.intercept

    # Create string variable that holds the regression line's equation
    line_eq = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

    # Plot scatter and regression line on top of scatter plot
    plt.scatter(x_vals, y_vals, edgecolors = "black")
    plt.plot(x_vals, regress_vals, color = "r", label = line_eq)
    plt.legend(loc = "best")
    print(f"The correlation coefficient is {round(fit.rvalue, 2)}")
    print(f"The r-squared value is {round(fit.rvalue**2,2)}")

In [19]:
# Load each source CSV (paths relative to the notebook's working directory)
# into its own DataFrame.

# CO2 emissions by country
co2_country_df = pd.read_csv("co2_emission.csv")

# CO2 emissions by state
co2_state_df = pd.read_csv("carbon_emission_data.csv")

# GDP by country (GDP per capita, PPP)
gdp_country_df = pd.read_csv("GDP-countries-1990-2018.csv")

# GDP by state
gdp_state_df = pd.read_csv("bea-gdp-by-state.csv")

# Countries' historic climate -- disabled placeholder; note it points at the CO2 file.
#climate_country_df = pd.read_csv("co2_emission.csv")

# BRIC historic climate
bric_climate_df = pd.read_csv("BRIC-climate-1991-2016.csv")

In [33]:
# The GDP file's country header is "Country " (with a trailing space);
# rename it to "Country" so later column selection and merging can use that key.
gdp_country_df = gdp_country_df.rename(columns = {"Country " : "Country"})

In [35]:
# Keep only the country name and the 2016 column, relabelling the latter as gdp_2016.
gdp_country_2016_df = (
    gdp_country_df
    .loc[:, ["Country", "2016"]]
    .rename(columns={"2016": "gdp_2016"})
)
gdp_country_2016_df.head()

Unnamed: 0,Country,gdp_2016
0,Aruba,38390.27165
1,Afghanistan,1896.99252
2,Angola,6756.935074
3,Albania,11868.17897
4,Arab World,16935.3833


In [5]:
# Preview the first rows of the country-level CO2 data.
co2_country_df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (tonnes )
0,Afghanistan,AFG,1949,14656.0
1,Afghanistan,AFG,1950,84272.0
2,Afghanistan,AFG,1951,91600.0
3,Afghanistan,AFG,1952,91600.0
4,Afghanistan,AFG,1953,106256.0


In [20]:
# List the distinct values in the Entity column.
co2_country_df.Entity.unique()

array(['Afghanistan', 'Africa', 'Albania', 'Algeria', 'Americas (other)',
       'Andorra', 'Angola', 'Anguilla', 'Antarctic Fisheries',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Aruba',
       'Asia and Pacific (other)', 'Australia', 'Austria', 'Azerbaijan',
       'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
       'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia',
       'Bonaire Sint Eustatius and Saba', 'Bosnia and Herzegovina',
       'Botswana', 'Brazil', 'British Virgin Islands', 'Brunei',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Cape Verde', 'Cayman Islands',
       'Central African Republic', 'Chad', 'Chile', 'China',
       'Christmas Island', 'Colombia', 'Comoros', 'Republic of the Congo',
       'Cook Islands', 'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba',
       'Curacao', 'Cyprus', 'Czech Republic', 'Czechoslovakia',
       'Democratic Republic of Republic of the Congo', 'Denmark',
 

In [22]:
# Remove rows whose Entity is a regional aggregate or an accounting category
# (e.g. 'World', 'EU-28', 'Statistical differences') rather than a single country.
co2_country_df = co2_country_df[
    ~co2_country_df.Entity.isin([
        'Africa',
        'Americas (other)',
        'Antarctic Fisheries',
        'Asia and Pacific (other)',
        'EU-28',
        'Europe (other)',
        'International transport',
        'Statistical differences',
        'World',
    ])
]



In [13]:
# Preview the first rows of the CO2 data (only a cell's last expression is rendered).
co2_country_df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (tonnes )
0,Afghanistan,AFG,1949,14656.0
1,Afghanistan,AFG,1950,84272.0
2,Afghanistan,AFG,1951,91600.0
3,Afghanistan,AFG,1952,91600.0
4,Afghanistan,AFG,1953,106256.0


In [23]:
# Restrict the CO2 records to the years 1991 and later.
co2_country_df = co2_country_df[co2_country_df["Year"].ge(1991)]
co2_country_df.head()

Unnamed: 0,Entity,Code,Year,Annual CO₂ emissions (tonnes )
42,Afghanistan,AFG,1991,2426486.16
43,Afghanistan,AFG,1992,1381521.84
44,Afghanistan,AFG,1993,1333837.11
45,Afghanistan,AFG,1994,1282488.38
46,Afghanistan,AFG,1995,1231139.65


In [37]:
# Take the 2016 rows only and rename Entity -> Country to match the GDP frame's key.
co2_country_2016_df = (
    co2_country_df[co2_country_df["Year"].eq(2016)]
    .rename(columns={"Entity": "Country"})
)

In [38]:
# Preview the first ten rows of the 2016 CO2 data.
co2_country_2016_df.head(10)

Unnamed: 0,Country,Code,Year,Annual CO₂ emissions (tonnes )
67,Afghanistan,AFG,2016,12257910.0
419,Albania,ALB,2016,6272858.0
527,Algeria,DZA,2016,150378000.0
822,Andorra,AND,2016,511506.8
890,Angola,AGO,2016,35351000.0
918,Anguilla,AIA,2016,152072.2
1000,Antigua and Barbuda,ATG,2016,565396.8
1131,Argentina,ARG,2016,207064100.0
1190,Armenia,ARM,2016,5960813.0
1249,Aruba,ABW,2016,928030.6


In [40]:
# Inner-join 2016 CO2 emissions with 2016 GDP on the shared "Country" column;
# countries present in only one of the two frames are dropped.
co2_gdp_merge_df = pd.merge(
    co2_country_2016_df,
    gdp_country_2016_df,
    on="Country",
    how="inner",
)
co2_gdp_merge_df.head(10)

Unnamed: 0,Country,Code,Year,Annual CO₂ emissions (tonnes ),gdp_2016
0,Afghanistan,AFG,2016,12257910.0,1896.99252
1,Albania,ALB,2016,6272858.0,11868.17897
2,Algeria,DZA,2016,150378000.0,15036.36415
3,Angola,AGO,2016,35351000.0,6756.935074
4,Antigua and Barbuda,ATG,2016,565396.8,23670.30226
5,Argentina,ARG,2016,207064100.0,20130.40803
6,Armenia,ARM,2016,5960813.0,8808.572714
7,Aruba,ABW,2016,928030.6,38390.27165
8,Australia,AUS,2016,413369900.0,47305.88002
9,Austria,AUT,2016,67402080.0,51809.51363


In [None]:
# Regress 2016 CO2 emissions on 2016 GDP using the merged country frame.
# NOTE(review): GDP is taken as the independent variable -- confirm this is the intended direction.
# Column name copied from the displayed frame header (note the space before ")").
co2_col = "Annual CO₂ emissions (tonnes )"

# Rows missing either value cannot contribute to the fit, so drop them first.
regress_df = co2_gdp_merge_df.dropna(subset=["gdp_2016", co2_col])
lin_regress(regress_df["gdp_2016"], regress_df[co2_col])

plt.title("GDP vs. Annual CO2 Emissions by Country (2016)")
plt.xlabel("GDP (2016)")
plt.ylabel("Annual CO2 Emissions (tonnes)")
plt.grid(True)
# The "output_data" directory must already exist for savefig to succeed.
plt.savefig("output_data/gdp_vs_co2_2016.png")
plt.show()