In [None]:
# Jupyter interactive notebook (Python v3.x)
#
# Downloads CO2 data from MG's GitHub repository
# Some exploratory data analysis, with comments
#
# For use with Software Carpentry Python module

In [None]:
import pycurl

In [None]:
def get_file(url,fname,verify_ssl=True):
    """Download ``url`` with pycurl and save the response body to ``fname``.

    Progress and the outcome are printed: the elapsed time when the response
    code is 200, otherwise the response code itself. Nothing is returned.

    Note that the response body is written to ``fname`` whatever the response
    code is, so after a non-200 response the file holds the server's error
    body rather than data.

    Parameters
    ----------
    url : str
        Address to fetch.
    fname : str
        Path of the local file, opened in binary write mode (an existing
        file is overwritten).
    verify_ssl : bool, optional
        When True (the default) libcurl's certificate and host-name checks
        are left at their defaults. Pass False to switch both checks off,
        e.g. on a machine without a usable CA bundle; the download can then
        no longer be trusted to come from the named host.

    Raises
    ------
    pycurl.error
        If the transfer itself fails (raised by ``perform()``).
    """
    print('Downloading %s' % fname)
    print('from %s' % url)
    # The with-block guarantees the file is flushed and closed before any
    # later cell reads it, even if the transfer raises.
    with open(fname,'wb') as f:
        c = pycurl.Curl()
        try:
            if not verify_ssl:
                c.setopt(pycurl.SSL_VERIFYPEER, 0)
                c.setopt(pycurl.SSL_VERIFYHOST, 0)
            c.setopt(c.URL, url)
            c.setopt(c.WRITEDATA, f)
            c.perform()
            # Read the transfer info while the handle is still open.
            responsecode = c.getinfo(c.RESPONSE_CODE)
            elapsed = c.getinfo(c.TOTAL_TIME)
        finally:
            # Always release the curl handle, including on a failed transfer.
            c.close()
    if responsecode == 200:
        print('- Status: OK')
        print('- Elapsed time: %f sec' % elapsed)
    else:
        print('- Status: ERROR, response code %d' % responsecode)
    print(' ')
    return

In [None]:
base_url = 'https://raw.githubusercontent.com/megarcia/SWC_Python/master/'

In [None]:
file_names = ['MaunaLoa_CO2_monthly_filled_1959-1975.csv',
              'MaunaLoa_CO2_monthly_filled_1976-2000.csv',
              'MaunaLoa_CO2_monthly_filled_2001-2015.csv']

In [None]:
# Fetch every file listed in file_names: the download address is base_url
# followed by the file name, and the local copy keeps that same name.
for fname in file_names:
    url = base_url + fname
    get_file(url,fname)

In [None]:
import glob

In [None]:
# glob.glob returns matches in arbitrary order. Sort them so that filelist[0]
# is the earliest file and later concatenation keeps the records in
# chronological order (the year ranges in the names sort correctly as text).
filelist = sorted(glob.glob('MaunaLoa_*.csv'))

In [None]:
len(filelist)

In [None]:
import numpy

In [None]:
data1 = numpy.loadtxt(fname=filelist[0],delimiter=',')

In [None]:
data1

In [None]:
data1.shape

In [None]:
data1_vals = data1[:,2]

In [None]:
data1_vals

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot

In [None]:
matplotlib.pyplot.plot(data1_vals)

In [None]:
# OK, that's the first file. How about the rest? One at a time, or loop it?

In [None]:
# Let's make a loop!

In [None]:
# Load every file in filelist and stack the tables row-wise, in list order.
# A single concatenate avoids recopying the growing array on each pass, which
# is what repeated numpy.append calls do.
all_data = numpy.concatenate(
    [numpy.loadtxt(fname=path,delimiter=',') for path in filelist],
    axis=0)

In [None]:
all_data

In [None]:
data_shape = numpy.shape(all_data)

In [None]:
data_shape

In [None]:
# Integer division: in Python 3 "/" always yields a float, and nyears is used
# later as an array dimension and a sample count, both of which must be ints.
nyears = data_shape[0] // 12

In [None]:
nyears

In [None]:
CO2_vals = all_data[:,2]

In [None]:
numpy.shape(CO2_vals)

In [None]:
nmonths = numpy.shape(CO2_vals)[0]

In [None]:
CO2_vals

In [None]:
# One x value per monthly sample, starting at 1959. endpoint=False keeps 2016
# itself out of the axis, so the samples are spread evenly over [1959, 2016)
# instead of being stretched to end exactly on 2016.0.
x = numpy.linspace(1959,2016,nmonths,endpoint=False)

In [None]:
matplotlib.pyplot.plot(x, CO2_vals)

In [None]:
# It's the Keeling Curve! 
# See http://scrippsco2.ucsd.edu/
# and http://en.wikipedia.org/wiki/Keeling_Curve

In [None]:
# There are two things about this plot that we'll explore further:
# 1. the seasonal variation
# 2. the annual trend

In [None]:
# Instead of finding a new dataset in the right shape, we can use what 
# we already have!

In [None]:
# Fold the flat series into a 2-D array with 12 columns: each row holds 12
# consecutive values. nyears must be an integer for reshape to accept it.
# Presumably each row is one calendar year starting in January -- confirm
# against the data files.
data_arr = CO2_vals.reshape(nyears,12)

In [None]:
numpy.shape(data_arr)

In [None]:
# Averaging down the rows (axis=0) gives 12 values: one mean per column of
# data_arr.
seasonal = numpy.mean(data_arr, axis=0)

In [None]:
seasonal

In [None]:
x = numpy.linspace(1,12,12)

In [None]:
matplotlib.pyplot.plot(x, seasonal)

In [None]:
# These plot values include the mean over all of the years. We can subtract 
# that out to get an idea of the variation within any single year.

In [None]:
mean = numpy.mean(seasonal)

In [None]:
# Remove the overall mean so the curve shows the departure from it. Taking the
# mean of the current array here (rather than a value saved by an earlier
# cell) keeps this cell safe to re-run: a second run subtracts roughly zero
# instead of subtracting the original mean again.
seasonal = seasonal - numpy.mean(seasonal)

In [None]:
matplotlib.pyplot.plot(x, seasonal)

In [None]:
# Notice the CO2 concentration decreases between late Spring and early Autumn 
# (in the Northern Hemisphere). There is more land area in the NH, and all 
# that plant growth is drawing CO2 from the atmosphere. At other times, plant 
# respiration, cement production, and fossil fuels cause a net increase in CO2 
# concentration.
#
# The Earth breathes!
#
# Watch: http://1.bp.blogspot.com/-LemiCA8B_H4/UfLN63QLXdI/AAAAAAAACyM/Xc3HtckubEg/s640/Animated.gif

In [None]:
# Averaging across the columns (axis=1) gives one mean per row of data_arr.
annual = numpy.mean(data_arr, axis=1)

In [None]:
annual

In [None]:
x = numpy.linspace(1959,2015,nyears)

In [None]:
matplotlib.pyplot.plot(x, annual)

In [None]:
# Notice there are a couple of times when the trend gets shallow:
# early 1970s: Inflation, oil crisis (possibly)
# early 1990s: Mount Pinatubo eruption in 1991 
#
# Notice also the most recent milestone: graph crosses 400 ppm in 2015!