In [None]:
# Jupyter interactive notebook (Python v3.x)
#
# Downloads CO2 data from MG's GitHub repository
# Some exploratory data analysis, with comments
#
# For use with Software Carpentry Python module

In [None]:
import pycurl

In [None]:
def get_file(url,fname):
    """Download `url` with pycurl and save the response body to the file `fname`.

    Prints the file name and URL, performs the transfer, then prints either
    the elapsed transfer time (response code 200) or the response code
    (anything else). Returns None.

    Whatever body the server sends is written to `fname` even when the
    response code is not 200. Exceptions raised while opening the file or
    during the transfer propagate to the caller; the output file and the
    curl handle are released in either case.
    """
    print('Downloading %s' % fname)
    print('from %s' % url)
    c = pycurl.Curl()
    try:
        # The context manager flushes and closes the output file even if the transfer fails
        with open(fname,'wb') as f:
            # NOTE(review): these two options turn off TLS certificate and host-name
            # verification, so the download is not protected against tampering.
            # Kept as in the original; consider removing them.
            c.setopt(pycurl.SSL_VERIFYPEER, 0)
            c.setopt(pycurl.SSL_VERIFYHOST, 0)
            c.setopt(c.URL, url)
            c.setopt(c.WRITEDATA, f)
            c.perform()
        responsecode = c.getinfo(c.RESPONSE_CODE)
        if responsecode == 200:
            print('- Status: OK')
            print('- Elapsed time: %f sec' % c.getinfo(c.TOTAL_TIME))
        else:
            print('- Status: ERROR, response code %d' % responsecode)
    finally:
        # Always release the curl handle, including when perform() raises
        c.close()
    print(' ')

In [None]:
# Common URL prefix of the data files; each file name is appended to it to form the download URL
base_url = 'https://raw.githubusercontent.com/megarcia/SWC_Python/master/'

In [None]:
# One CSV file per span of years; only the year range differs between the names
file_names = ['MaunaLoa_CO2_monthly_filled_%s.csv' % years
              for years in ('1959-1975', '1976-2000', '2001-2015')]

In [None]:
# Download every listed file, saving each one locally under its own name
for fname in file_names:
    url = '%s%s' % (base_url, fname)
    get_file(url, fname)

In [None]:
# Portion given to students before lesson ends here
#
# Step through remainder with students

In [None]:
import glob

In [None]:
# Get the file names and put them in a list variable

In [None]:
# glob.glob() makes no promise about ordering, so sort the names: the year range
# in each file name then puts the files in chronological order, which later cells
# rely on when they take filelist[0] first and stack the rest underneath it
filelist = sorted(glob.glob('MaunaLoa_*.csv'))

In [None]:
# How many files do we have?

In [None]:
len(filelist)

In [None]:
# Look at the list

In [None]:
filelist

In [None]:
# Note: no alias here, but typically "import numpy as np" is used

In [None]:
import numpy

In [None]:
# There are several ways to get CSV data into Python, this is one of the easiest

In [None]:
# Read the first file in the list as comma-separated numbers into an array
data1 = numpy.loadtxt(filelist[0], delimiter=',')

In [None]:
# Note that this syntax works for lots of data files
#     For space-delimited, use "delimiter=' '"
#     For tab-delimited, use "delimiter='\t'"

In [None]:
# Look at the loaded data

In [None]:
data1

In [None]:
# Get the dimensions of the array, and note that time is axis 0!

In [None]:
numpy.shape(data1)

In [None]:
# Isolate and extract the values of interest

In [None]:
# Every row, column index 2 only (the third column of the file)
data1_vals = data1[:,2]

In [None]:
# Look at the single-variable time series

In [None]:
data1_vals

In [None]:
# Magic function to show plots here in the notebook, instead of a pop-up window

In [None]:
%matplotlib inline

In [None]:
# Note: no alias here, but typically "import matplotlib.pyplot as plt" is used

In [None]:
import matplotlib.pyplot

In [None]:
# Make a simple (exploratory) line plot of our time series

In [None]:
matplotlib.pyplot.plot(data1_vals)

In [None]:
# There are ways to prettify the plot; most are easier when we run a command-line script
#     Add axis specs and labels
#     Add title and legend
#     Make multiple-plot figures
#     Add annotations

In [None]:
# OK, that's the first file. How about the rest? One at a time, or loop it?
#
# Sometimes we have tons of files, so let's make a loop!

In [None]:
# Since we already have the original data array from the 1st file still in memory, use that
# (numpy was imported without an alias, so the call is numpy.copy rather than np.copy)
all_data = numpy.copy(data1)
#
# Now loop through the remainder of the files and append them to the existing array
for filename in filelist[1:]:
    new_data = numpy.loadtxt(fname=filename,delimiter=',')
    # axis=0 adds the new rows underneath the existing ones
    all_data = numpy.append(all_data,new_data,axis=0)

In [None]:
# Look at the loaded data

In [None]:
all_data

In [None]:
# Get the dimensions of the array

In [None]:
# (rows, columns) of the combined array, read from the array's own attribute
data_shape = all_data.shape

In [None]:
data_shape

In [None]:
# Recall that the data is monthly, so how many years do we have?

In [None]:
# Integer (floor) division: in Python 3 a plain "/" gives a float, and a float
# cannot be used later as an array dimension or as a number of points
nyears = data_shape[0] // 12

In [None]:
nyears

In [None]:
# Isolate and extract the values of interest

In [None]:
# Every row of the combined array, column index 2 only (the third column)
CO2_vals = all_data[:,2]

In [None]:
# Check the dimensions of our variable time series

In [None]:
numpy.shape(CO2_vals)

In [None]:
# Length of the time series: the size of its first (and only) axis
nmonths = CO2_vals.shape[0]

In [None]:
# Look at the single-variable time series

In [None]:
CO2_vals

In [None]:
# From looking at the full data array, we know that the time series
# starts in 1959 and ends at the end of 2015. For plotting, it would 
# be nice to put those dates on the x-axis. The array of x-axis locations
# returned by the function is stored in a variable so it can be passed to the plot.
#
# Note that the length of this array must match the length of our time 
# series variable array.

In [None]:
# endpoint=False leaves out 2016 itself: with 12 values per year each step is then
# exactly one month (1/12 of a year), and the last point falls in December 2015
# instead of on 2016.0
x = numpy.linspace(1959,2016,nmonths,endpoint=False)

In [None]:
# Make a slightly less-simple line plot of our time series, now with dates

In [None]:
matplotlib.pyplot.plot(x, CO2_vals)

In [None]:
# It's the Keeling Curve! 
#
# See http://scrippsco2.ucsd.edu/
# and http://en.wikipedia.org/wiki/Keeling_Curve
#
# There are two things about this plot that we'll explore further:
# 1. the seasonal variation
# 2. the annual trend
#
# Instead of finding a new dataset in the right shape, we can use what 
# we already have and "reshape" it. 
#
# We want an array that has each year in a row and each month in a column. 
# You'll see why in a few more lines.

In [None]:
# 12 columns (one per month); -1 asks numpy to work out the number of rows (years)
# itself, so the call does not depend on nyears being an integer
data_arr = CO2_vals.reshape(-1,12)

In [None]:
# Check the resulting array shape to see that it's right

In [None]:
numpy.shape(data_arr)

In [None]:
# To look at the seasonal variation, we want the mean values by month
# over all years. That means that we're averaging over axis 0.

In [None]:
# Collapse axis 0 (the rows): one mean value per column of data_arr
seasonal = data_arr.mean(axis=0)

In [None]:
# Look at the result to see that it's what we wanted

In [None]:
seasonal

In [None]:
# Create an array for the months on the x-axis

In [None]:
# Month numbers 1.0, 2.0, ..., 12.0 (the stop value 13.0 is excluded)
x = numpy.arange(1.0, 13.0)

In [None]:
# Make a plot of our monthly averages

In [None]:
matplotlib.pyplot.plot(x, seasonal)

In [None]:
# These plot values include the mean over all of the years. We can subtract 
# that out to get an idea of the variation within any single year.

In [None]:
# Single overall average of the monthly means
mean = seasonal.mean()

In [None]:
# Basic math operations on arrays proceed element-wise

In [None]:
# NOTE: this overwrites `seasonal`, so running the cell a second time subtracts
# `mean` again; re-run the cell that computes `seasonal` before repeating it.
seasonal = seasonal - mean

In [None]:
# Plot the adjusted values

In [None]:
matplotlib.pyplot.plot(x, seasonal)

In [None]:
# Notice the CO2 concentration decreases between late Spring and early Autumn 
# (in the Northern Hemisphere). There is more land area in the NH, and all 
# that plant growth is drawing CO2 from the atmosphere. At other times, plant 
# respiration, cement production, and fossil fuels cause a net increase in CO2 
# concentration.
#
# The Earth breathes!
#
# Watch: http://1.bp.blogspot.com/-LemiCA8B_H4/UfLN63QLXdI/AAAAAAAACyM/Xc3HtckubEg/s640/Animated.gif
#
# To look at the annual variation, we want the mean values over all months
# for each year. That means that we can use the same data array, but average 
# over axis 1 this time.

In [None]:
# Collapse axis 1 (the columns): one mean value per row of data_arr
annual = data_arr.mean(axis=1)

In [None]:
# Look at the result to see that it's what we wanted

In [None]:
annual

In [None]:
# Create an array for the years on the x-axis

In [None]:
# The number of points must be an integer and must match the array being plotted,
# so take it from the length of `annual` directly
x = numpy.linspace(1959,2015,len(annual))

In [None]:
# Plot the annual mean values

In [None]:
matplotlib.pyplot.plot(x, annual)

In [None]:
# Notice there are a couple of times when the trend gets shallow:
# early 1970s: Inflation, oil crisis (possibly)
# early 1990s: Mount Pinatubo eruption in 1991 (cooler summers, less energy use)
#
# Notice also the most recent milestone: graph crosses 400 ppm in 2015!