# Course Data Analysis using Python and matplotlib.pyplot
Here we show how to do some analysis of Brandeis course data from 2004 to 2021, using matplotlib.pyplot to plot the results.

References:
https://matplotlib.org/stable/tutorials/introductory/usage.html#sphx-glr-tutorials-introductory-usage-py


In [None]:
import json
import csv
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np

# Reading in the data
We have a data set of all Brandeis courses from 2004 to 2021.
Let's load it into a list of dictionaries and look at the first entry, courses[0].

In [None]:
# Read the whole course CSV into memory as a list of dicts, one per data row,
# keyed by the column names in the header row.
# newline='' is what the csv module asks for when opening files it will parse,
# so that newlines embedded in quoted fields are handled by the csv reader.
with open("data/courses.csv", 'r', newline='') as csvfile:
    courses = list(csv.DictReader(csvfile))
# The with-statement has already closed the file at this point, so no explicit
# close() call is needed.
print(len(courses))
# Final expression of the cell: the notebook displays the first record.
courses[0]

# Clean the data
In this case, it will help us if we make the enrollments integers instead of strings (csv.DictReader reads every field as a string).

In [None]:
# Replace each record's 'enr' value with its integer form, in place, so that
# later cells can add enrollments together.
for record in courses:
    record['enr'] = int(record['enr'])
# Final expression of the cell: the notebook displays the last record.
courses[-1]

# Calculating enrollments by term
Next let's look at the terms and then calculate the enrollments for each term,
which we can then plot in various ways.

In [None]:
# Every distinct (code, term) pair found in the course records, sorted by code
# (ties broken by term). codes and terms are the two columns of that list, so
# codes[i] and terms[i] always describe the same pair.
# NOTE(review): presumably 'code' is the numeric term code (e.g. 1173) and
# 'term' its display name -- confirm against the CSV header.
vals = sorted({(c['code'],c['term']) for c in courses})
codes = [code for code, _ in vals]
terms = [term for _, term in vals]

print('len=',len(vals))
print('code/term tuples=',vals)
print('....')

print('codes = ',codes)
print('....')

print('terms = ',terms)
print('....')

# Total enrollment per term code, accumulated in a single pass over the
# courses instead of rescanning the whole course list once for every code.
enrollment_totals = {}
for course in courses:
    code = course['code']
    enrollment_totals[code] = enrollment_totals.get(code, 0) + course['enr']

# Same order as codes/terms, so the three lists can be plotted against each other.
enrolled_by_term = [enrollment_totals[code] for code in codes]
print('number enrolled by term=',len(enrolled_by_term))
print('....')
print(enrolled_by_term)


# Object-Oriented Style of plotting
There are two approaches to using matplotlib to plot. 
The first is the object-oriented style shown below, where we explicitly create the figure and axes
and use their methods to do the plotting.

In [None]:
# Object-oriented style: create the Figure and its Axes explicitly, then do
# all drawing and labelling through their methods.
fig, ax = plt.subplots(figsize=(20, 15))

# One horizontal bar per term; the bar length is that term's total enrollment.
ax.barh(terms, enrolled_by_term)

ax.set_title("Enrollments by term", fontsize=32)
ax.set_ylabel("term", fontsize=20)
ax.set_xlabel("enrollments", fontsize=20)

# Write the finished figure to disk.
fig.savefig("images/enrollments_by_term.png")
# Final expression of the cell: the notebook displays this string.
'done'

# Pyplot approach
The other approach is to let pyplot create the figure and axes for you and to do everything with calls to pyplot methods. It is good to know both approaches so you can understand matplotlib code when you see it.

In [None]:

# Pyplot style: pyplot keeps track of the current figure and axes, so every
# step below is a module-level plt call rather than a method on an object.
plt.figure(figsize=(20, 15))

# One horizontal bar per term; the bar length is that term's total enrollment.
plt.barh(terms, enrolled_by_term)

plt.title("enrollments by term Fall 2004 - Summer 2021", fontsize=16)
plt.grid()

# Write the current figure to disk.
plt.savefig("images/enrollments by term")

# Enrollments by academic year
Next let's fold the fall, spring, and summer semesters together and plot by academic year!

In [None]:
def academic_year(term):
    """Return the academic-year label for a term code string.

    The last character of ``term`` is the semester digit, and the characters
    between the first and the last give the two-digit calendar year.
    Semester "3" opens an academic year that ends in the following calendar
    year; any other semester closes the academic year that began in the
    previous calendar year.

    Example: "1173", "1181" and "1182" all map to "2017-18".
    """
    calendar_year = int(term[1:-1])
    if term[-1] == "3":
        first, second = calendar_year, calendar_year + 1
    else:
        first, second = calendar_year - 1, calendar_year
    return "20{:02d}-{:02d}".format(first, second)

# Academic-year label for every term code, in the same order as codes
# (so each label normally appears once per term of that year).
academic_years = [academic_year(t) for t in codes]
print(academic_years)
# Collapse to one entry per academic year, keeping first-seen order.
# De-duplicating is used instead of taking every third element, because that
# slice is only correct when every academic year has exactly three terms.
academic_years = list(dict.fromkeys(academic_years))
print(academic_years)

In [None]:

# Total enrollment per academic year, accumulated in a single pass over the
# courses: academic_year() is called once per course rather than once per
# course for every academic year.
year_totals = {}
for course in courses:
    year = academic_year(course['code'])
    # int() keeps this cell working whether or not 'enr' was already converted.
    year_totals[year] = year_totals.get(year, 0) + int(course['enr'])

# Same order as academic_years; a year with no courses contributes 0.
enrolled_by_year = [year_totals.get(year, 0) for year in academic_years]
print(enrolled_by_year)

In [None]:
# Bar chart of total enrollment per academic year, with each bar placed at the
# calendar year in which that academic year starts.
# The x positions are taken from the academic-year labels ("2004-05" -> 2004)
# instead of a hard-coded range, so they always match enrolled_by_year in length.
start_years = [int(label[:4]) for label in academic_years]

plt.figure(figsize=(20,10))
plt.bar(start_years,enrolled_by_year)
plt.grid()
# Ticks and x-limits leave a one-year margin on the left and room for the last
# bar on the right; for 2004-05 .. 2020-21 this is ticks 2003..2020 and
# x-limits 2003..2022.
plt.xticks(range(start_years[0]-1,start_years[-1]+1))
plt.axis([start_years[0]-1,start_years[-1]+2,0,50000])
# Final expression of the cell: the notebook displays this string instead of
# the value returned by plt.axis.
'done'