HW 1 Solutions

Complete the NFL Ticket Price Analysis described at the end of the Lesson 1 Notebook.

In [None]:
%matplotlib inline
import csv
import requests
import numpy as np
import pandas as pd
from random import randint
from __future__ import division

In [None]:
# Download the 2014 average ticket price CSV from the FiveThirtyEight data repository.
# The timeout keeps the cell from hanging forever on a stalled connection.
r = requests.get('https://raw.githubusercontent.com/fivethirtyeight/data/master/nfl-ticket-prices/2014-average-ticket-price.csv',
                 timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as if it were CSV.
r.raise_for_status()
# One list of column strings per line of the response body.
data = [row for row in csv.reader(r.iter_lines())]

In [None]:
# Prune out unneeded rows: drop row 0 and everything from row 97 on, keeping rows 1-96.
# NOTE(review): row 0 is presumably the CSV header and the tail non-game rows -- confirm against the file.
# This cell rebinds `data`, so re-running it without re-running the download cell slices the list again.
data = data[1:97]

In [None]:
# Display the first row that is left after pruning.
data[0]

In [None]:
# Function to transform the game string into a list of [away team, home team]
def get_team_names(game_string):
    """
    Split a game string of the form "<away> at <home> Tickets..." into team names.

    Everything from the first occurrence of "Tickets" onward is discarded; if
    "Tickets" does not occur, the whole string is used (str.find would return
    -1 here and a slice built from it would silently drop the last character).

    @param str game_string     text describing one game
    @return list               the pieces around " at "; surrounding whitespace
                               is left in place for the caller to strip
    """
    matchup = game_string.partition("Tickets")[0]
    return matchup.split(" at ")


In [None]:
# List like: [[away team, home team, ticket price], ...]
# get_team_names splits "<away> at <home>", so the away team comes first.
# Column 0 of each row holds the game string and column 2 the price.
games_list = [get_team_names(row[0]) + [int(row[2])] for row in data]
# Single-argument print(...) gives the same output on Python 2 and Python 3.
print(games_list[0])
print(len(games_list))

In [None]:
# Turn every [away, home, price] list into a labelled record.
# str.strip() removes extra whitespace around the team names.
games_dicts = []
for game in games_list:
    games_dicts.append({"away": game[0].strip(),
                        "home": game[1].strip(),
                        "price": game[2]})

In [None]:
# Display the first record to check its away/home/price keys.
games_dicts[0]

In [None]:
# Build a DataFrame with one row per game record; the dict keys become the columns.
df = pd.DataFrame(games_dicts)

In [None]:
# Preview the first rows of the games frame.
df.head()

In [None]:
# Summary statistics of the price column over every game in df.
df['price'].describe()

In [None]:
# Row selection with a boolean mask: keep the games whose 'home' value is the Ravens.
# Background: http://stackoverflow.com/questions/17071871/select-rows-from-a-dataframe-based-on-values-in-a-column-in-pandas
ravens_are_home = df['home'] == "Baltimore Ravens"
ravens_home_df = df.loc[ravens_are_home]
ravens_home_df.head()

In [None]:
# Same boolean-mask selection, this time on the 'away' column.
ravens_are_away = df['away'] == "Baltimore Ravens"
ravens_away_df = df.loc[ravens_are_away]
ravens_away_df.head()

In [None]:
# Summary statistics for the rows where the Ravens are the home team.
ravens_home_df.describe()

In [None]:
# Summary statistics for the rows where the Ravens are the away team.
ravens_away_df.describe()

The average ticket price for a Ravens home game was \$157, and the average ticket price for a Ravens away game was \$122. The average price of an NFL ticket in this dataset was \$135.

---

Complete the Coin Flip and Die-rolling simulations.

In [None]:
def flip_coin():
    """
    simulate one coin toss
    @return int     0 or 1, drawn with randint(0, 1)
    """
    outcome = randint(0, 1)
    return outcome
    

In [None]:
def flip_n_coins(n):
    """
    toss n coins and tally the outcomes
    @param int n     number of tosses to simulate
    @return dict     {"heads": sum of the toss values, "tails": the remaining tosses}
    """
    outcomes = [flip_coin() for _ in range(n)]
    # Each toss is 0 or 1, so the sum is the number of 1s.
    heads = sum(outcomes)
    return {"heads": heads,
            "tails": len(outcomes) - heads}

In [None]:
# Try it out: one random run of 10 tosses, reported as heads/tails counts.
flip_n_coins(10)

In [None]:
# Repeat the same experiment many times and collect one heads/tails record per trial.
number_of_trials = 10000
# Named so the size of each experiment can be changed in one place.
coins_per_trial = 4
trials = [flip_n_coins(coins_per_trial) for trial in range(number_of_trials)]
trials_df = pd.DataFrame(trials)
trials_df.head()

In [None]:
# Histogram of every column of trials_df; the trailing semicolon keeps the
# array-of-Axes repr out of the cell output so only the figure is shown.
trials_df.hist();

In [None]:
# Count how many trials produced each number of heads.
# groupby(...).size() returns the row count per group directly, so the count
# no longer has to be smuggled through the leftover 'tails' column of a
# per-column aggregate.
grped_df = (trials_df.groupby('heads')
            .size()
            .reset_index(name='occurances')
            .rename(columns={"heads": "heads_count"}))
grped_df

In [None]:
# probability of getting exactly 3 heads
# Select the row by its heads_count value, not by row label: after reset_index
# the label only equals the heads count when every smaller count also occurred.
# .sum() yields 0 when no trial had 3 heads; float() keeps the division exact
# even without `from __future__ import division`.
grped_df.loc[grped_df['heads_count'] == 3, 'occurances'].sum() / float(number_of_trials)

---

Complete the Matrix Addition and Multiplication Lab 

In [None]:
def matrix_vector_mult1(m, v):
    """
    basic function to multiply matrix with a vector
    prints each step for inspection.
    @param list m     matrix of m*n items
    @param list v     vector of n items
    @return list      one entry per matrix row: the sum of row[i]*v[i]
    """
    rows = len(m)
    w = [0]*rows
    # Every print below is a single-argument print(...) call, which produces
    # the same text under Python 2 and Python 3. Values are wrapped in a
    # 1-tuple so that a tuple operand is formatted as one item.
    print('Result Vector Shape %s' % (w,))
    irange = range(len(v))
    print('Irange, %s' % (irange,))
    for j in range(rows):
        print('iteration j  %s' % (j,))
        r = m[j]
        print('row,  %s' % (r,))
        # A fresh accumulator per row; named `total` so the builtin sum()
        # is not shadowed.
        total = 0
        for i in irange:
            prod = r[i]*v[i]
            print('%s * %s = %s' % (r[i], v[i], prod))
            total += prod
        w[j] = total
    return w

In [None]:
def dot(x, y):
    """
    dot product of a matrix row and a vector
    checks for equal length
    @param list x     matrix row n items
    @param list y     vector of n items
    @return           sum of the pairwise products x[i]*y[i] (0 for empty input)
    @raise AssertionError  if x and y differ in length
    """
    assert len(x) == len(y)
    # Plain zip + generator: needs no extra imports and runs on Python 2 and 3
    # (itertools.izip exists only on Python 2).
    return sum(a * b for a, b in zip(x, y))

In [None]:
def matrix_vector_mult2(m, v):
    """
    matrix * vector product built on the dot() helper
    @param list m     matrix of m*n items
    @param list v     vector of n items
    @return list      dot(row, v) for each row of m, in row order
    """
    products = []
    for row in m:
        products.append(dot(row, v))
    return products

In [None]:
def matrix_mult1(a, b):
    """
    triple-loop product of two matrices
    @param list a     matrix of i*k items
    @param list b     matrix of k*j items
    @return list      new i*j matrix; entry [i][j] is the sum over k of a[i][k]*b[k][j]
    """
    height, inner = len(a), len(a[0])
    inner_b, width = len(b), len(b[0])

    # The column count of a must equal the row count of b.
    assert inner == inner_b

    product = []
    for i in range(height):
        out_row = []
        for j in range(width):
            # Accumulate one output cell, starting from 0.
            cell = 0
            for k in range(inner):
                cell += a[i][k]*b[k][j]
            out_row.append(cell)
        product.append(out_row)
    return product

In [None]:
def matrix_mult2(a,b):
    """
    advanced function to multiply two matrices
    @param list a     matrix of i*k items
    @param list b     matrix of k*j items
    @return list      i*j matrix; entry [i][j] is the sum of a[i][k]*b[k][j]
    """
    # Materialise the columns of b once. On Python 3 zip() is a one-shot
    # iterator, so without list() it would be exhausted after the first row
    # of a and every later row of the result would come back empty.
    cols_b = list(zip(*b))
    return [[sum(ele_a*ele_b for ele_a, ele_b in zip(row_a, col_b)) for col_b in cols_b] for row_a in a]

In [None]:
def transpose_matrix1(matrix):
    """
    step-by-step transpose of a matrix
    @param list matrix     matrix to be transposed
    @return list           list of lists; entry [i][j] is matrix[j][i]
    """
    # The first row fixes how many columns are read from every row.
    width = len(matrix[0])
    transposed = []
    for col_index in range(width):
        column = []
        for row in matrix:
            column.append(row[col_index])
        transposed.append(column)
    return transposed

In [None]:
def transpose_matrix2(matrix):
    """
    concise function to transpose matrix
    @param list matrix     matrix to be transposed
    @return list           list of tuples, one tuple per column of matrix
    """
    # list() makes the result a real list on Python 3 as well, where zip()
    # alone returns a lazy one-shot iterator.
    return list(zip(*matrix))

In [None]:
def i_matrix(size):
    """
    build an identity matrix as a list of lists
    @param int size     size of the matrix to generate
    @return list        size*size matrix with 1 on the diagonal and 0 elsewhere
    """
    indices = range(size)
    identity = []
    for row_index in indices:
        identity.append([1 if col_index == row_index else 0 for col_index in indices])
    return identity

In [None]:
def is_identity_matrix(matrix):
    """
    check whether a matrix is an identity matrix
    @param list matrix     matrix to be checked for identity
    @return bool           True when the matrix is square with 1 on the
                           diagonal and 0 everywhere else (an empty matrix
                           counts as identity)
    """
    size = len(matrix)
    # An identity matrix must be square, so every row needs exactly `size`
    # entries; without this check [[1, 0, 0], [0, 1, 0]] would pass.
    # val == (x == y) relies on 1 == True and 0 == False.
    return all(len(row) == size and
               all(val == (x == y) for x, val in enumerate(row))
               for y, row in enumerate(matrix))

In [None]:
# Commented version of the Matrix * Vector Multiplication

def matrix_vector_mult3(m, v):
    """
    annotated matrix * vector product
    @param list m     matrix of m*n items
    @param list v     vector of n items
    @return list      one total per matrix row
    """
    # The result gets one entry per matrix row, so start by counting the rows.
    row_count = len(m)
    # The vector has one entry per matrix column; its length is the number of
    # products that make up each row's total.
    width = len(v)
    # Finished row totals are collected here, in row order.
    totals = []
    # Outer loop: visit the rows by position and fetch the actual row.
    for position in range(row_count):
        current = m[position]
        # Each row starts from a fresh running total of 0.
        running = 0
        # Inner loop: pair the row entry with the vector entry at the same
        # position and fold their product into the running total.
        for col in range(width):
            running = running + current[col] * v[col]
        # The row is fully processed; its total is the next result entry.
        totals.append(running)
    # All rows are done, so the list of totals is the product vector.
    return totals