In [3]:
# List the entries of the dataset's root directory, sorted by name,
# to see how the data is laid out on disk.
import os
sorted(os.listdir('../input/collegefootballstatistics'))

In [4]:
# The root directory contains a set of sub-folders organised by year.
# List the contents of one of them (the folder whose name contains 2013)
# to see which files a single year provides.
sorted(os.listdir('../input/collegefootballstatistics/cfbstats-com-2013-1-5-20'))

In [5]:
def remove_csv(inp):
    """Return ``inp`` without a trailing ``.csv`` extension.

    Only a suffix is stripped: a ``.csv`` that appears in the middle of the
    name is kept, so ``'a.csv.old.csv'`` becomes ``'a.csv.old'``. Names that
    do not end in ``.csv`` are returned unchanged.

    Parameters
    ----------
    inp : str
        File name (or path) to strip.

    Returns
    -------
    str
        ``inp`` with the final ``.csv`` removed, if it had one.
    """
    suffix = '.csv'
    if inp.endswith(suffix):
        return inp[:-len(suffix)]
    return inp

In [6]:
# Build a nested lookup: year -> stat type -> path of the CSV file.
ROOT_DIR = '../input/collegefootballstatistics/'
START_YEAR = 2005

# Keep only real sub-directories. Entries containing '__' (the MACOSX
# artifact) are skipped, and so is any stray plain file: it would otherwise
# shift every later year by one and make os.listdir(sub_dir) below raise.
# A sorted list (rather than a one-shot filter iterator) also keeps `dirs`
# reusable after the loop.
dirs = sorted(
    name for name in os.listdir(ROOT_DIR)
    if '__' not in name and os.path.isdir(os.path.join(ROOT_DIR, name))
)

result = dict()
# NOTE: years are assigned by position in the sorted listing, so this assumes
# one directory per season, contiguous from START_YEAR, whose names sort
# chronologically -- verify against the directory listing.
for offset, basename in enumerate(dirs):
    year = START_YEAR + offset
    # Per-year mapping: stat type -> CSV path
    current = result[year] = dict()
    sub_dir = os.path.join(ROOT_DIR, basename)
    for fname in os.listdir(sub_dir):
        # Test the actual extension; a substring search would also accept
        # names that merely contain '.csv' somewhere (e.g. 'pass.csv.bak').
        if fname.endswith('.csv'):
            current[remove_csv(fname)] = os.path.join(sub_dir, fname)

In [7]:
# Sanity check: show the path stored for the 'pass' file of 2006.
print(result[2006]['pass'])

In [8]:
import pandas as pd

In [9]:
# `result` now lets us look up a CSV path by year and stat type.
# Show the stat types (the inner keys) available for 2005.
result[2005].keys()

In [10]:
# Load the 2009 'kickoff-return' CSV into a DataFrame.
ret = pd.read_csv(result[2009]['kickoff-return'])

In [11]:
# Preview the first rows of the kickoff-return table.
ret.head()

In [12]:
# Let's plot the distribution of yards here
%matplotlib inline
import seaborn as sns
sns.countplot(x=ret.Yards.name, data=ret)

In [13]:
# Histogram of the Yards column using pandas' built-in plotting.
ret['Yards'].hist()

In [14]:
# Distribution plot of Yards via seaborn.
# NOTE(review): distplot is deprecated in newer seaborn releases in favour of
# histplot/displot -- confirm the installed version before upgrading.
sns.distplot(ret['Yards'])

In [15]:
# Summary statistics for the kickoff-return table.
ret.describe()

In [16]:
# Switch to a later season: load the 2013 'player' CSV into a DataFrame.
players = pd.read_csv(result[2013]['player'])

In [17]:
# Preview the first rows of the player table.
players.head()

In [18]:
# Histogram of the players' Class column.
players['Class'].hist()

In [19]:
import matplotlib.pyplot as plt

In [20]:
# One distribution plot each for the Height and Weight columns.
for column in ('Height', 'Weight'):
    # Missing values are dropped before plotting.
    values = players[column].dropna()
    ax = sns.distplot(values)
    ax.set(xlabel=column, ylabel='Ratio')
    # Render now so each column gets its own figure.
    plt.show()

In [21]:
# To finish up, load the 2010 'conference' CSV into a DataFrame
# and preview its first rows.
conf = pd.read_csv(result[2010]['conference'])
conf.head()

In [22]:
# (rows, columns) of the conference table.
conf.shape

In [23]:
# Distinct values found in the Subdivision column.
conf['Subdivision'].unique()

In [24]:
# Histogram of conferences per subdivision (FCS vs FBS).
conf['Subdivision'].hist()