# MFE 9815 - Software Engineering in Finance

## Fall 2016 - Python Class - Part 4

### Alain Ledon

# Introduction to Numpy

[http://numpy.scipy.org/](http://numpy.scipy.org/)

* A powerful N-dimensional array object (ndarray)
* Sophisticated (broadcasting) functions
* Tools for integrating C/C++ and Fortran code
* Useful linear algebra, Fourier transform, and random number capabilities

# NumPy Examples

## Creating vectors (ndarrays) and matrices:

In [None]:
%matplotlib inline

In [None]:
# Bind the NumPy module to the alias `np`, which the rest of the notebook uses.
import numpy as np
# A second alias for the same module: any valid identifier may follow `as`.
import numpy as alain
# Display the installed NumPy version string (reached here via the second alias).
alain.version.version

In [None]:
np.arange?

In [None]:
x = np.arange(10)
type(x), x

In [None]:
np.arange(0,10,.5)

In [None]:
x[5]

In [None]:
len(x)

In [None]:
x

In [None]:
y =x.reshape((2,5))

In [None]:
y

In [None]:
y[1,0]

In [None]:
np.zeros((3,3))

In [None]:
np.ones((3,2))

In [None]:
np.empty((2,2))

In [None]:
np.eye(4)

# NumPy Examples (cont.)

## Creating random vectors (ndarrays) and matrices:

In [None]:
np.random?

In [None]:
np.random.randn(4)

In [None]:
np.random.randn(2,2)

In [None]:
np.random.randint?

In [None]:
# Draw five random integers in [0, 10) and print each on its own line.
# range() and print(...) with a single argument behave the same on Python 2
# and Python 3, unlike the Python 2-only xrange and print statement.
for i in range(5):
    print(np.random.randint(10))

# NumPy Operations

In [None]:
np.arange(5)

In [None]:
np.arange(5)**2

In [None]:
a = np.arange(4).reshape(2,2)
a

In [None]:
b = np.ones((2,2))
b

In [None]:
a * b

In [None]:
np.dot(a, b)

In [None]:
a = np.arange(4).reshape(2,2)
a

In [None]:
a *= 2
a

In [None]:
b + a

In [None]:
np.linspace?

In [None]:
np.arange(0, np.pi+1, np.pi/2.)

In [None]:
b = np.linspace(0, np.pi, 3)
b

In [None]:
# Show the whole array, then display the element at row 1, column 1.
# print(...) with one argument works on both Python 2 and Python 3.
print(a)
a[1, 1]

In [None]:
# Take column 1 of `a`: every row (`:`), column index 1. Indexing an axis with
# an integer drops that axis, which the printed shape makes visible.
x = a[:, 1]
# print(...) with one argument works on both Python 2 and Python 3.
print(x.shape)
x

In [None]:
# Compare two ways of "turning" the vector: reshape((2, 1)) makes an explicit
# 2x1 column, while transpose() is printed next to it for comparison.
# print(...) with one argument works on both Python 2 and Python 3.
print(x.reshape((2, 1)))
print(x.transpose())

In [None]:
a[0, :]

In [None]:
a = np.arange(16)
a

In [None]:
a.reshape(2,8)

In [None]:
b = a.reshape(4,4)
b

In [None]:
# Plain assignment does not copy: `c` becomes a second name for the same
# array object that `b` refers to.
c = b
# Writing through `c` therefore also changes what `b` shows.
# NOTE(review): `b` came from a.reshape(4,4), presumably a view of `a`, so `a`
# is expected to change too — the following cells display `b` and `a` to check.
c[0,0] = 32
c

In [None]:
b

In [None]:
a

In [None]:
# Contrast with plain assignment: copy() gives `c1` its own data, so the
# write below leaves `a1` untouched.
a1 = np.arange(16).reshape(4,4)
c1 = a1.copy()
c1[0,0] = 32
# print(...) with one argument works on both Python 2 and Python 3.
print(a1)
print(c1)

In [None]:
c.transpose()

In [None]:
# Show the full array, then rows 1 and 2 (the slice end, 3, is exclusive).
# print(...) with one argument works on both Python 2 and Python 3.
print(c)
print(c[1:3])

In [None]:
# Element-wise comparison: `k` holds True wherever the entry of `c` exceeds 7.
k = c > 7
# print(...) with one argument works on both Python 2 and Python 3.
print(c)
print(k)

In [None]:
c[k]

# NumPy Linear Algebra Operations

In [None]:
import numpy.linalg as npl

In [None]:
npl?

In [None]:
# The 3x3 integer matrix used by the linear-algebra examples that follow,
# written out one row per line.
c = np.array([
    [1, 0, -2],
    [4, 1, 0],
    [1, 1, 7],
])
c

In [None]:
npl.inv(c)

In [None]:
np.dot(c, npl.inv(c))

In [None]:
b = c[:3,:3]
b

In [None]:
npl.solve?

In [None]:
# eig returns the pair (eigenvalues, eigenvectors). Note that this rebinds
# both `a` and `b`, replacing the arrays those names held before.
a, b = npl.eig(b)
# print(...) with one argument works on both Python 2 and Python 3.
print("a = {0}".format(a))
print("b = {0}".format(b))

In [None]:
y = np.array([3, 5, 6])
npl.solve(c, y)

# NumPy References

* [http://www.scipy.org/Tentative_NumPy_Tutorial](http://www.scipy.org/Tentative_NumPy_Tutorial)
* [http://wiki.scipy.org/Cookbook](http://wiki.scipy.org/Cookbook)
* [http://wiki.scipy.org/NumPy_for_Matlab_Users](http://wiki.scipy.org/NumPy_for_Matlab_Users)
* [http://mathesaurus.sourceforge.net/](http://mathesaurus.sourceforge.net/)

# Introduction to SciPy

* SciPy is a collection of mathematical algorithms and convenience functions built on the Numpy extension for Python. 
* SciPy adds significant power to the interactive Python session by exposing the user to high-level commands and classes for the manipulation and visualization of data. 
* IPython with Numpy, SciPy, Scikits and Pandas becomes a data-processing and system-prototyping environment rivaling systems such as MATLAB, IDL, Octave, R-Lab, and SciLab.

You can install SciPy with *apt-get* or *pip*:

    [~] sudo apt-get install python-scipy

or

    [~] sudo pip install scipy

# SciPy Examples - Linear Algebra

[http://docs.scipy.org/doc/scipy/reference/tutorial/linalg.html](http://docs.scipy.org/doc/scipy/reference/tutorial/linalg.html)

In [None]:
import scipy as sp
from scipy import linalg
sp.version.version

In [None]:
# print(...) with one argument works on both Python 2 and Python 3.
print(c)
# Wrap the array as a matrix object without copying its data. scipy's
# top-level `mat` was only a re-export of this NumPy function and has been
# deprecated/removed from the scipy namespace, so NumPy is called directly.
C = np.asmatrix(c)
C

In [None]:
# Invert the same matrix with numpy.linalg and with scipy.linalg so the two
# results can be compared side by side.
# print(...) with one argument works on both Python 2 and Python 3.
print(npl.inv(c))
print("-----------------")
print(linalg.inv(c))

In [None]:
linalg.inv(C)

In [None]:
la,v = linalg.eig(C)
la, v

# SciPy Examples - Statistics

[http://docs.scipy.org/doc/scipy/reference/tutorial/stats.html](http://docs.scipy.org/doc/scipy/reference/tutorial/stats.html)

In [None]:
from scipy import stats

In [None]:
# Collect the names exported by scipy.stats whose objects are discrete
# distributions (instances of rv_discrete), in the order dir() reports them.
discrete = []
for attr_name in dir(stats):
    if isinstance(getattr(stats, attr_name), stats.rv_discrete):
        discrete.append(attr_name)
discrete

In [None]:
def _is_continuous(attr_name):
    """Return True when scipy.stats.<attr_name> is an rv_continuous instance."""
    return isinstance(getattr(stats, attr_name), stats.rv_continuous)


# Names of the continuous distributions exported by scipy.stats.
continu = list(filter(_is_continuous, dir(stats)))
continu

In [None]:
stats.norm?

In [None]:
# Seed NumPy's global random generator so later draws are repeatable.
np.random.seed(12345)
# Frozen normal distribution with location 0 and scale 1 (the standard normal).
stdnorm = stats.norm(loc=0, scale=1)
stdnorm

In [None]:
stdnorm.rvs?

In [None]:
stdsample = stdnorm.rvs(20)
stdsample

In [None]:
stdsample.var(), stdsample.mean(), stdsample.std()

In [None]:
stdsample = stdnorm.rvs(1000000)
stdsample.var(), stdsample.mean(), stdsample.std()

# SciPy Examples - Interpolation

[http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html](http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html)

In [None]:
from scipy.interpolate import interp1d

In [None]:
interp1d?

In [None]:
# Ten samples of exp(-x/3) on [0, 10] serve as the data to interpolate.
x = np.linspace(0, 10, num=10)
y = np.exp(-x / 3.0)
# Two interpolants over the same data: interp1d's default kind and a cubic one.
f, f2 = interp1d(x, y), interp1d(x, y, kind='cubic')
# Denser 40-point grid on which the interpolants are evaluated for plotting.
xnew = np.linspace(0, 10, num=40)

In [None]:
import matplotlib.pyplot as plt

In [None]:
plt.plot(x,y,'o',xnew,f(xnew),'-', xnew, f2(xnew),'--')
plt.legend(['data', 'linear', 'cubic'], loc='best')

In [None]:
plt.plot(x,y,'o', xnew, f2(xnew),'--')

In [None]:
from scipy import interpolate
# Sample sin(x) every pi/4 starting at 0; the stop value is padded by one
# step so that the point at 2*pi is included.
x = np.arange(0,2*np.pi+np.pi/4,2*np.pi/8)
y = np.sin(x)
# Fit a spline through the samples; s=0 asks for no smoothing, so the curve
# passes through every data point.
tck = interpolate.splrep(x,y,s=0)
# Evaluate the spline itself (der=0) on a finer grid.
xnew = np.arange(0,2*np.pi,np.pi/50)
ynew = interpolate.splev(xnew,tck,der=0)
# Four series are drawn, in this order: the sample points ('x' markers), the
# spline, the exact sine, and straight segments joining the samples ('b').
plt.plot(x,y,'x',xnew,ynew,xnew,np.sin(xnew),x,y,'b')
# One legend label per series, in plotting order.
plt.legend(['Data', 'Cubic Spline', 'True', 'Linear'])
plt.axis([-0.05,6.33,-1.05,1.05])
plt.title('Cubic-spline interpolation')

# SciPy - Other Packages

* [Optimization](http://docs.scipy.org/doc/scipy-0.12.0/reference/tutorial/optimize.html)
* [Integration](http://docs.scipy.org/doc/scipy-0.12.0/reference/tutorial/integrate.html)
* [Signal Processing](http://docs.scipy.org/doc/scipy-0.12.0/reference/tutorial/signal.html)
* [Fourier Transform](http://docs.scipy.org/doc/scipy-0.12.0/reference/tutorial/fftpack.html)
* [more...](http://docs.scipy.org/doc/scipy-0.12.0/reference/)

# Scipy References

- [http://docs.scipy.org/doc/](http://docs.scipy.org/doc/)
- [http://docs.scipy.org/doc/scipy/reference/tutorial/index.html](http://docs.scipy.org/doc/scipy/reference/tutorial/index.html)
- [http://www.scipy.org/Cookbook](http://www.scipy.org/Cookbook)

# Introduction to matplotlib

* **matplotlib** is a python 2D plotting library which produces publication quality figures in a variety of hardcopy formats and interactive environments across platforms
* **matplotlib** can be used in python scripts, the python and ipython shell (à la MATLAB® or Mathematica®), web application servers, and six graphical user interface toolkits.
* It's fully integrated with **numpy**, **scipy** and **pandas**

[http://matplotlib.org/](http://matplotlib.org/)

In [None]:
x = np.random.randn(10000)
plt.hist(x, 100)

In [None]:
# Parameters of the normal distribution the sample is drawn from.
mu = 0
sigma = 1
x = mu + sigma * np.random.randn(10000)
# the histogram of the data, scaled so that the bar areas sum to 1
# (`density=True` replaces the `normed` keyword, which matplotlib removed)
n, bins, patches = plt.hist(x, 50, density=True, facecolor='g', alpha=0.75)
plt.xlabel('Smarts')
plt.ylabel('Probability')
plt.title('Histogram of IQ')
# Annotate the plot with the parameters, rendered through matplotlib's mathtext.
plt.text(1, .33, r'$\mu={0},\ \sigma={1}$'.format(mu, sigma), fontsize=18)
plt.grid(True)

In [None]:
plt.text?

# Introduction to pandas

* Python Data Analysis library
* Modeled after R data frames, but with a lot of additional capabilities (and it's Python)
* [Python for Data Analysis](http://shop.oreilly.com/product/0636920023784.do)
* [http://pandas.pydata.org/](http://pandas.pydata.org/)


In [None]:
import pandas as pd
import numpy as np

In [None]:
# Create a dataframe - It can handle numpy arrays, lists, dictionaries
my_df = pd.DataFrame([1,2,3])
my_df

In [None]:
# Build a 4x2 frame of normal(0, 1) draws with named columns and a named,
# labelled index (one label per character of 'name').
cols = ['colA', 'colB']
idx = pd.Index(list('name'), name='index_name')
data = np.random.normal(loc=0, scale=1., size=(4, 2))
df = pd.DataFrame(data=data, index=idx, columns=cols)
df

In [None]:
df.colA

In [None]:
df['colA']

In [None]:
df

In [None]:
df[df.colA > 0]

In [None]:
# write to file .to_csv(...), .to_excel(...), .to_html(...), to_string(...)
df.to_csv('mydf.csv')

In [None]:
df.to_excel('mydf.xlsx')

In [None]:
df

In [None]:
mydf = pd.read_csv('mydf.csv', index_col='index_name')
mydf

In [None]:
# data manipulation: ten rows of normal(0, 1) draws in three named columns
dim = (10, 3)
samples = np.random.normal(loc=0, scale=1, size=dim)
df = pd.DataFrame(samples, columns=['one', 'two', 'three'])
df

In [None]:
# Select the first two rows of the column named 'one'.
col1_2rows = df['one'][:2]
type(col1_2rows), col1_2rows

In [None]:
# array of column names instead of 'one'.
df[['one', 'two']][:2]

In [None]:
# negative indexes
df[['one', 'two']][-3:-2]

In [None]:
# df[column names][rows]: pick the column first, then slice its rows.
# print(...) with one argument works on both Python 2 and Python 3.
print(df["two"][:3])
df.head()

In [None]:
df[["two","one"]][:3]

In [None]:
# direct access by position: rows 0 through 2 and the first two columns.
# (.ix has been removed from pandas. On a default integer index its row slice
# 0:2 was label-based and therefore inclusive, hence the 0:3 passed to iloc.)
df.iloc[0:3, 0:2]

In [None]:
pd.DataFrame?

In [None]:
# Create from dict: each key becomes a column of 100 random values — two
# numeric columns drawn from normal distributions and two label columns
# drawn from three-letter alphabets.
n_rows = 100
d = {
    'normOne': np.random.normal(100, 5, n_rows),
    'NormTwo': np.random.normal(50, 5, n_rows),
    'NormA': np.random.choice(['a', 'b', 'c'], n_rows),
    'NormB': np.random.choice(['e', 'f', 'g'], n_rows),
}
df = pd.DataFrame(d)

In [None]:
df[3:10]

In [None]:
# Index the DataFrame with a Boolean Series (one True/False per row) to keep
# only the rows where it is True, then keep at most the first five of them.
newdf = df[df.NormA == 'a'][:5]

In [None]:
newdf

In [None]:
# Similar but using a mask
mask = np.logical_and(df.NormA=='a', df.NormB=='e')
df[mask][:5]

In [None]:
df[(df.NormA == 'a') & (df.NormB == 'e')][:5]

In [None]:
# Keep only the rows whose NormA and NormB values are both 'a' or 'e',
# filtering in two steps: first on NormA, then on NormB.
a_e = ['a', 'e']
norm_a_matches = df.NormA.isin(a_e)
NormA_a_e = df[norm_a_matches]
norm_b_matches = NormA_a_e.NormB.isin(a_e)
only_a_e = NormA_a_e[norm_b_matches]
only_a_e

In [None]:
df.reindex?

In [None]:
df["doubleNormTwo"] = df.NormTwo * 2
df["OnePlusTwo"] = df["normOne"] + df["NormTwo"]
df.head()

In [None]:
dfnumbers = df[["NormTwo", "normOne", "doubleNormTwo", "OnePlusTwo"]]
dfnumbers.head()

In [None]:
dfnumbers.cov()

In [None]:
dfnumbers.corr()

In [None]:
# API token passed as `authtoken` to the Quandl.get calls below; left empty here.
QUANDL_KEY = ""
# NOTE(review): later releases of this package are reportedly imported as
# lowercase `quandl` — confirm against the installed version.
import Quandl

In [None]:
Quandl.get?

In [None]:
aapl_data = Quandl.get("WIKI/AAPL", trim_start="2013-09-09", trim_end="2014-09-09", authtoken=QUANDL_KEY)
aapl_data.tail()

In [None]:
aapl_data.shape

In [None]:
# Five dataset codes to request in a single call.
# NOTE(review): the trailing ".4" presumably selects one column of each
# dataset (the close price?) — confirm against the Quandl documentation.
stock_tickers = ["GOOG/NASDAQ_FB.4", "GOOG/NYSE_BAC.4", "GOOG/NYSE_XOM.4", "GOOG/NYSE_JPM.4", "GOOG/AAPL.4"]
# Request all five over the same date window used for the AAPL example.
portfolio = Quandl.get(stock_tickers, trim_start="2013-09-09", trim_end="2014-09-09", authtoken=QUANDL_KEY)
# Relabel the columns with short ticker names.
# NOTE(review): assumes the columns come back in request order — verify.
portfolio.columns = ["FB", "BAC", "XOM", "JPM", "AAPL"]

In [None]:
portfolio.tail()

In [None]:
#####################
!head -n 10 ~/Baruch/names/yob1880.txt