In [47]:
# NumPy, the engine of scientific computing in Python
import numpy as np

# We'll demo a bit of Scipy
import scipy.special

In [2]:
np.array()

TypeError: Required argument 'object' (pos 1) not found

In [3]:
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [12]:
my_arr = np.array([1,2,3,4])

In [7]:
my_arr

array([1, 2, 3, 4])

In [8]:
my_arr.dtype

dtype('int64')

In [13]:
my_arr.shape

(4,)

In [15]:
my_arr.astype(float)

array([1., 2., 3., 4.])

In [16]:
my_arr.mean()

2.5

In [17]:
my_arr.std()

1.118033988749895

In [18]:
np.sum(my_arr)

10

In [22]:
n = 10

np.zeros(n).astype(list)

array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=object)

In [23]:
n = 10

np.zeros(n)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [26]:
my_arr = np.ones(n)

my_arr

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [27]:
my_arr * 5

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [28]:
my_fives = my_arr * 5

In [29]:
my_fives

array([5., 5., 5., 5., 5., 5., 5., 5., 5., 5.])

In [30]:
# zeros_like returns an array of zeros with the same shape (and dtype) as the input array
np.zeros_like(my_fives)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [31]:
my_arr = np.array([1, 2, 3, 4, 5])

np.exp(my_arr)

array([  2.71828183,   7.3890561 ,  20.08553692,  54.59815003,
       148.4131591 ])

In [37]:
np.arange?

Docstring:
arange([start,] stop[, step,], dtype=None)

Return evenly spaced values within a given interval.

Values are generated within the half-open interval ``[start, stop)``
(in other words, the interval including `start` but excluding `stop`).
For integer arguments the function is equivalent to the Python built-in
`range <http://docs.python.org/lib/built-in-funcs.html>`_ function,
but returns an ndarray rather than a list.

When using a non-integer step, such as 0.1, the results will often not
be consistent.  It is better to use ``linspace`` for these cases.

Parameters
----------
start : number, optional
    Start of interval.  The interval includes this value.  The default
    start value is 0.
stop : number
    End of interval.  The interval does not include this value, except
    in some cases where `step` is not an integer and floating point
    round-off affects the length of `out`.
step : number, optional
    Spacing between values.  For any output `out`, this is

In [39]:
my_arr = np.arange(5)
my_arr

array([0, 1, 2, 3, 4])

In [52]:
my_arr = np.arange(1, 6)
my_arr

array([1, 2, 3, 4, 5])

In [43]:
np.dot(my_arr)

TypeError: Required argument 'b' (pos 2) not found

In [44]:
np.dot(my_arr, my_arr)

55

In [45]:
np.pi

3.141592653589793

In [53]:
# We'll demo a bit of SciPy. With SciPy you sometimes need to import specific submodules:
# a plain `import scipy` won't import scipy.special for us,
# and importing scipy.special won't import all the other SciPy submodules.
import scipy.special

In [55]:
# Error function (erf) - cumulative.
# The error function is related to Φ, the cumulative distribution function of the standard
# normal distribution (i.e. the integral of its density): Φ(x) = (1 + erf(x / sqrt(2))) / 2
scipy.special.erf(my_arr)

array([0.84270079, 0.99532227, 0.99997791, 0.99999998, 1.        ])

In [56]:
# scipy.stats is a good statistics module: it has t-tests, Pearson correlation, and ANOVA
import scipy.stats
    # These functions also accept pandas DataFrames/Series directly; you don't need to convert them to NumPy arrays first

In [57]:
my_slow_arr = [0, 1, 2, 3, 4]

for value in my_slow_arr:
    print(value * 2)

0
2
4
6
8


In [59]:
my_fast_arr = np.array([0, 1, 2, 3, 4])
my_fast_arr * 2

array([0, 2, 4, 6, 8])

In [61]:
def multiply_python(arr):
    """Return a new array holding every element of ``arr`` multiplied by two.

    The doubling is done one element at a time in an explicit Python loop
    rather than with a single vectorized ``arr * 2``. This function serves as
    the slow, pure-Python baseline for the timing comparison in this notebook.

    Parameters
    ----------
    arr : array_like
        Sequence or NumPy array whose elements are to be doubled. It is only
        read, never modified.

    Returns
    -------
    numpy.ndarray
        Array allocated with ``np.zeros_like(arr)`` and then filled with the
        doubled values.
    """
    # Pre-allocate the output so that the loop only has to assign into it.
    doubled = np.zeros_like(arr)
    for idx in range(len(arr)):
        doubled[idx] = arr[idx] * 2
    return doubled

In [62]:
my_arr = np.arange(1000)

In [63]:
%timeit multiply_python(my_arr)

310 µs ± 1.57 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [64]:
new_np_array = np.arange(1000)

In [65]:
%timeit new_np_array * 2

1.18 µs ± 2.08 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


Clearly, NumPy's vectorized array operation is much, MUCH faster than the explicit Python loop: about 1.18 µs versus 310 µs here, i.e. roughly 260 times faster.

This is because the core routines of these big packages (NumPy, SciPy, pandas) are written in a compiled language like C, rather than in an interpreted language like Python. You are using Python, the interpreted language, only to interact with the compiled code in those packages, and that compiled code can then do the actual work much faster.