# SciPy in Data Science

### single integration

In [1]:
from scipy.integrate import quad   # import scipy library

In [2]:
def integrateFunction(x):
    """Identity integrand: hand the argument back unchanged."""
    value = x
    return value

In [3]:
# Integrate the identity function over [0, 1]; the displayed tuple is (integral, error estimate).
quad(integrateFunction, a=0, b=1)

(0.5, 5.551115123125783e-15)

In [4]:
def integrateFunc(x,a,b):
    """Evaluate the linear expression ``x*a + b`` for the given slope ``a`` and offset ``b``."""
    scaled = x*a
    return scaled + b

In [5]:
# slope (a) and offset (b) used as the extra integrand parameters
a, b = 3, 2

In [6]:
# Integrate over [0, 1]; the extra integrand parameters are forwarded through `args`.
quad(integrateFunc, 0, 1, args=(a, b))

(3.5, 3.885780586188048e-14)

### double integration

In [11]:
import scipy.integrate as integrate

In [12]:
def f(x,y):
    """Integrand for the double integral: the sum of its two arguments."""
    return x+y

# Outer variable runs over [0, 1]; the two lambdas give the inner limits 0 and 2.
# Note: dblquad calls the integrand as func(y, x) (inner variable first). Because
# the sum is symmetric in its arguments, the argument order does not affect this result.
integrate.dblquad(f, a=0, b=1, gfun=lambda x: 0, hfun=lambda x: 2)

(3.0, 4.436070580899685e-14)

### optimization 
> Optimization finds the parameter values that minimize (or maximize) an objective function; it is used to improve a system's performance mathematically by fine-tuning its process parameters.

In [14]:
import numpy as np
from scipy import optimize

In [15]:
def f(x):
    """Objective function: x squared plus five times sin(x)."""
    quadratic_term = x**2
    sine_term = 5*np.sin(x)
    return quadratic_term + sine_term

In [21]:
# Minimise f from the starting guess x0=0 with the BFGS method;
# the 'disp' option makes the solver print its convergence summary.
minimal_value = optimize.minimize(f, x0=0, method='bfgs', options={'disp': True})

Optimization terminated successfully.
         Current function value: -3.246394
         Iterations: 5
         Function evaluations: 12
         Gradient evaluations: 6


In [22]:
# Same BFGS minimisation from x0=0, this time without the 'disp' option.
minimalValueWithoutOutput = optimize.minimize(f, x0=0, method='bfgs')
minimalValueWithoutOutput  # display the full result object

      fun: -3.2463942726915387
 hess_inv: array([[0.15435217]])
      jac: array([-2.98023224e-08])
  message: 'Optimization terminated successfully.'
     nfev: 12
      nit: 5
     njev: 6
   status: 0
  success: True
        x: array([-1.11051052])

In [26]:
from scipy.optimize import root

In [27]:
def rootFunc(x):
    """Evaluate x + 3.5*cos(x)."""
    cosine_term = 3.5 * np.cos(x)
    return x + cosine_term

In [29]:
# Search for a root of rootFunc starting from the initial guess 0.3.
rootValue = root(rootFunc, x0=0.3)
rootValue  # display the solver result (solution in `x`, residual in `fun`)

    fjac: array([[-1.]])
     fun: array([0.])
 message: 'The solution converged.'
    nfev: 14
     qtf: array([-8.32889313e-13])
       r: array([-4.28198145])
  status: 1
 success: True
       x: array([-1.21597614])

### Inverse of matrix

In [4]:
import numpy as np
from scipy import linalg

# 2x2 example matrix, written one row per line
matrix = np.array([
    [10, 3],
    [2, 8],
])
matrix

array([[10,  3],
       [ 2,  8]])

In [13]:
type(matrix)   # show the Python type of the object held in `matrix`

numpy.ndarray

In [8]:
linalg.inv(matrix)   # compute the matrix inverse (not an element-wise reciprocal)

array([[ 0.10810811, -0.04054054],
       [-0.02702703,  0.13513514]])

### finding determinant

In [11]:
linalg.det(matrix)   # determinant of the matrix: 10*8 - 3*2 = 74

74.0

### Solve linear equations

In [16]:
# Linear Equations:
# 2x + 3y + z = 21
# -x + 5y + 4z = 9
# 3x + 2y + 9z = 6

In [14]:
# Coefficient matrix: one row per equation, one column per unknown (x, y, z)
numArray = np.array([
    [2, 3, 1],
    [-1, 5, 4],
    [3, 2, 9],
])
# Right-hand-side constants of the three equations
numArrayValue = np.array([21, 9, 6])

In [15]:
linalg.solve(numArray, numArrayValue)    # solve numArray @ [x, y, z] = numArrayValue for the unknowns

array([ 4.95,  4.35, -1.95])

### Singular value decomposition

In [17]:
matrix.shape   # (rows, columns) of the matrix passed to the decomposition below

(2, 2)

In [18]:
linalg.svd(matrix)   # returns the tuple (U, s, Vh): left singular vectors, singular values, right singular vectors (as rows)

(array([[-0.84330347, -0.53743768],
        [-0.53743768,  0.84330347]]),
 array([11.70646059,  6.32129579]),
 array([[-0.8121934 , -0.58338827],
        [-0.58338827,  0.8121934 ]]))

In [19]:
# (array([[-0.84330347, -0.53743768],
#         [-0.53743768,  0.84330347]]),    <- U: unitary matrix whose columns are the left singular vectors

#  array([11.70646059,  6.32129579]),     <- s: singular values (square roots of the eigenvalues of A^H A), largest first

#  array([[-0.8121934 , -0.58338827],
#         [-0.58338827,  0.8121934 ]]))   <- Vh: unitary matrix holding the right singular vectors as rows (conjugate transpose of V)

## Eigenvalues & Eigenvectors

In [25]:
# import the required libraries
import numpy as np
from scipy import linalg

In [27]:
# 2x2 test matrix for the eigen decomposition
test_rating_data = np.array([[5, 8], [7, 9]])
# eig returns the eigenvalues and a matrix whose columns are the eigenvectors
eigenValues, eigenVector = linalg.eig(test_rating_data)
# a 2x2 input yields exactly two eigenvalues
first_eigen, second_eigen = eigenValues

In [28]:
# display both eigenvalues (returned as complex numbers; the imaginary parts are zero here)
first_eigen, second_eigen

((-0.745966692414834+0j), (14.745966692414834+0j))

In [29]:
# display the first eigenvector: column 0 of the eigenvector matrix
eigenVector[:,0]

array([-0.81220939,  0.58336601])

In [30]:
# display the second eigenvector: column 1 of the eigenvector matrix
eigenVector[:,1]

array([-0.63447346, -0.77294465])

## Scipy sub-package - statistics

In [31]:
from scipy.stats import norm   # import norm for normal distribution 

In [35]:
norm.rvs(loc=0,scale=1,size=10)   # rvs draws random variates; no random_state is passed, so the values differ on every run

array([-2.241751  ,  0.49747882, -0.31184421, -1.97855208,  0.1268317 ,
        1.24429959,  0.73440685,  1.04435889,  1.02582785, -1.07049541])

In [37]:
norm.cdf(5,loc=1,scale=2)   # cdf = Cumulative Distribution Function: P(X <= 5) for mean 1 and standard deviation 2

0.9772498680518208

In [38]:
norm.pdf(9,loc=0,scale=1)   # pdf = Probability Density Function: density of the standard normal at x = 9

1.0279773571668917e-18