# Scipy 
* Souse : <https://wizardforcel.gitbooks.io/python-quant-uqer/content/8.html>
* All Distribution : <https://docs.scipy.org/doc/scipy/reference/stats.html> (Random variables generating)
* More stats model : <http://www.statsmodels.org/stable/index.html>

In [2]:
import numpy as np
import scipy.stats as stats
import scipy.optimize as opt

## Statistics
* ###  Random variables generating

In [31]:
# uniform
rv_unif = stats.uniform.rvs(size=10)
print('Uniform distribution : {}'.format(rv_unif))

#beta
rv_beta = stats.beta.rvs(size=10, a=4, b=2)
print('Beta distribution : {}'.format(rv_beta))

Uniform distribution : [0.18136453 0.25266944 0.25088364 0.85272771 0.02037953 0.32904574
 0.25282822 0.52155707 0.01371629 0.58341721]
Beta distribution : [0.71734196 0.7298477  0.67172984 0.72825931 0.67197403 0.56086508
 0.64419362 0.74821534 0.9475687  0.54748015]


In [32]:
#set seed
np.random.seed(2001)
rv_unif = stats.uniform.rvs(size=10)
print('Uniform distribution : {}'.format(rv_unif))

Uniform distribution : [0.16712564 0.08099804 0.96022574 0.93845024 0.62085885 0.42560197
 0.68978129 0.24203462 0.36660064 0.14195103]


* ### Hypothesis Testing
* KS test : <https://www.itl.nist.gov/div898/handbook/eda/section3/eda35g.htm>

In [114]:
# normal distribution
norm_rv = stats.norm.rvs(size = 200,loc=0.5, scale=2)
n = norm_rv
mean = n.mean()
std = n.std()
print('mean : {0} ,  std : {1}'.format(mean,std))

# KS test
stat_val, p_val = stats.kstest(n,'norm',(0.5,2))
print('KS-statistic D = {0:10.10f} , p-value = {1:10.10f}'.format(stat_val,p_val))
    # normal distribution
    
# test mean is 0 or not t test
stat_val, p_val = stats.ttest_1samp(n, 0)
print('KS-statistic D = {0:10.10f} , p-value = {1:10.10f}'.format(stat_val,p_val))
    # the mean is not 0

# test two distribution
norm_rv2 = stats.norm.rvs(size = 200,loc=0.6, scale=2.5)
n2 = norm_rv
mean = n2.mean()
std = n2.std()

stat_val, p_val = stats.ttest_ind(n,n2,equal_var=True)
    # equal_var=False if sample size are different

mean : 0.5194843780984614 ,  std : 2.0309377247752662
KS-statistic D = 0.0443238843 , p-value = 0.8268858566
KS-statistic D = 3.6082982152 , p-value = 0.0003898700


* ### Quantile & Moment 
     No use in sample

In [141]:
# quantile
g_dist = stats.gamma(a=2)
print ("quantiles of 2, 4 and 5:")
print (g_dist.cdf([2, 4, 5]))
print ("Values of 25%, 50% and 90%:")
print (g_dist.pdf([0.25, 0.5, 0.95]))

quantiles of 2, 4 and 5:
[0.59399415 0.90842181 0.95957232]
Values of 25%, 50% and 90%:
[0.1947002  0.30326533 0.36740397]


In [172]:
# sample quantile
g_dist = stats.gamma.rvs(size = 20,a=2)
np.quantile(g_dist,0.5)

2.523441684133423

In [157]:
# moment
z = 4
stats.norm.moment(z,loc = 0,scale = 1)

3.0

* ### Describe & MLE estimator

In [170]:
n = stats.norm.rvs(size = 200,loc = 0, scale = 1)
inf = stats.describe(n)
print(inf)
mean,std = stats.norm.fit(n)
print('Mean in MLE : {} \nStd in MLE : {}'.format(mean,std))

DescribeResult(nobs=200, minmax=(-2.3254495411201828, 2.72689721881964), mean=0.042909875816325355, variance=0.8103611381600083, skewness=-0.018306509480757297, kurtosis=0.40421361891736884)
Mean in MLE : 0.042909875816325355 
Std in MLE : 0.8979472882464807


* ### Pearsonr & Spearmanr

In [5]:
norm_dist = stats.norm.rvs(size = 200,loc = 0,scale = 1)
expo_dist = stats.expon.rvs(size = 200)
corr , p_value = stats.pearsonr(norm_dist,expo_dist)
print("Pearsonr's :\nThe correlation of two distribution is {},\nThe p-value is {}.".format(corr,p_value))
corr , p_value = stats.spearmanr(norm_dist,expo_dist)
print("Spearmanr's :\nThe correlation of two distribution is {},\nThe p-value is {}.".format(corr,p_value))

Pearsonr's :
The correlation of two distribution is -0.0908752649858539,
The p-value is 0.2006279800281026.
Spearmanr's :
The correlation of two distribution is -0.053608340208505224,
The p-value is 0.4508932227085941.


* ### Regression

In [6]:
x = stats.chi2.rvs(3, size=50)
y = 2.5 + 1.2 * x + stats.norm.rvs(size=50, loc=0, scale=1.5)
stats.linregress(x, y)

LinregressResult(slope=1.180770628326481, intercept=2.812558012619495, rvalue=0.907712343740936, pvalue=9.88617160867286e-20, stderr=0.07878162503357605)

## Optimization Problem
### convex optimization
#### 1、Nelder-Mead method

In [16]:
# example
def rosen(x):
    """The Rosenbrock function"""
    return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

# Nelder-Mead method
x_0 = np.array([0.5, 1.6, 1.1, 0.8, 1.2])
res = opt.minimize(rosen, x_0, method='nelder-mead', options={'xtol': 1e-8, 'disp': True})
    # xtol表示迭代收斂的容忍誤差上界
    # disp : bool,Set to True to print convergence messages.
res

Optimization terminated successfully.
         Current function value: 0.000000
         Iterations: 436
         Function evaluations: 706


 final_simplex: (array([[1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 0.99999999],
       [1.        , 1.        , 1.        , 1.        , 1.        ],
       [1.        , 1.        , 1.        , 1.        , 1.00000001],
       [1.        , 1.        , 1.        , 1.        , 1.00000001]]), array([1.66149699e-17, 6.32117429e-17, 7.44105349e-17, 8.24396866e-17,
       9.53208876e-17, 1.07882961e-16]))
           fun: 1.6614969876635003e-17
       message: 'Optimization terminated successfully.'
          nfev: 706
           nit: 436
        status: 0
       success: True
             x: array([1., 1., 1., 1., 1.])