In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sympy import *
from scipy import stats
import math
import rpy2
from rpy2.robjects import r, pandas2ri

In [2]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Enable pretty-printed output using Unicode characters
# (init_printing presumably comes from the `from sympy import *` star import -- confirm).
init_printing(use_unicode=True)
# Switch on rpy2's pandas conversion so pandas objects and R objects can be exchanged.
pandas2ri.activate()

## 1. `rv_continuous` 是连续型随机变量（分布）的基类，`rv_discrete` 是离散型随机变量（分布）的基类

In [3]:
# List the name of every object in scipy.stats that is an instance of the
# continuous base class, then of the discrete base class. The first line is
# terminated by a newline; the second is left without a trailing newline.
for label, base, terminator in (
    ("连续型：", stats.rv_continuous, "\n"),
    ("离散型：", stats.rv_discrete, ""),
):
    members = [name for name, obj in vars(stats).items() if isinstance(obj, base)]
    print(label, members, sep="\t", end=terminator)

连续型：	['ksone', 'kstwobign', 'norm', 'alpha', 'anglit', 'arcsine', 'beta', 'betaprime', 'bradford', 'burr', 'burr12', 'fisk', 'cauchy', 'chi', 'chi2', 'cosine', 'dgamma', 'dweibull', 'expon', 'exponnorm', 'exponweib', 'exponpow', 'fatiguelife', 'foldcauchy', 'f', 'foldnorm', 'weibull_min', 'weibull_max', 'frechet_r', 'frechet_l', 'genlogistic', 'genpareto', 'genexpon', 'genextreme', 'gamma', 'erlang', 'gengamma', 'genhalflogistic', 'gompertz', 'gumbel_r', 'gumbel_l', 'halfcauchy', 'halflogistic', 'halfnorm', 'hypsecant', 'gausshyper', 'invgamma', 'invgauss', 'invweibull', 'johnsonsb', 'johnsonsu', 'laplace', 'levy', 'levy_l', 'levy_stable', 'logistic', 'loggamma', 'loglaplace', 'lognorm', 'gilbrat', 'maxwell', 'mielke', 'kappa4', 'kappa3', 'nakagami', 'ncx2', 'ncf', 't', 'nct', 'pareto', 'lomax', 'pearson3', 'powerlaw', 'powerlognorm', 'powernorm', 'rdist', 'rayleigh', 'reciprocal', 'rice', 'recipinvgauss', 'semicircular', 'skewnorm', 'trapz', 'triang', 'truncexpon', 'truncnorm', 'tukey

In [28]:
# Freeze a normal distribution with mean (loc) 1 and standard deviation (scale) 2.
X = stats.norm(1.0, 2.0)
# Display its mean and variance ("mv" is the default set of moments).
X.stats(moments="mv")

(array(1.), array(4.))

In [33]:
# Draw 10,000 samples from the frozen distribution X. The fixed seed makes the
# sample -- and every statistic later cells derive from it -- reproducible on
# re-run; without it each execution produced a different sample, so outputs of
# cells run at different times could not be compared.
x = X.rvs(size=10000, random_state=42)
# Report the sample together with its mean and its (population, ddof=0) variance.
print("对μ=1，σ=2的总体取样1w次得样本：{0}，均值：{1}，方差：{2}".format(x, np.mean(x), np.var(x)))
# Largest and smallest sampled values.
x.max(), x.min()

对μ=1，σ=2的总体取样1w次得样本：[ 0.51730975  1.55220676 -1.01756729 ...  1.08735124  0.655663
  3.17025961]，均值：0.9857030111139947，方差：3.911267164469408


(10.387076877953072, -7.617745525839489)

In [30]:
# Fit a normal distribution to the sample `x`; the fit returns the estimated
# (loc, scale) pair, i.e. the estimated mean and standard deviation.
loc_hat, scale_hat = stats.norm.fit(x)
(loc_hat, scale_hat)

(1.0198506833318957, 2.0030892243669363)

In [31]:
# Split the sample range into 100 equal-width bins.
# `density=True` replaces the `normed` keyword, which was deprecated and then
# removed in NumPy 1.24. With it, `pdf` holds the probability density of each
# bin (not raw counts) and `t` holds the 101 bin edges.
pdf, t = np.histogram(x, bins=100, density=True)
pdf, t

(array([0.00060542, 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.00121084, 0.        ,
        0.00060542, 0.00181626, 0.00060542, 0.00181626, 0.00060542,
        0.        , 0.00060542, 0.        , 0.00242168, 0.00665963,
        0.00181626, 0.00423795, 0.00544879, 0.01089758, 0.00787048,
        0.011503  , 0.01210842, 0.02179516, 0.02482227, 0.02300601,
        0.02603311, 0.03753612, 0.03693069, 0.03450901, 0.04782828,
        0.04903912, 0.06720176, 0.0684126 , 0.08960234, 0.08475897,
        0.10534329, 0.11503003, 0.1198734 , 0.12653304, 0.14166857,
        0.14530109, 0.16527999, 0.16043662, 0.18344263, 0.17799384,
        0.18586432, 0.18586432, 0.19434021, 0.188286  , 0.19494563,
        0.188286  , 0.21310827, 0.22097875, 0.18949684, 0.18404805,
        0.17315047, 0.15862036, 0.18525889, 0.14651194, 0.13803604,
        0.14287941, 0.12411135, 0.1168463 , 0.09020776, 0.10655414,
        0.07809934, 0.0768885 , 0.05388249, 0.06

## 绘制分布直方图

In [None]:
# Normalised histogram of the sample (`density=True` supersedes the removed
# `normed` keyword).
pdf, t = np.histogram(x, bins=100, density=True)
# np.histogram returns bin edges (one more than the number of bins); convert
# them to bin midpoints so that `t` lines up element-wise with `pdf`.
# NOTE(review): the original index expression was left empty (`t[]`, a syntax
# error); midpoints are the assumed intent -- confirm.
t = (t[:-1] + t[1:]) / 2

## 混合R编程

In [4]:
# Call R's data() for the `iris` dataset and inspect the type of what it returns.
loaded = r.data('iris')
type(loaded)

rpy2.robjects.vectors.StrVector

In [5]:
# Look up the `iris` object in the R session and preview its first rows.
iris = r['iris']
iris.head()

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa


In [6]:
# Small 3x3 integer frame, written row by row: columns A, B, C and string row labels.
df = pd.DataFrame(
    [
        [1, 4, 7],
        [2, 5, 8],
        [3, 6, 9],
    ],
    columns=["A", "B", "C"],
    index=["one", "two", "three"],
)
df

Unnamed: 0,A,B,C
one,1,4,7
two,2,5,8
three,3,6,9


In [7]:
# Convert the pandas DataFrame `df` into an rpy2 R data frame object.
# NOTE(review): `py2ri` appears to be the rpy2 2.x spelling; newer rpy2 releases
# use `py2rpy` -- confirm against the installed version.
r_dataframe = pandas2ri.py2ri(df)
# Print the Python-side type first, then the frame's own text rendering.
print(type(r_dataframe), r_dataframe, sep="\n")

<class 'rpy2.robjects.vectors.DataFrame'>
      A B C
one   1 4 7
two   2 5 8
three 3 6 9



## 生成正态分布

In [3]:
# Draw 21 samples from a normal distribution with mean 3.7 and standard
# deviation 0.97. A dedicated, seeded generator makes the draw reproducible on
# every run; the unseeded global np.random state gave different values each time.
# RandomState is used (rather than a newer generator API) so the cell also runs
# on older NumPy releases.
rate = np.random.RandomState(42).normal(3.7, 0.97, 21)
rate

array([5.44823344, 4.50984656, 4.09589632, 2.72015206, 3.81074024,
       3.70268298, 4.26975545, 3.59589388, 4.18839841, 2.87466325,
       2.8452787 , 4.42399088, 4.85951065, 4.06048702, 3.21268982,
       3.36034882, 2.94150103, 5.37665686, 3.73944429, 3.92773814,
       5.57644013])

In [None]:
x = np.linspace