In [1]:
import numpy as np
zip('cat dog apple lion NYC love'.split(), np.random.rand(6))

<zip at 0x7fe119f7db40>

In [2]:
list(zip('cat dog apple lion NYC love'.split(), np.random.rand(6)))

[('cat', 0.7385134223129501),
 ('dog', 0.18660946591739314),
 ('apple', 0.19279064615127883),
 ('lion', 0.0886422493017307),
 ('NYC', 0.3194626604706511),
 ('love', 0.848490477522399)]

# 4.2 Latent Semantic Analysis

In [3]:
from nlpia.book.examples.ch04_catdog_lsa_3x6x16 import word_topic_vectors
word_topic_vectors.T.round(1)

100%|██████████| 263/263 [00:00<00:00, 261460.52it/s]


Unnamed: 0,cat,dog,apple,lion,nyc,love
top0,-0.6,-0.4,0.5,-0.3,0.4,-0.1
top1,-0.1,-0.3,-0.4,-0.1,0.1,0.8
top2,-0.3,0.8,-0.1,-0.5,0.0,0.1


In [4]:
from nlpia.book.examples.ch04_catdog_lsa_sorted import lsa_models, prettify_tdm
bow_svd, tfidf_svd = lsa_models()
prettify_tdm(**bow_svd)

100%|██████████| 263/263 [00:00<00:00, 446599.98it/s]
100%|██████████| 263/263 [00:00<00:00, 463878.03it/s]


Unnamed: 0,cat,dog,apple,lion,nyc,love,text
0,,,1.0,,1.0,,NYC is the Big Apple.
1,,,1.0,,1.0,,NYC is known as the Big Apple.
2,,,,,1.0,1.0,I love NYC!
3,,,1.0,,1.0,,I wore a hat to the Big Apple party in NYC.
4,,,1.0,,1.0,,Come to NYC. See the Big Apple!
5,,,1.0,,,,Manhattan is called the Big Apple.
6,1.0,,,,,,New York is a big city for a small cat.
7,1.0,,,1.0,,,"The lion, a big cat, is the king of the jungle."
8,1.0,,,,,1.0,I love my pet cat.
9,,,,,1.0,1.0,I love New York City (NYC).


In [5]:
tdm = bow_svd['tdm']
tdm

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
cat,0,0,0,0,0,0,1,1,1,0,1
dog,0,0,0,0,0,0,0,0,0,0,1
apple,1,1,0,1,1,1,0,0,0,0,0
lion,0,0,0,0,0,0,0,1,0,0,0
nyc,1,1,1,1,1,0,0,0,0,1,0
love,0,0,1,0,0,0,0,0,1,1,0


# Singular Value Decomposition

In [6]:
import numpy as np
U, s, Vt = np.linalg.svd(tdm)
import pandas as pd
pd.DataFrame(U, index=tdm.index).round(2)

Unnamed: 0,0,1,2,3,4,5
cat,-0.04,0.83,-0.38,-0.0,0.11,-0.38
dog,-0.0,0.21,-0.18,-0.71,-0.39,0.52
apple,-0.62,-0.21,-0.51,0.0,0.49,0.27
lion,-0.0,0.21,-0.18,0.71,-0.39,0.52
nyc,-0.75,0.0,0.24,-0.0,-0.52,-0.32
love,-0.22,0.42,0.69,0.0,0.41,0.37


In [7]:
s.round(1)

array([3.1, 2.2, 1.8, 1. , 0.8, 0.5])

In [8]:
S = np.zeros((len(U), len(Vt)))
S

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [9]:
err = []
for numdim in range(len(s), 0, -1):
    S[numdim-1, numdim-1] = 0
    reconstructed_tdm = U.dot(S).dot(Vt)
    err.append(np.sqrt(((reconstructed_tdm - tdm).values.flatten() ** 2).sum() / np.product(tdm.shape)))
np.array(err).round(2)

array([0.55, 0.55, 0.55, 0.55, 0.55, 0.55])

# 4.4 Principal Component Analysis

## Load SMS Messages

In [10]:
import pandas as pd
from nlpia.data.loaders import get_data
sms = get_data('sms-spam')
index = ['sms{}{}'.format(i, '!'*j)
        for (i,j) in zip(range(len(sms)), sms.spam)]
index

['sms0',
 'sms1',
 'sms2!',
 'sms3',
 'sms4',
 'sms5!',
 'sms6',
 'sms7',
 'sms8!',
 'sms9!',
 'sms10',
 'sms11!',
 'sms12!',
 'sms13',
 'sms14',
 'sms15!',
 'sms16',
 'sms17',
 'sms18',
 'sms19!',
 'sms20',
 'sms21',
 'sms22',
 'sms23',
 'sms24',
 'sms25',
 'sms26',
 'sms27',
 'sms28',
 'sms29',
 'sms30',
 'sms31',
 'sms32',
 'sms33',
 'sms34!',
 'sms35',
 'sms36',
 'sms37',
 'sms38',
 'sms39',
 'sms40',
 'sms41',
 'sms42!',
 'sms43',
 'sms44',
 'sms45',
 'sms46',
 'sms47',
 'sms48',
 'sms49',
 'sms50',
 'sms51',
 'sms52',
 'sms53',
 'sms54!',
 'sms55',
 'sms56!',
 'sms57',
 'sms58',
 'sms59',
 'sms60',
 'sms61',
 'sms62',
 'sms63',
 'sms64',
 'sms65!',
 'sms66',
 'sms67!',
 'sms68!',
 'sms69',
 'sms70',
 'sms71',
 'sms72',
 'sms73',
 'sms74',
 'sms75',
 'sms76',
 'sms77',
 'sms78',
 'sms79',
 'sms80',
 'sms81',
 'sms82',
 'sms83',
 'sms84',
 'sms85',
 'sms86',
 'sms87',
 'sms88',
 'sms89',
 'sms90',
 'sms91',
 'sms92',
 'sms93!',
 'sms94',
 'sms95!',
 'sms96',
 'sms97',
 'sms98',
 's

In [11]:
sms.index = index
sms.head(6)

Unnamed: 0,spam,text
sms0,0,"Go until jurong point, crazy.. Available only in bu..."
sms1,0,Ok lar... Joking wif u oni...
sms2!,1,Free entry in 2 a wkly comp to win FA Cup final tkt...
sms3,0,U dun say so early hor... U c already then say...
sms4,0,"Nah I don't think he goes to usf, he lives around h..."
sms5!,1,FreeMsg Hey there darling it's been 3 week's now an...


## Calculate TF-IDF vectors for each messages

In [12]:
from sklearn.feature_extraction.text import TfidfVectorizer
from nltk.tokenize.casual import casual_tokenize

tfidf = TfidfVectorizer(tokenizer=casual_tokenize)
tfidf_docs = tfidf.fit_transform(raw_documents=sms.text)
tfidf_docs

<4837x9232 sparse matrix of type '<class 'numpy.float64'>'
	with 82353 stored elements in Compressed Sparse Row format>

In [13]:
tfidf_docs = tfidf_docs.toarray()

In [14]:
len(tfidf.vocabulary_)

9232

In [15]:
tfidf_docs = pd.DataFrame(tfidf_docs)
tfidf_docs = tfidf_docs - tfidf_docs.mean()
tfidf_docs.shape

(4837, 9232)

In [16]:
sms.spam.sum()

638

### We have an imbalanced dataset and more features (words) than messages. Therefore, we are more likely to overfit and our spam filter will only dependent on spammy words being in the spammy messages. But the spammers could just use synonyms. 

In [17]:
from sklearn.decomposition import PCA
pca = PCA(n_components=16)
pca = pca.fit(tfidf_docs)
pca_topic_vectors = pca.transform(tfidf_docs)
columns = ['topic{}'.format(i) for i in range(pca.n_components)]
pca_topic_vectors = pd.DataFrame(pca_topic_vectors, columns=columns, index=index)

In [18]:
pca_topic_vectors.round(3).head(6)

Unnamed: 0,topic0,topic1,topic2,topic3,topic4,topic5,topic6,topic7,topic8,topic9,topic10,topic11,topic12,topic13,topic14,topic15
sms0,0.201,0.003,0.037,0.011,-0.019,-0.053,0.039,-0.065,0.012,-0.081,0.01,-0.009,-0.008,-0.03,-0.016,0.033
sms1,0.404,-0.094,-0.078,0.051,0.1,0.047,0.023,0.065,0.024,-0.024,-0.006,0.033,0.039,-0.031,0.054,-0.046
sms2!,-0.03,-0.048,0.09,-0.067,0.091,-0.043,-0.0,-0.002,-0.057,0.05,0.127,0.021,0.019,-0.019,-0.055,0.031
sms3,0.329,-0.033,-0.035,-0.016,0.052,0.056,-0.166,-0.073,0.063,-0.106,0.022,0.02,0.066,-0.068,0.019,-0.074
sms4,0.002,0.031,0.038,0.034,-0.075,-0.092,-0.044,0.062,-0.046,0.031,0.029,-0.01,0.029,0.029,-0.081,-0.017
sms5!,-0.016,0.059,0.014,-0.006,0.122,-0.04,0.005,0.168,-0.024,0.066,0.039,0.063,-0.01,0.071,0.011,0.03


In [19]:
pca.components_

array([[-7.11076313e-02,  8.18235007e-03, -1.21138177e-03, ...,
         5.71865954e-04,  5.71865954e-04,  5.71865954e-04],
       [ 6.35352949e-02,  7.64904390e-03,  2.67509525e-04, ...,
         1.02158841e-03,  1.02158841e-03,  1.02158841e-03],
       [ 7.08131424e-02,  2.68897828e-02,  1.32488772e-04, ...,
        -9.53033699e-04, -9.53033699e-04, -9.53033699e-04],
       ...,
       [ 1.65419479e-01, -3.86049644e-02,  2.68376203e-03, ...,
         2.07670044e-06,  2.07670044e-06,  2.07670044e-06],
       [-1.01381106e-02, -1.66832820e-02,  8.46081756e-04, ...,
         6.02090474e-04,  6.02090474e-04,  6.02090474e-04],
       [ 2.01244376e-02, -2.91443661e-02,  1.33792835e-04, ...,
        -4.04553954e-04, -4.04553954e-04, -4.04553954e-04]])

In [20]:
# TFIDFVectorizer stores the vocab as a dicgtionary that maps each term to an index number
tfidf.vocabulary_

{'go': 3807,
 'until': 8487,
 'jurong': 4675,
 'point': 6296,
 ',': 13,
 'crazy': 2549,
 '..': 21,
 'available': 1531,
 'only': 5910,
 'in': 4396,
 'bugis': 1973,
 'n': 5594,
 'great': 3894,
 'world': 8977,
 'la': 4811,
 'e': 3056,
 'buffet': 1971,
 '...': 25,
 'cine': 2277,
 'there': 8071,
 'got': 3855,
 'amore': 1296,
 'wat': 8736,
 'ok': 5874,
 'lar': 4848,
 'joking': 4642,
 'wif': 8875,
 'u': 8395,
 'oni': 5906,
 'free': 3604,
 'entry': 3195,
 '2': 471,
 'a': 1054,
 'wkly': 8933,
 'comp': 2386,
 'to': 8192,
 'win': 8890,
 'fa': 3328,
 'cup': 2608,
 'final': 3450,
 'tkts': 8180,
 '21st': 497,
 'may': 5272,
 '2005': 487,
 '.': 15,
 'text': 8020,
 '87121': 948,
 'receive': 6688,
 'question': 6574,
 '(': 9,
 'std': 7651,
 'txt': 8379,
 'rate': 6628,
 ')': 10,
 't': 7889,
 '&': 7,
 "c's": 2020,
 'apply': 1383,
 '08452810075': 115,
 'over': 6003,
 '18': 438,
 "'": 8,
 's': 6959,
 'dun': 3041,
 'say': 7034,
 'so': 7438,
 'early': 3069,
 'hor': 4207,
 'c': 2019,
 'already': 1268,
 'then': 

In [21]:
tfidf.vocabulary_.values()

dict_values([3807, 8487, 4675, 6296, 13, 2549, 21, 1531, 5910, 4396, 1973, 5594, 3894, 8977, 4811, 3056, 1971, 25, 2277, 8071, 3855, 1296, 8736, 5874, 4848, 4642, 8875, 8395, 5906, 3604, 3195, 471, 1054, 8933, 2386, 8192, 8890, 3328, 2608, 3450, 8180, 497, 5272, 487, 15, 8020, 948, 6688, 6574, 9, 7651, 8379, 6628, 10, 7889, 7, 2020, 1383, 115, 6003, 438, 8, 6959, 3041, 7034, 7438, 3069, 4207, 2019, 1268, 8065, 5606, 4311, 2948, 8092, 4048, 3819, 8537, 5004, 1435, 4104, 8111, 3613, 4116, 2666, 4535, 1693, 591, 8788, 5784, 1310, 5732, 8967, 1584, 0, 4312, 4954, 7454, 3677, 9158, 8489, 3552, 4533, 7674, 1037, 7955, 9097, 2230, 7127, 9216, 344, 6641, 3240, 5584, 1942, 4519, 5769, 7529, 8918, 5281, 8083, 8312, 1214, 6106, 1452, 6148, 9171, 6796, 5315, 5968, 5386, 5807, 8599, 4022, 7154, 2047, 1253, 2046, 6418, 11, 982, 2489, 3634, 8900, 8569, 5678, 2620, 4036, 7113, 6689, 986, 6450, 6851, 2283, 2038, 263, 2344, 4771, 8565, 384, 4226, 3965, 5441, 371, 5484, 5946, 5489, 6590, 3192, 8495, 8052

In [22]:
zip(tfidf.vocabulary_.values(), tfidf.vocabulary_.keys())

<zip at 0x7fe0dbf8d5f0>

In [23]:
sorted(zip(tfidf.vocabulary_.values(), tfidf.vocabulary_.keys()))

[(0, '!'),
 (1, '"'),
 (2, '#'),
 (3, '#150'),
 (4, '#5000'),
 (5, '$'),
 (6, '%'),
 (7, '&'),
 (8, "'"),
 (9, '('),
 (10, ')'),
 (11, '*'),
 (12, '+'),
 (13, ','),
 (14, '-'),
 (15, '.'),
 (16, '. .'),
 (17, '. . .'),
 (18, '. . . .'),
 (19, '. . . . .'),
 (20, '. ..'),
 (21, '..'),
 (22, '.. .'),
 (23, '.. . . .'),
 (24, '.. ... ...'),
 (25, '...'),
 (26, '... . . . .'),
 (27, '/'),
 (28, '0'),
 (29, '00'),
 (30, '00870405040'),
 (31, '0089'),
 (32, '01'),
 (33, '0121 2025050'),
 (34, '01223585236'),
 (35, '01223585334'),
 (36, '01256987'),
 (37, '02'),
 (38, '02/06'),
 (39, '02/09'),
 (40, '0207 153 9153'),
 (41, '0207 153 9996'),
 (42, '0207-083-6089'),
 (43, '02072069400'),
 (44, '02073162414'),
 (45, '02085076972'),
 (46, '03'),
 (47, '03530150'),
 (48, '04'),
 (49, '04/09'),
 (50, '05'),
 (51, '050703'),
 (52, '06'),
 (53, '06.05'),
 (54, '06/11'),
 (55, '07/11'),
 (56, '07008009200'),
 (57, '07046744435'),
 (58, '07090201529'),
 (59, '07090298926'),
 (60, '07099833605'),
 (61, 

In [24]:
column_nums, terms = zip(*sorted(zip(tfidf.vocabulary_.values(), tfidf.vocabulary_.keys())))

In [25]:
terms

('!',
 '"',
 '#',
 '#150',
 '#5000',
 '$',
 '%',
 '&',
 "'",
 '(',
 ')',
 '*',
 '+',
 ',',
 '-',
 '.',
 '. .',
 '. . .',
 '. . . .',
 '. . . . .',
 '. ..',
 '..',
 '.. .',
 '.. . . .',
 '.. ... ...',
 '...',
 '... . . . .',
 '/',
 '0',
 '00',
 '00870405040',
 '0089',
 '01',
 '0121 2025050',
 '01223585236',
 '01223585334',
 '01256987',
 '02',
 '02/06',
 '02/09',
 '0207 153 9153',
 '0207 153 9996',
 '0207-083-6089',
 '02072069400',
 '02073162414',
 '02085076972',
 '03',
 '03530150',
 '04',
 '04/09',
 '05',
 '050703',
 '06',
 '06.05',
 '06/11',
 '07/11',
 '07008009200',
 '07046744435',
 '07090201529',
 '07090298926',
 '07099833605',
 '07123456789',
 '07732584351',
 '07734396839',
 '07742676969',
 '07753741225',
 '0776xxxxxxx',
 '07786200117',
 '077xxx',
 '078',
 '07801543489',
 '07808',
 '07808247860',
 '07808726822',
 '07815296484',
 '07821230901',
 '078498',
 '07880867867',
 '0789xxxxxxx',
 '07946746291',
 '0796xxxxxx',
 '07973788240',
 '07xxxxxxxxx',
 '08',
 '0800',
 '0800 0721072',
 '

In [26]:
weights = pd.DataFrame(pca.components_, columns=terms, index=['topic{}'.format(i) for i in range(16)])
pd.options.display.max_columns = 8
weights.head(4).round(3)

Unnamed: 0,!,"""",#,#150,...,…,┾,〨ud,鈥
topic0,-0.071,0.008,-0.001,-0.0,...,-0.002,0.001,0.001,0.001
topic1,0.064,0.008,0.0,-0.0,...,0.003,0.001,0.001,0.001
topic2,0.071,0.027,0.0,0.001,...,0.002,-0.001,-0.001,-0.001
topic3,-0.059,-0.032,-0.001,-0.0,...,0.001,0.001,0.001,0.001


In [28]:
pd.options.display.max_columns = 12
deals = weights['! ;) :) half off free crazy deal only $ 80 %'.split()].round(3) * 100
deals

Unnamed: 0,!,;),:),half,off,free,crazy,deal,only,$,80,%
topic0,-7.1,0.1,-0.5,-0.0,-0.4,-2.0,-0.0,-0.1,-2.2,0.3,-0.0,-0.0
topic1,6.4,0.0,7.4,0.1,0.4,-2.3,-0.2,-0.1,-3.8,-0.1,-0.0,-0.2
topic2,7.1,0.2,-0.1,0.0,0.3,4.4,0.1,-0.1,0.7,0.0,0.0,0.1
topic3,-5.9,-0.3,-7.1,0.2,0.3,-0.2,0.0,0.1,-2.3,0.1,-0.1,-0.3
topic4,38.1,-0.1,-12.4,-0.1,-0.2,9.9,0.1,-0.2,3.0,0.3,0.1,-0.1
topic5,-26.5,0.1,-1.5,-0.3,-0.7,-1.4,-0.6,-0.2,-1.8,-0.9,0.0,-0.0
topic6,-10.9,-0.5,19.9,-0.4,-0.9,-0.6,-0.2,-0.1,-1.4,-0.0,-0.0,-0.1
topic7,16.3,0.1,-18.0,0.7,0.8,-2.9,0.0,0.0,-1.8,-0.3,0.0,-0.1
topic8,34.1,0.2,4.9,-0.4,-0.5,-0.0,-0.4,-0.4,3.1,-0.6,-0.0,-0.2
topic9,7.7,-0.3,16.4,1.5,-0.9,6.2,-0.5,-0.4,3.1,-0.5,-0.0,-0.0


In [29]:
deals.T.sum()

topic0    -11.9
topic1      7.6
topic2     12.7
topic3    -15.5
topic4     38.4
topic5    -33.8
topic6      4.8
topic7     -5.2
topic8     39.8
topic9     32.3
topic10   -28.5
topic11    50.2
topic12    29.3
topic13    38.2
topic14    21.7
topic15    -3.5
dtype: float64

In [30]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=16, n_iter=100)
svd_topic_vectors = svd.fit_transform(tfidf_docs.values)
svd_topic_vectors = pd.DataFrame(svd_topic_vectors, columns=columns, index=index)
svd_topic_vectors.round(3).head(6)

Unnamed: 0,topic0,topic1,topic2,topic3,topic4,topic5,...,topic10,topic11,topic12,topic13,topic14,topic15
sms0,0.201,0.003,0.037,0.011,-0.019,-0.053,...,0.007,-0.007,0.002,-0.036,-0.014,0.037
sms1,0.404,-0.094,-0.078,0.051,0.1,0.047,...,-0.004,0.036,0.043,-0.021,0.051,-0.042
sms2!,-0.03,-0.048,0.09,-0.067,0.091,-0.043,...,0.125,0.023,0.026,-0.02,-0.042,0.052
sms3,0.329,-0.033,-0.035,-0.016,0.052,0.056,...,0.022,0.023,0.073,-0.046,0.022,-0.07
sms4,0.002,0.031,0.038,0.034,-0.075,-0.093,...,0.028,-0.009,0.027,0.034,-0.083,-0.021
sms5!,-0.016,0.059,0.014,-0.006,0.122,-0.04,...,0.041,0.055,-0.037,0.075,-0.001,0.02


In [33]:
svd_topic_vectors = (svd_topic_vectors.T / np.linalg.norm(svd_topic_vectors, axis=1)).T
svd_topic_vectors.iloc[:10].dot(svd_topic_vectors.iloc[:10].T).round(1)

Unnamed: 0,sms0,sms1,sms2!,sms3,sms4,sms5!,sms6,sms7,sms8!,sms9!
sms0,1.0,0.6,-0.1,0.6,-0.0,-0.3,-0.3,-0.1,-0.3,-0.3
sms1,0.6,1.0,-0.2,0.8,-0.2,0.0,-0.2,-0.2,-0.1,-0.1
sms2!,-0.1,-0.2,1.0,-0.2,0.1,0.4,0.0,0.3,0.5,0.4
sms3,0.6,0.8,-0.2,1.0,-0.2,-0.3,-0.1,-0.3,-0.2,-0.1
sms4,-0.0,-0.2,0.1,-0.2,1.0,0.2,0.0,0.1,-0.4,-0.2
sms5!,-0.3,0.0,0.4,-0.3,0.2,1.0,-0.1,0.1,0.3,0.4
sms6,-0.3,-0.2,0.0,-0.1,0.0,-0.1,1.0,0.1,-0.2,-0.2
sms7,-0.1,-0.2,0.3,-0.3,0.1,0.1,0.1,1.0,0.1,0.4
sms8!,-0.3,-0.1,0.5,-0.2,-0.4,0.3,-0.2,0.1,1.0,0.3
sms9!,-0.3,-0.1,0.4,-0.1,-0.2,0.4,-0.2,0.4,0.3,1.0


In [34]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
LDA??

[0;31mInit signature:[0m
[0mLDA[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0msolver[0m[0;34m=[0m[0;34m'svd'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mshrinkage[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpriors[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mn_components[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mstore_covariance[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol[0m[0;34m=[0m[0;36m0.0001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mcovariance_estimator[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mSource:[0m        
[0;32mclass[0m [0mLinearDiscriminantAnalysis[0m[0;34m([0m[0mLinearClassifierMixin[0m[0;34m,[0m[0;34m[0m
[0;34m[0m                                 [0mTransformerMixin[0m[0;34m,[0m[0;34m[0m
[0;34m[0m                                 [0mBaseEstimat

In [35]:
np.random.random(2)

array([0.02435253, 0.48411019])