* Code in this file is intended to show:
    1. how to generate a random matrix with a desired rank
    2. how to sample random linear combinations of a matrix's columns
    3. that randomly sampled linear combinations of a matrix's columns are very likely to be linearly independent (until their number exceeds the matrix's rank)
* Example Snippets are for syntax tests. You can ignore them.

In [2]:
import pandas as pandas
import random as rd
import numpy as np

# Generate Random Matrix 10,000 * 10,000

* **A = UPV**
    * U: m×m invertible

    * V: n×n invertible, and

    * P: the m×n matrix of rank k, with k ones on the diagonal and zeros elsewhere

    -Reference:
https://math.stackexchange.com/questions/757671/generate-some-random-matrix-with-given-rank

In [6]:
# Build a random m x n matrix A of rank k as A = U P V.
# U (m x m) and V (n x n) are filled with random integers; matrices generated
# this way have a high chance of being invertible (full rank).
# P is the m x n matrix with k ones on its diagonal, so A has rank k.
# To get a different rank, change k (e.g. k = 50 for rank 50).

# The sizes below are reduced for a quick check: a 2000x2000 matrix of rank 20.
# For a 10,000x10,000 matrix, set m and n accordingly together with the desired k.
m = 2000
n = 2000
k = 20
randomRange = 100

# One vectorised draw per matrix instead of filling row by row.
# Entries are integers in 0..randomRange-1, converted to float so that A is a
# float matrix.
randU = np.random.choice(randomRange, (m, m)).astype(float)  # m x m
randV = np.random.choice(randomRange, (n, n)).astype(float)  # n x n

# P has to be m x n (not m x m) for U P V to be defined when m != n.
P = np.zeros((m, n), dtype=int)
diagIdx = np.arange(k)
P[diagIdx, diagIdx] = 1

# U P keeps the first k columns of U (zeros elsewhere) and P V keeps the first
# k rows of V, so U P V equals the product of those two slices. This avoids two
# full-size matrix products.
# Note: the matrix product needs matmul; `*` would multiply element-wise.
A = np.matmul(randU[:, :k], randV[:k, :])
print(A)

[[51155. 42738. 47944. ... 54735. 50954. 46709.]
 [52178. 42461. 41680. ... 56644. 57445. 61317.]
 [47771. 40639. 48520. ... 47623. 43982. 44907.]
 ...
 [46288. 37753. 40303. ... 43297. 54682. 50933.]
 [60025. 44543. 51643. ... 57208. 62223. 59914.]
 [45355. 36996. 44641. ... 44832. 48962. 44845.]]


# Guess Rank of A

In [7]:
#example snippet
# temp1 is a 1x4 row matrix and temp2 a 2x4 matrix; multiplying temp2 by the
# transpose of temp1 (4x1) yields a 2x1 column.
temp1 = np.matrix([1, 2, 3, 4])
temp2 = np.matrix([[1, 1, 1, 1], [2, 2, 2, 2]])
for shown in (temp1, temp2, np.matmul(temp2, temp1.T)):
    print(shown)

[[1 2 3 4]]
[[1 1 1 1]
 [2 2 2 2]]
[[10]
 [20]]


In [8]:
#Algorithm for measuring rank of A
# Sample random linear combinations A @ w of A's columns one at a time and
# stack them as columns. As long as each new sample is independent of the
# earlier ones the stacked matrix has rank == number of samples; the first
# sample that fails this test reveals the rank of A (samples drawn so far - 1).
maxSamples = 100  # upper bound on the number of samples drawn
listAw = []
cnt = 1
for i in range(maxSamples):
    w = np.random.choice(n // 10, n)  # one random vector of n integer coefficients
    listAw.append(np.matmul(A, w))
    # Plain ndarray with the samples as columns (np.mat was removed in NumPy 2.0).
    tempA = np.column_stack(listAw)
    if np.linalg.matrix_rank(tempA) < cnt:
        break
    cnt = cnt + 1
# If the loop ran out without hitting a dependent sample, cnt - 1 == maxSamples
# and the value below is only a lower bound on the rank.
resultRank = cnt - 1
print(resultRank)

20


In [9]:
#example snippet
# Show both transposes, then place them side by side as columns.
col1 = temp1.T
col2 = temp2.T
print(col1)
print(col2)
np.column_stack((col1, col2))

[[1]
 [2]
 [3]
 [4]]
[[1 2]
 [1 2]
 [1 2]
 [1 2]]


matrix([[1, 1, 2],
        [2, 1, 2],
        [3, 1, 2],
        [4, 1, 2]])

In [10]:
#example snippet
# Draw 10 random length-n integer vectors, stack them as the columns of an
# n x 10 matrix, and print that matrix together with its rank.
tempL = []
for i in range(10):
    w = np.random.choice(n // 10, n)
    print(w)
    tempL.append(w)
# np.column_stack builds the n x 10 array directly (np.mat was removed in NumPy 2.0).
what = np.column_stack(tempL)
print(np.linalg.matrix_rank(what))
print(what)

[162 190 107 ...  84 179 139]
[107  66 173 ...  64 135 194]
[  4 149  16 ...  75 177 141]
[130 158 159 ... 156 198  44]
[ 70  79 134 ... 198 106 121]
[132 131  35 ... 171  33 110]
[ 39 161 107 ...   9 166  23]
[ 52 154  86 ... 147 145 171]
[ 11 153  19 ...  44  59  88]
[ 89  35 185 ...  46 142  16]
10
[[162 107   4 ...  52  11  89]
 [190  66 149 ... 154 153  35]
 [107 173  16 ...  86  19 185]
 ...
 [ 84  64  75 ... 147  44  46]
 [179 135 177 ... 145  59 142]
 [139 194 141 ... 171  88  16]]


In [None]:
#Test 100 times
# Repeat the whole experiment: build a fresh random m x n matrix A = U P V of
# rank k, then estimate its rank by sampling random combinations of its
# columns until a sample is linearly dependent on the earlier ones.
m = 2000
n = 2000
k = 20
randomRange = 100
maxSamples = 100  # upper bound on the number of samples drawn per trial

# P does not change between trials: m x n with k ones on the diagonal.
P = np.zeros((m, n), dtype=int)
diagIdx = np.arange(k)
P[diagIdx, diagIdx] = 1

for trial in range(100):
    # One vectorised draw per matrix; entries are integers in 0..randomRange-1,
    # converted to float so that A is a float matrix.
    randU = np.random.choice(randomRange, (m, m)).astype(float)
    randV = np.random.choice(randomRange, (n, n)).astype(float)

    # U P V only involves the first k columns of U and the first k rows of V,
    # so multiply just those slices instead of two full-size matrix products.
    # Note: the matrix product needs matmul; `*` would multiply element-wise.
    A = np.matmul(randU[:, :k], randV[:k, :])

    listAw = []
    cnt = 1
    for i in range(maxSamples):
        w = np.random.choice(n // 10, n)  # one random vector of n integer coefficients
        listAw.append(np.matmul(A, w))
        # Plain ndarray with the samples as columns (np.mat was removed in NumPy 2.0).
        tempA = np.column_stack(listAw)
        if np.linalg.matrix_rank(tempA) < cnt:
            break
        cnt = cnt + 1
    # If no dependent sample was found, cnt - 1 == maxSamples and the value
    # below is only a lower bound on the rank.
    resultRank = cnt - 1
    print(resultRank)
    

20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
20
