# Numpy

In [1]:
import numpy as np

# Build a 5 x 5 array (rows x columns) of uniform random floats in [0, 1)

arr = np.random.random(size=(5, 5))
print(arr)

[[ 0.05973887  0.33920437  0.86776089  0.91122371  0.68416402]
 [ 0.38619777  0.51503866  0.2995845   0.8733827   0.04890102]
 [ 0.43595139  0.57045487  0.26263718  0.76275375  0.63021022]
 [ 0.83910836  0.7271145   0.78085236  0.36426871  0.48011417]
 [ 0.74657582  0.57134724  0.70180225  0.42929149  0.548452  ]]


In [2]:
# Indexing and slicing a 2-D array with arr[row, column]

print(arr[0, 0])   # one element: row 0, column 0
print(arr[0, :])   # every column of row 0 (the first row)
print(arr[-1, :])  # every column of the last row

print(arr[:, 0])   # every row of column 0 (the first column)
print(arr[:, -1])  # every row of the last column


0.0597388735592
[ 0.05973887  0.33920437  0.86776089  0.91122371  0.68416402]
[ 0.74657582  0.57134724  0.70180225  0.42929149  0.548452  ]
[ 0.05973887  0.38619777  0.43595139  0.83910836  0.74657582]
[ 0.68416402  0.04890102  0.63021022  0.48011417  0.548452  ]


In [3]:
# Transpose: .T swaps the two axes of a 2-D array, so element [i, j] moves to [j, i]
# (each row of arr becomes a column of the result, and each column becomes a row)

arr_transposed = arr.T
print(arr_transposed)

[[ 0.05973887  0.38619777  0.43595139  0.83910836  0.74657582]
 [ 0.33920437  0.51503866  0.57045487  0.7271145   0.57134724]
 [ 0.86776089  0.2995845   0.26263718  0.78085236  0.70180225]
 [ 0.91122371  0.8733827   0.76275375  0.36426871  0.42929149]
 [ 0.68416402  0.04890102  0.63021022  0.48011417  0.548452  ]]


In [4]:
# np.linspace(start, stop, num) returns num evenly spaced values from start to stop
# (both endpoints included) as a 1-D array
x = np.linspace(start=0, stop=20, num=3)
print(x)

# Indexing with np.newaxis adds a second axis of length 1, giving a (3, 1) column
print(x[:, np.newaxis])



[  0.  10.  20.]
[[  0.]
 [ 10.]
 [ 20.]]


# Scipy Sparse Matrices

In [5]:
# Storing Zeros Is Inefficient When Working With Data That Contains Mostly Zeros

In [6]:
# Fresh 5 x 5 array of uniform random floats in [0, 1), bound to the name arr
arr = np.random.random(size=(5, 5))
print(arr)

[[ 0.10219105  0.00807995  0.25238057  0.78644653  0.85109568]
 [ 0.38073905  0.80326788  0.99691422  0.22651962  0.88913953]
 [ 0.99494056  0.56344062  0.08829096  0.77484485  0.38070652]
 [ 0.09162603  0.13031729  0.73736719  0.87985604  0.62423776]
 [ 0.67400671  0.40427328  0.88670577  0.93569795  0.29418025]]


In [7]:
# Zero out every entry of arr that is below 0.5 (this modifies arr in place),
# so that many entries become zeros, i.e. the data gets less dense
below_threshold = arr < 0.5
arr[below_threshold] = 0
print(arr)

[[ 0.          0.          0.          0.78644653  0.85109568]
 [ 0.          0.80326788  0.99691422  0.          0.88913953]
 [ 0.99494056  0.56344062  0.          0.77484485  0.        ]
 [ 0.          0.          0.73736719  0.87985604  0.62423776]
 [ 0.67400671  0.          0.88670577  0.93569795  0.        ]]


In [8]:
from scipy import sparse

# Build a CSR (Compressed Sparse Row) matrix from arr.
# Only the non-zero entries are stored.
# In the printed form each line is one stored entry: the (row, column)
# position on the left and the value on the right.

arr_csr = sparse.csr_matrix(arr)
print(arr_csr)

  (0, 3)	0.786446527316
  (0, 4)	0.851095679393
  (1, 1)	0.803267883673
  (1, 2)	0.996914223057
  (1, 4)	0.889139526574
  (2, 0)	0.99494055737
  (2, 1)	0.5634406181
  (2, 3)	0.774844847797
  (3, 2)	0.737367186821
  (3, 3)	0.879856037797
  (3, 4)	0.624237757874
  (4, 0)	0.674006713113
  (4, 2)	0.886705771859
  (4, 3)	0.935697953878


In [9]:
# Expand the CSR matrix back into an ordinary dense NumPy array (zeros included)
arr_dense = arr_csr.toarray()
print(arr_dense)

[[ 0.          0.          0.          0.78644653  0.85109568]
 [ 0.          0.80326788  0.99691422  0.          0.88913953]
 [ 0.99494056  0.56344062  0.          0.77484485  0.        ]
 [ 0.          0.          0.73736719  0.87985604  0.62423776]
 [ 0.67400671  0.          0.88670577  0.93569795  0.        ]]


In [10]:
# Sparse matrices support linear algebra: build a random vector
# with one entry per matrix column
n_cols = arr_csr.shape[1]
x = np.random.random(n_cols)
print(x)

# Matrix-vector product, once with the sparse matrix and once with the dense array
arr1 = arr_csr.dot(x)
print(arr1)
arr2 = arr.dot(x)
print(arr2)

# Compare the two products element-wise within floating-point tolerance
np.allclose(arr1, arr2)

[ 0.75822016  0.75105484  0.534959    0.57583548  0.38072888]
[ 0.77690052  1.47512756  1.62374195  1.13877887  1.52420479]
[ 0.77690052  1.47512756  1.62374195  1.13877887  1.52420479]


True

The CSR representation is efficient for computations, but not for adding elements. The LIL (List of Lists) representation is better suited to building a matrix incrementally:

In [11]:
# Start from an empty 7 x 7 LIL (List of Lists) matrix and fill it entry by entry.
lil = sparse.lil_matrix((7, 7))

# Draw 8 random (row, column) pairs with indices in 0..6 and store row + column at
# each position. A repeated pair overwrites the same cell, so there can be fewer
# than 8 stored entries.
index_pairs = np.random.randint(0, 7, (8, 2))
for row, col in index_pairs:
    lil[row, col] = row + col

print(lil)

# Dense view of the same matrix
print(lil.toarray())

  (0, 3)	3.0
  (1, 4)	5.0
  (1, 6)	7.0
  (2, 2)	4.0
  (2, 6)	8.0
  (4, 2)	6.0
  (5, 1)	6.0
  (5, 4)	9.0
[[ 0.  0.  0.  3.  0.  0.  0.]
 [ 0.  0.  0.  0.  5.  0.  7.]
 [ 0.  0.  4.  0.  0.  0.  8.]
 [ 0.  0.  0.  0.  0.  0.  0.]
 [ 0.  0.  6.  0.  0.  0.  0.]
 [ 0.  6.  0.  0.  9.  0.  0.]
 [ 0.  0.  0.  0.  0.  0.  0.]]


In [12]:
# Convert the LIL matrix to CSR format (intended here for use with scikit-learn algorithms)
lil_csr = lil.tocsr()
print(lil_csr)

  (0, 3)	3.0
  (1, 4)	5.0
  (1, 6)	7.0
  (2, 2)	4.0
  (2, 6)	8.0
  (4, 2)	6.0
  (5, 1)	6.0
  (5, 4)	9.0
