In [1]:
import numpy as np

#### Importing data with numpy
- np.loadtxt() & np.genfromtxt()
- Both functions are part of the Numpy package
- "load" (np.loadtxt) implies the data is clean and ready to be imported and used directly
- "generate" (np.genfromtxt) indicates that the function creates the dataset from the text file, and it can cope with missing values
- Generating requires constructing the array as we go through the text file

In [3]:
# Read the comma-separated numeric file with np.loadtxt.
lending_co_data_numeric_1 = np.loadtxt(
    "Lending-Company-Numeric-Data.csv",
    delimiter=",",
)
lending_co_data_numeric_1

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [4]:
# Read the same comma-separated file again, this time with np.genfromtxt.
lending_co_data_numeric_2 = np.genfromtxt(
    "Lending-Company-Numeric-Data.csv",
    delimiter=",",
)
lending_co_data_numeric_2

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [5]:
# Sanity check: for this file both loaders should produce the same array.
np.array_equal(lending_co_data_numeric_1, lending_co_data_numeric_2)

True

NaN = Not a Number
- Represents missing (or undefined) values within a NumPy array

In [7]:
# This file is semicolon-delimited and has missing entries. np.genfromtxt
# reads them as nan; np.loadtxt would raise an error on this file.
lending_co_data_numeric_NAN = np.genfromtxt(
    "Lending-Company-Numeric-Data-NAN.csv",
    delimiter=";",
)
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [9]:
# Disabled example: import every field as text instead of float.
# NOTE: np.str was only an alias of the builtin str and was removed in NumPy 1.24, so use dtype=str.
#lending_co_data_numeric_NAN = np.genfromtxt("Lending-Company-Numeric-Data-NAN.csv", delimiter= ';' , dtype = str)
#lending_co_data_numeric_NAN

### Partial Cleaning While Importing

- "skip_header" = n removes the first n lines of the dataset (n = 2 in the example below)
- Becomes convenient when text files include several lines of comments and notes from the authors
- We can omit as many lines as we want from the top of the dataset

- "skip_footer" = n removes the last n lines of the dataset

In [10]:
# Skip the first two lines of the file while importing.
lending_co_data_numeric_NAN = np.genfromtxt(
    "Lending-Company-Numeric-Data-NAN.csv",
    delimiter=";",
    skip_header=2,
)
lending_co_data_numeric_NAN

array([[ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       [ 2000.,    40.,   365.,  3041.,  4241., 15321.],
       [ 2000.,    50.,   365.,  3470.,  4820., 13720.],
       ...,
       [   nan,    40.,   365.,  4201.,  5001., 16600.],
       [ 1000.,    40.,   365.,  2080.,  3320., 15600.],
       [ 2000.,    40.,   365.,  4601.,  4601., 16600.]])

In [12]:
# Skip the last two lines of the file while importing.
lending_co_data_numeric_NAN = np.genfromtxt(
    "Lending-Company-Numeric-Data-NAN.csv",
    delimiter=";",
    skip_footer=2,
)
lending_co_data_numeric_NAN

array([[ 2000.,    40.,   365.,  3121.,  4241., 13621.],
       [ 2000.,    40.,   365.,  3061.,  4171., 15041.],
       [ 1000.,    40.,   365.,  2160.,  3280., 15340.],
       ...,
       [ 2000.,    40.,   365.,  3401.,    nan, 16600.],
       [ 2000.,    40.,   365.,    nan,  5440., 16600.],
       [   nan,    40.,   365.,  4201.,  5001., 16600.]])

#### usecols
- "usecols" = use columns
- Tells NumPy which columns we are interested in, identified by their zero-based index
- Put all the indices in parentheses (a tuple), e.g. usecols=(0, 1)
- To get the columns in a different order, list the indices in that order, e.g. usecols=(1, 0)

In [13]:
# Import only the columns at index 0 and 1.
lending_co_data_numeric_NAN = np.genfromtxt(
    "Lending-Company-Numeric-Data-NAN.csv",
    delimiter=";",
    usecols=(0, 1),
)
lending_co_data_numeric_NAN

array([[2000.,   40.],
       [2000.,   40.],
       [1000.,   40.],
       ...,
       [  nan,   40.],
       [1000.,   40.],
       [2000.,   40.]])

- For Excel files, use pandas: pd.read_excel("filename.xlsx")

### Exporting data (Pandas)
- my_data.to_csv("filename.csv")
- my_data.to_json("filename.json")
- my_data.to_excel("filename.xlsx")
- my_data.to_excel("filename.xlsx", index = False)

### Saving Files with NumPy 
- np.save("Filename-Saving", series_name)
#### np.save() 
- Creates a "file-name.npy" file in the same directory (folder) as your notebook (.ipynb) document.
- NPY is a binary file format native to NumPy (it is not a plain-text file)

### Indexing

In [3]:
# Build a 2-D array from a nested list: each inner list becomes one row.
array_a = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
array_a

array([[1, 2, 3],
       [4, 5, 6]])

In [4]:
array_a[0]  # index 0 selects the first row

array([1, 2, 3])

In [5]:
array_a[1]  # index 1 selects the second row

array([4, 5, 6])

### Specific Value

In [6]:
array_a[0][1]  # chained indexing: take row 0 first, then element 1 of that row

2

In [8]:
array_a[1,0]  # single [row, column] index: row 1, column 0

4

In [9]:
array_a[:,0]  # ":" keeps every row, 0 picks the first column

array([1, 4])

### Negative Indices

In [10]:
# A negative index counts from the end: -1 is the last element.
array_b = np.array([1, 2, 3])
array_b[-1]

3

In [11]:
 array_a[-1]  # on the 2-D array, -1 selects the last row

array([4, 5, 6])

### Assigning Values

In [12]:
# Start again from a fresh 2x3 array before assigning values into it.
first_row = [1, 2, 3]
second_row = [4, 5, 6]
array_a = np.array([first_row, second_row])
del first_row, second_row  # keep the notebook namespace unchanged
array_a

array([[1, 2, 3],
       [4, 5, 6]])

In [13]:
array_a[0,1]= 8 # [row, column] index: overwrite the element at row 0, column 1

In [14]:
array_a

array([[1, 8, 3],
       [4, 5, 6]])

In [15]:
# Assigning a single number writes the same value to every selected position.
# To give each position its own value, assign a list instead:
# here one value per column of row 0.

list_a = [8,7,8]

array_a[0] = list_a

In [16]:
array_a

array([[8, 7, 8],
       [4, 5, 6]])

In [17]:
type(array_a[0])  # a row taken out of the array is still an ndarray, not a list

numpy.ndarray

In [18]:
array_a[:] = 9  # ":" selects everything, so 9 is written to every element

In [19]:
array_a

array([[9, 9, 9],
       [9, 9, 9]])

### Elementwise Properties
- Whatever mathematical computation we are conducting, we are doing it to each element of the array

In [21]:
# 2x3 array used for the elementwise examples.
rows = ([1, 2, 3], [4, 5, 6])
array_a = np.array(rows)
del rows  # keep the notebook namespace unchanged
array_a

array([[1, 2, 3],
       [4, 5, 6]])

In [23]:
# 1-D array with three values, used for the elementwise examples.
array_b = np.array([7, 8, 9])
array_b

array([7, 8, 9])

In [25]:
array_b + 2  # the scalar is applied to every element; - and * work the same way

array([ 9, 10, 11])

 ### List vs Arrays
 - Lists and arrays serve different purposes
 - The main purpose for lists is to store data
 - The main purpose of arrays is to compute mathematical operations

In [26]:
array_a + array_b  # broadcasting: the three values of array_b are added to each row of array_a

array([[ 8, 10, 12],
       [11, 13, 15]])

### NumPy Data Types

In [27]:
# No dtype given: NumPy infers an integer dtype from the Python ints.
array_a = np.array(
    [[1, 2, 3],
     [4, 5, 6]]
)
array_a

array([[1, 2, 3],
       [4, 5, 6]])

In [28]:
# Same values stored as 32-bit floats.
array_a = np.array(
    [[1, 2, 3], [4, 5, 6]],
    dtype=np.float32,
)
array_a

array([[1., 2., 3.],
       [4., 5., 6.]], dtype=float32)

In [30]:
# Same values with the integer dtype requested explicitly.
array_a = np.array(
    [[1, 2, 3], [4, 5, 6]],
    dtype=int,
)
array_a

array([[1, 2, 3],
       [4, 5, 6]])

In [32]:
# Same values stored as complex numbers (the imaginary part is 0).
array_a = np.array(
    [[1, 2, 3], [4, 5, 6]],
    dtype=np.complex64,
)
array_a

array([[1.+0.j, 2.+0.j, 3.+0.j],
       [4.+0.j, 5.+0.j, 6.+0.j]], dtype=complex64)

In [34]:
# Converted to booleans: 0 becomes False, every other number becomes True.
array_a = np.array(
    [[1, 2, 0], [4, 5, 6]],
    dtype=bool,
)
array_a

array([[ True,  True, False],
       [ True,  True,  True]])

In [35]:
# Converted to text: every number becomes a one-character string.
array_a = np.array(
    [[1, 2, 3], [4, 5, 6]],
    dtype=str,
)
array_a

array([['1', '2', '3'],
       ['4', '5', '6']], dtype='<U1')

In [36]:
# The string width follows the longest entry: "10" needs two characters.
array_a = np.array(
    [[10, 2, 3], [4, 5, 6]],
    dtype=str,
)
array_a

array([['10', '2', '3'],
       ['4', '5', '6']], dtype='<U2')

### Ndarrays
- Originate from the NumPy package
- A special datatype in Python
- Can store multiple numeric values in a sequence
- Elementwise properties
- A need for a unified "array" type

In [4]:
# A flat list gives a 1-D array.
array_a = np.array([1, 2, 3])
array_a

array([1, 2, 3])

In [5]:
print(array_a)  # print() shows the plain text form, without the array(...) wrapper

[1 2 3]


In [41]:
type(array_a)  # objects created with np.array are numpy.ndarray instances

numpy.ndarray

In [44]:
array_a.shape  # one-element tuple: a 1-D array holding 3 values

(3,)

In [45]:
# A list of two rows gives a 2-D array.
array_b = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
array_b

array([[1, 2, 3],
       [4, 5, 6]])

In [46]:
array_b.shape  # (rows, columns) for the 2-D array

(2, 3)

### Strings vs Object vs Numbers

In [47]:
print(4+3%5)  # % binds tighter than +, so this is 4 + (3 % 5) = 4 + 3 = 7

7


0

In [2]:
import numpy as np

### Universal Functions
- Work with ndarrays on an element-by-element basis
- An extension of the elementwise operations
- Mathematical operations, trigonometric functions, comparison functions
- Broadcasting
- Type Casting