# Python Programming for Machine Learning

## Code In AI - NumPy, Matplotlib, and Pandas Package

## NumPy Package

See the documentation:
- https://numpy.org/doc/stable/reference/index.html

### Check versions of installed packages

In [None]:
import numpy as np
import pandas as pd

# Report the installed versions with the same formatting for both packages.
# (Passing the version as a second print() argument inserted an extra space
# after the trailing blank in the label.)
print("Numpy version {}".format(np.__version__))
print("Pandas version {}".format(pd.__version__))

### NumPy array

In [None]:
import numpy as np

# Ten sample float readings kept in a plain Python list.
cvalues = [
    20.1, 20.8, 21.9, 22.5, 22.7,
    22.3, 21.8, 21.2, 20.9, 20.1,
]

# np.array copies the list into a 1-D NumPy array.
C = np.array(cvalues)
print(C)

### 2-D & multidimensional NumPy array

In [None]:
# A 3x3 matrix built from a nested list, one inner list per row.
A = np.array([
    [3.4, 8.7, 9.9],
    [1.1, -7.8, -0.7],
    [4.1, 12.3, 4.8],
])
# Print the matrix and then its number of dimensions, one per line.
print(A, A.ndim, sep="\n")

In [None]:
# Three 2x2 blocks nested one level deeper give a 3-dimensional array.
B = np.array([
    [[111, 112], [121, 122]],
    [[211, 212], [221, 222]],
    [[311, 312], [321, 322]],
])
# Print the array and then its number of dimensions, one per line.
print(B, B.ndim, sep="\n")

### Array Shape 

In [None]:
# Six rows of three integers each.
x = np.array([
    [67, 63, 87],
    [77, 69, 59],
    [85, 87, 99],
    [79, 72, 71],
    [63, 89, 93],
    [68, 92, 78],
])

# np.shape reports the size along every axis as a tuple.
print(np.shape(x))

In [None]:
# The array's own shape attribute gives the same tuple as np.shape(x).
print(x.shape)

### Indexing

In [None]:
# 5x5 grid whose entries encode their own (row, column) position.
A = np.array([[11, 12, 13, 14, 15],
              [21, 22, 23, 24, 25],
              [31, 32, 33, 34, 35],
              [41, 42, 43, 44, 45],
              [51, 52, 53, 54, 55]])
# Slice rows 0-2 (end exclusive) and columns 2 through the last one.
print(A[:3, 2:])

### Numerical operations on NumPy array

In [None]:
lst = [1, 2, 5, 10]
# Convert the list to an array and add 2 to every element in one expression.
v = np.array(lst) + 2
print(v)

In [None]:
# Multiplying by a scalar is applied to every element of v.
print(v * 2.2)

In [None]:
# Subtracting a scalar is likewise applied element by element.
print(v - 1.38)

In [None]:
# ** raises each element of v to the given power.
print(v ** 2)

### Arithmetic operations with two arrays

In [None]:
# Two 3x3 operands: an integer matrix and a float matrix of ones.
A = np.array([[11, 12, 13],
              [21, 22, 23],
              [31, 32, 33]])
B = np.ones((3, 3))
# Label fixed from "Adding to arrays:" so it matches the wording of the
# "Multiplying two arrays:" label used for the following example.
print("Adding two arrays:")
# + on equally shaped arrays adds element by element.
print(A + B)

In [None]:
# * between two arrays multiplies element by element; it is NOT matrix
# multiplication. A and B come from the preceding cell.
print("\nMultiplying two arrays:")
print(A * (B + 1))

### Matrix multiplication

In [None]:
# For two 2-D arrays (as A and B are in the preceding cells), np.dot performs
# matrix multiplication rather than an element-wise product.
np.dot(A,B)

### Definition of dot product

In [None]:
# With two 1-D arrays np.dot returns the scalar inner product,
# here 3*(-4) + (-2)*1.
x, y = np.array([3, -2]), np.array([-4, 1])
print(np.dot(x, y))

In [None]:
# With 2-D arrays np.dot is matrix multiplication: a (2, 3) matrix times a
# (3, 2) matrix yields a (2, 2) result.
A = np.array([
    [1, 2, 3],
    [3, 2, 1],
])
B = np.array([
    [2, 3],
    [1, -1],
    [1, 2],
])
print(np.dot(A, B))

The dot product is essentially a projection: it sums over (eliminates) one dimension of the operands.

### Comparison operator

In [None]:
# Two 3x3 integer matrices that agree in some positions only.
A = np.array([
    [11, 12, 13],
    [21, 22, 23],
    [31, 32, 33],
])
B = np.array([
    [11, 102, 13],
    [201, 22, 203],
    [31, 32, 303],
])
# == compares element by element and yields a boolean array of the same shape.
A == B

### Compare two arrays

In [None]:
# array_equal collapses the comparison to a single bool: True only when both
# arrays have the same shape and every element matches.
print(np.array_equal(A, B))

In [None]:
# Comparing A with itself: same shape and same elements.
print(np.array_equal(A, A))

### Broadcasting

In [None]:
# B has a single axis of length 3; when combined with the 2-D A from the
# earlier cell it is broadcast across each row of A.
B = np.array([1, 2, 3])
print("Multiplication with broadcasting: ")
# Element-wise product: column j of A is scaled by B[j].
print(A*B)

In [None]:
# Same broadcasting rule for addition: B is added to every row of A
# (A and B as defined in the preceding cells).
print("... and now addition with broadcasting: ")
print(A+B)

### Concatenating arrays

In [None]:
# Three 1-D arrays of different lengths.
x = np.array([11, 22])
y = np.array([18, 7, 6])
z = np.array([1, 3, 5])
# concatenate joins them end to end, in the order given, into one 1-D array.
c = np.concatenate([x, y, z])
print(c)

### Vector stacking

In [None]:
# Two 1-D vectors of equal length.
A = np.array([3, 4, 5])
B = np.array([1, 9, 0])
# Stack the vectors as the rows of a 2-D array. np.vstack is used because
# np.row_stack is only an alias for it and is deprecated as of NumPy 2.0.
print(np.vstack((A, B)))

In [None]:
# column_stack places each 1-D input as one column of the 2-D result.
print(np.column_stack((A, B)))

In [None]:
# The stacking calls return new arrays, so A itself keeps its original shape.
np.shape(A)

## Pandas Package
See the documentation:
- https://pandas.pydata.org/pandas-docs/stable/

In [None]:
import pandas as pd

In [None]:
# Five random draws from the standard normal distribution, labelled 'a'..'e'.
s = pd.Series(np.random.randn(5), index=list("abcde"))
# Print the Series and then its type, one per line.
print(s, type(s), sep="\n")

In [None]:
# Each dict key becomes a column label; each list supplies that column's values.
d = {
    'one': [1, 2, 3, 4],
    'two': [5, 6, 7, 8],
}
df = pd.DataFrame(d)
print(df)

In [None]:
# Position-based selection: rows 0-1 (end exclusive) of the column at
# position 1, returned as a Series.
df.iloc[0:2,1]

### Creating DataFrame from NumPy array

In [None]:
# NumPy coerces this mixed str/int nested list to a single string dtype, so
# every entry of `data` - including the numbers - is stored as text.
data = np.array([['', 'Col1', 'Col2'],
                 ['Row1', 1, 2],
                 ['Row2', 3, 4]])
# First row -> column labels, first column -> row labels, the rest -> values.
# The value block is converted back to integers so the DataFrame holds numbers
# rather than the strings '1', '2', '3', '4'.
df = pd.DataFrame(data=data[1:, 1:].astype(int),
                  index=data[1:, 0],
                  columns=data[0, 1:])
print(df)

### Creating DataFrame from Dictionary

In [None]:
# Keys become column labels; the list values (strings here) become the rows.
my_dict = {
    'First': ['1', '3'],
    'Second': ['1', '2'],
    'Third': ['2', '4'],
}
df = pd.DataFrame(data=my_dict)
print(df)

### Creating DataFrame from files

In [None]:
# Load the semicolon-separated file bank-data.csv (path is relative to the
# current working directory).
bankData = pd.read_csv("bank-data.csv",sep=";")
# Show (rows, columns), then a per-column summary of dtypes and non-null counts.
print(bankData.shape)
bankData.info()

### DataFrame dimension

In [None]:
# A 2x3 integer array wrapped in a DataFrame; row and column labels default
# to 0, 1, 2, ...
df = pd.DataFrame(
    np.array([
        [1, 2, 3],
        [4, 5, 6],
    ])
)

In [None]:
# Use the shape property: a (number of rows, number of columns) tuple
print(df.shape)

In [None]:
# or use the len() function with the index property to get the row count only
print(len(df.index))

### Viewing DataFrame: head()

In [None]:
# head() with no argument returns the first 5 rows.
bankData.head()

### Viewing DataFrame: tail()

In [None]:
# tail() with no argument returns the last 5 rows.
bankData.tail()

### Viewing DataFrame: columns

In [None]:
# The Index object holding the column labels.
bankData.columns

### Viewing DataFrame: describe()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max); by default only
# numeric columns are summarised when the frame has mixed column types.
bankData.describe()

### Sorting data by column name

In [None]:
# sort_values returns a new frame sorted by 'age' in ascending order;
# bankData itself keeps its original row order.
bankData.sort_values(by='age').head()

### Select a column

In [None]:
# Bracket selection with a single label returns that column as a Series.
bankData['job'].head()

In [None]:
# Attribute access is shorthand for bankData['job']; it only works when the
# column name is a valid identifier that does not clash with a DataFrame
# attribute or method.
bankData.job.head()

### Select multiple columns: [['c1','c2']]

In [None]:
# A list of labels returns a DataFrame with those columns, in the listed order.
bankData[['job','age']].head()

### Indexing (aka filter rows): [Start:End] 

In [None]:
# .loc slices by label and includes BOTH endpoints: with the default integer
# index produced by read_csv this returns labels 0 through 5 (six rows).
bankData.loc[0:5]

In [None]:
# Plain [] slicing is positional and excludes the end: rows 0 through 4
# (five rows), one fewer than .loc[0:5].
bankData[0:5]

### More complex selection - .loc and .iloc
- Filter both rows and columns using .loc and .iloc

In [None]:
# Label-based selection: rows labelled 1 through 5 (inclusive) and the three
# named columns.
bankData.loc[1:5,['job','age','education']]

In [None]:
# Position-based selection: the first five rows and first three columns
# (slice ends are exclusive).
bankData.iloc[:5,:3]

### Boolean indexing by isin

In [None]:
# isin builds a boolean mask that is True where 'job' equals one of the listed
# values; indexing with the mask keeps only those rows.
bankData[bankData.job.isin(['management'])].head()

### Boolean indexing by condition

In [None]:
# The comparison yields a boolean Series that is used as a row mask.
bankData[bankData.age > 30].head()

### Add column

In [None]:
# An array of ones with one entry per row of bankData.
oneColumn = np.ones(len(bankData))
# Assigning to a new label adds the column to bankData in place.
bankData['one'] = oneColumn
bankData.head()

### Delete column using del

In [None]:
# del removes the column from bankData in place.
del bankData['one']
bankData.head()

### Delete column using .drop()
- .drop() returns a new DataFrame with the specified column removed; the original DataFrame is left unchanged.

In [None]:
# Re-create the 'one' column first; this reuses oneColumn from the
# "Add column" cell, so that cell must have been run.
bankData['one'] = oneColumn #add column
# drop(..., axis=1) removes a column and returns the result as a new frame.
bankData2 = bankData.drop('one',axis=1)
bankData2.head()


### Missing data

In [None]:
# Read flight data from flights.csv (comma-separated, the read_csv default;
# path is relative to the current working directory)
flightData = pd.read_csv('flights.csv')

In [None]:
# Show rows with missing data: isnull() is True where dep_delay is missing,
# and the resulting mask selects those rows
flightData[flightData.dep_delay.isnull()].head()

### Remove missing data rows

In [None]:
# Shape of the full data set, for comparison with the shape after dropna().
flightData.shape

In [None]:
# how='any' drops every row with at least one missing value. dropna returns a
# new frame; flightData itself is not modified.
flightData.dropna(how='any').shape

In [None]:
# First rows of the frame that remains once incomplete rows are dropped.
flightData.dropna(how='any').head()

### Filling missing data

In [None]:
# Mean of the dep_delay column, kept in x for the fill step that follows.
# NOTE(review): np.mean on a pandas Series appears to skip missing values
# rather than return NaN - confirm against the data.
x = np.mean(flightData.dep_delay)
# Display the mean rounded to one decimal place.
print("{:1.1f}".format(x))

In [None]:
# Fill missing dep_delay values with x (computed in the preceding cell); the
# dict form limits the fill to that one column and fillna returns a new frame.
# Rows labelled 835 through 840 (inclusive) are shown as a sample -
# presumably they include previously missing values; verify against the data.
flightData.fillna(value={'dep_delay':x}).loc[835:840]

### Statistical operations

In [None]:
# Column-wise mean. numeric_only=True restricts the reduction to numeric
# columns: bankData also contains text columns such as 'job', and pandas >= 2.0
# raises TypeError for them instead of silently skipping them.
bankData.mean(numeric_only=True)

In [None]:
# Column-wise standard deviation. numeric_only=True restricts the reduction to
# numeric columns: bankData also contains text columns such as 'job', and
# pandas >= 2.0 raises TypeError for them instead of silently skipping them.
bankData.std(numeric_only=True)

In [None]:
# Column-wise median. numeric_only=True restricts the reduction to numeric
# columns: bankData also contains text columns such as 'job', and pandas >= 2.0
# raises TypeError for them instead of silently skipping them.
bankData.median(numeric_only=True)

### Dummy variables
- https://pandas.pydata.org/docs/reference/api/pandas.get_dummies.html

In [None]:
# A small frame with a categorical 'key' column and a running counter 'data1'.
df = pd.DataFrame({
    'key': ['b', 'b', 'a', 'c', 'a', 'b'],
    'data1': range(6),
})
print(df)

In [None]:
# One indicator column per distinct value found in 'key'.
pd.get_dummies(df['key'])

In [None]:
# Replace 'education' with one indicator column per category (named with an
# 'education_' prefix); all other columns are kept as they are.
pd.get_dummies(bankData, columns=['education']).head()