#### This is a growing tutorial that demonstrates some MATLAB examples and their equivalent code in Python
##### Each MATLAB block function corresponds to a block in conversion.ipynb, i.e., a block whose heading starts with "block_"
##### written by Richard Xu (yida.xu@uts.edu.au)
##### Feb 2018

# block_read_csv

In [None]:
import pandas as pd
import numpy as np
import time

# Load the superstore spreadsheet; each method below totals Profit per
# 'Customer ID' and prints the first five results plus the elapsed time.
store = pd.read_excel('superstore.xls');

# print the first row
print store[:1]


# --------------------------------------------------------------
# METHOD 1: insert element by element into DataFrame
# --------------------------------------------------------------

t1 = time.time()

# distinct customer IDs
val = pd.unique(store['Customer ID'])

# in-place function, i.e., val changes its values internally
val.sort()

# empty result table, filled one row per customer in the loop below
profit_1 = pd.DataFrame(columns = ['Customer ID', 'Profit' ] )

# i is the row label of the next row to insert
i = 0

for v in val:
    # alternative row selection by substring match (not used):
    #cus_list = store[store['Customer ID'].str.contains(v,na=False)]
    # boolean mask selecting the rows that belong to customer v
    index = store['Customer ID']==v
    
    p_series = store[index].Profit
    
    #print type(p_series), type(p_series.values)    
    
    # sum the underlying array of the Series and append it as row i
    profit_1.loc[i] = [v, p_series.values.sum()]
    
    i = i + 1

print profit_1[:5]
print "time times = %0.3f" % (time.time() - t1)


# get that Series into array
print type(store['Profit']), type(store['Profit'].values)

print '\n'


# --------------------------------------------------------------
# METHOD 2: construct array first, then put into DataFrame
# --------------------------------------------------------------

t1 = time.time()

# NOTE: unlike METHOD 1, val is not sorted here, so the row order of
# profit_2 may differ from profit_1 and profit_3
val = pd.unique(store['Customer ID'])


# pre-allocate the numeric column; the IDs are collected in a plain list
profit = np.zeros(len(val))
customer_ID = []

i = 0

for v in val:
    index = store['Customer ID']==v
    p_series = store[index].Profit
    customer_ID.append(v)
    # built-in sum() iterates over the values of the Series
    profit[i] = sum(p_series)
    i = i + 1

    
# NOTE(review): the column is named 'customer ID' (lower-case c) here but
# 'Customer ID' in METHOD 1 -- confirm whether that is intended
profit_2 = pd.DataFrame(columns = ['customer ID', 'Profit' ] )
profit_2['customer ID'] = customer_ID
profit_2['Profit'] = profit

   
print profit_2[:5]

print "time times = %0.3f" % (time.time() - t1)


print '\n'


# --------------------------------------------------------------
# METHOD 3: use groupby => get Series 
# instead of DataFrame, Series has a (key, value) pair like Dictionary
# --------------------------------------------------------------

t1 = time.time()

# one call replaces the explicit loops above; the result is indexed by 'Customer ID'
profit_3 = store.groupby('Customer ID', sort=True).Profit.sum()
   
print profit_3[:5]
print "time times = %0.3f" % (time.time() - t1)




# block_numpy_multiply

In [None]:
import numpy as np

# Three 2x2 operands used throughout this cell; they start as nested lists
A = [[3, 4],[2, 5]]
B = [[5, 6],[3, 2]]
C = [[7, 2],[1, 5]]


# before casting them to numpy types, they are of type "list"
print "type (A) = ", type (A)


# these are matrix multiplications

print "np.dot(A,B) = \n", np.dot(A,B)
print "np.matmul(A,B) = \n", np.matmul(A,B)

# to make "np.dot" take more than two inputs, fold it over the list with reduce
# (reduce is used as a built-in here, i.e. this cell targets Python 2)
print "reduce(np.dot, [A, B, C]) = \n", reduce(np.dot, [A, B, C])


# after casting them to numpy types, they are of type "ndarray"
# (only A and B are converted here; C stays a list until np.mat(C) below)

A = np.array(A)
B = np.array(B)

print "type (A) = ", type (A)

# the following does element-wise multiplication
print "np.multiply(A,B) = \n", np.multiply(A,B)

# then you can perform things such as element-wise multiplication with *:
print " A * B * C = \n",  A * B * C


# then we cast them into matrix

A = np.mat(A)
B = np.mat(B)
C = np.mat(C)

print "type (A) = ", type (A)

# for np.matrix operands, * is the matrix product rather than element-wise
# (W2 is not used again within this cell)
W2 = A * B * C

print " A * B * C = \n",  A * B * C

# block_matrix_find

In [None]:
# Boolean masks and a MATLAB-style "find" on a 3x3 example.
# NOTE: np is not imported in this cell; it relies on an earlier import.

a_list = [[ 8, 1, 6], [3, 5, 7], [4, 9, 2]]
print a_list,"\n"
# comparing a plain list with an int is NOT element-wise; it yields one boolean
print type(a_list), a_list > 5, "\n"

# after conversion to ndarray the same comparison becomes element-wise
a_list = np.array(a_list)
print a_list,"\n"

print type(a_list), "\n"

# should see a matrix of True and False
print a_list > 5, "\n"

# indexing is zero-based: single element, whole row, whole column
print "a_list[2,1] = ", a_list[2,1], "\n"
print "a_list[2,:] = ", a_list[2,:], "\n"
print "a_list[:,1] = ", a_list[:,1], "\n"

# this will return two arrays indicating the row and column element, equivalent to "find" in MATLAB
print "np.where( a_list > 5 ) \n", np.where( a_list > 5 )

# argwhere returns the same positions as one (row, column) pair per match
print "np.argwhere(a_list > 5) =\n", np.argwhere(a_list > 5)
# both forms below select the matching values themselves
print "a_list[np.where( a_list > 5 )] = ", a_list[np.where( a_list > 5 )]

print "a_list[a_list > 5] = ", a_list[a_list > 5]

# block_numpy_reshape_repeat

In [None]:
import numpy as np

a_list = [[16, 2, 3, 13], [5, 11, 10, 8], [9, 7, 6, 12] ,[4, 14, 15, 1]]
print a_list,"\n"

b_list = np.reshape(a_list,[2,8])
print b_list, "\n"

b_list = np.reshape(a_list,[8,2])
print b_list, "\n"

b_list = np.reshape(np.transpose(a_list),[8,2])
print b_list, "\n"

b_list = np.reshape(np.transpose(a_list),[1,-1])
print b_list, "\n"


'''  to get   [ 1 2 
                1 2 
                1 2 
                1 2] '''

print "np.repeat(np.mat([1, 2]), 4,0) = \n", np.repeat(np.mat([1, 2]), 4,0)

# to get [1 1 1 1 2 2 2 2]
print "np.repeat(np.mat([1, 2]), 4,0) = \n", np.repeat(np.mat([1, 2]), 4,1)


# --------------------------------------------------------------------------
# Exercise: how to get [1 2 1 2 1 2 1 2]:
# --------------------------------------------------------------------------
answer = np.reshape(np.repeat(np.mat([1, 2]), 4,0),[1,-1])

# block_numpy_unique

In [None]:
# Unique values of a random 5x5 array, with their positions and counts.
# NOTE: np is not imported in this cell; it relies on an earlier import.
# rand() is in [0, 1), so after scaling and floor the entries lie in 10..14
a_list = np.random.rand(5,5) * 5 + 10
a_list = np.floor(a_list)

print a_list,"\n"

print np.unique(a_list), "\n"

# val: the unique values; the three optional outputs are requested by keyword
val, indices, inv_indices, counts = np.unique(a_list, return_index = True, return_inverse = True, return_counts = True)

print indices, inv_indices, counts,"\n"
# indices refer to the flattened array, so flatten before indexing with them
one_a_list = a_list.flatten()
print one_a_list[indices],"\n"

# for every unique value, print all of its occurrences
for v in val:
    print one_a_list[np.where( one_a_list == v )]


# block_direction_plot

In [None]:
import matplotlib.pyplot as plt
import math
import numpy as np
%matplotlib inline

#A = np.array([[1.2, 2.4], [3.1, 7.0]]);
A = np.array([[3.1, 7.0], [1.2, 2.4]]);
B = np.array([[1.0, 1.8], [2.5, 5.2]]);


plt.plot( A[0,0], A[0,1], 'bo', markersize=3, color='y')
plt.plot( A[1,0], A[1,1], 'bo', markersize=3, color='k')
plt.plot( A[:,0], A[:,1], color='g')


plt.plot( B[0,0], B[0,1], 'bo', markersize=3, color='y')
plt.plot( B[1,0], B[1,1], 'bo', markersize=3, color='k')
plt.plot( B[:,0], B[:,1], color='c')

A_dir = A[1,:] - A[0,:];
A_dir = A_dir / np.linalg.norm(A_dir);

B_dir = B[1,:] - B[0,:];
B_dir = B_dir / np.linalg.norm(B_dir);

theta = math.acos(np.dot(A_dir,B_dir))

print theta,"\n"

text_m = np.append(A,B,axis=0)

print text_m,"\n"

print np.amin(text_m), "\n"

text_min = np.amin(text_m, axis=0)

print text_min, "\n"


# the following are equivalent, except the latter cap only to two decimal points
# plt.text(text_min[0]+1,text_min[1], "theta = " + str(theta) + ": same direction" ); 

if abs(theta)< 0.2:
    plt.text(text_min[0]+1,text_min[1], "theta = " + "%0.2f" % theta + ": same direction" )
    
if abs(theta) > math.pi - 0.2:
    plt.text(text_min[0]+1,text_min[1], "theta = " + "%0.2f" % theta + ": opposite direction" ); 

plt.show()


# block_inline_function

In [None]:
def sigmoid_func(x):
    """Return the logistic sigmoid 1 / (1 + e**(-x)) of a real number x.

    The result lies in [0, 1]. The negative branch is written in terms of
    e**x so that very negative inputs (e.g. x = -1000) return 0.0 instead
    of raising OverflowError from math.exp(-x).
    """
    if x >= 0:
        # -x <= 0 here, so math.exp(-x) is at most 1 and cannot overflow
        return 1 / (1 + math.exp(-x))
    # x < 0: e**x is below 1; algebraically e**x / (1 + e**x) == 1 / (1 + e**(-x))
    z = math.exp(x)
    return z / (1 + z)

# call the named function defined above
print sigmoid_func(2)

# the logistic function written as an anonymous (lambda) function bound to a
# name -- the closest Python counterpart of a MATLAB inline function.
# NOTE: math is not imported in this cell; it relies on an earlier import.
sigmoid2 = lambda x: 1/(1 + math.exp(-x))

print sigmoid2(2)

# block_map_reduce

In [None]:
# map / reduce / filter compared with the equivalent explicit loops.
# NOTE: relies on sigmoid_func, sigmoid2 and math from earlier cells, and on
# reduce being available as a built-in.
# the integers -2, -1, 0, 1, 2
items = range(-2,3)

# without using MAP function
sigmoids = []
for i in items:
    sigmoids.append(sigmoid2(i))


print items, sigmoids

# using MAP function

# both do the same thing as the loop above
# (the second assignment simply overwrites the result of the first)
sigmoids2 = map(lambda x: 1/(1 + math.exp(-x)), items)
sigmoids2 = map(lambda x: sigmoid_func(x), items)

print sigmoids2


# without using REDUCE function 
# running product of all elements
prod = 1
for i in sigmoids2:
    prod = prod * i

print prod

# with using REDUCE function
# folds the two-argument lambda over the sequence from left to right
prod2 = reduce((lambda x, y: x * y), sigmoids2)

print prod2


# now apply a filter to input before REDUCE
# only the values that are >= 0.2 take part in the product
prod3 = reduce((lambda x, y: x * y), filter(lambda x: x >= 0.2, sigmoids2))

print prod3


### non-MATLAB tutorial: dictionary example

In [None]:
# dictionary isn't used in MATLAB, but a useful tool in Python
# the values may be of different types (str, float, bool)
skill = {"package" : "tensorflow", "experience": 2.5, "language": "python", "deep_learning": True}
print skill, type(skill)

# assigning to an existing key replaces its value
skill["package"] = "keras"

# note that dictionary isn't indexed sequentially, so print skill[0] doesn't work:
print skill

# -------- hierarchical access: -------------
# the value stored in one dictionary is used as the key into another
# NOTE(review): "langauge" looks like a typo for "language"; it is never
# looked up in this cell, so it has no effect here -- confirm before changing
job = {"required": "package", "optional": "langauge"}

print skill[job["required"]]

print 'len(skill) = ', len(skill)

# membership tests look at the keys
print '"language" in skill = ', "language" in skill
# remove the key (and its value) from the dictionary
del skill["language"]


print '"language" not in skill = ', "language" not in skill


# -------- use d.update() to add entries; there is no d.add() function -

skill.update({'programming': "python"})
print skill


# --------  the followings are the same ---------------

print('\nfor key in skill:\n')

# iterating over a dictionary yields its keys
for key in skill:
    print key

print('\nfor key in skill.iterkeys():\n')
    
for key in skill.iterkeys():
    print key
    
# --------  the followings are the same ---------------
    
print('\nfor val in skill.itervalues():\n')

for val in skill.itervalues():
    print val

print('for key in skill:\n')
    
# same values, reached through the keys
for key in skill:
    print skill[key]

# --------  use something like ---------------

print('\n')

# print each key together with its value
for key in skill:
    print key, " ", skill[key]



### non-MATLAB tutorial: quick pandas

In [None]:
import pandas as pd
import numpy as np

# ----------------------------------------------------------------
# something about series 
# ----------------------------------------------------------------


# a series can hold elements of different types:
s = pd.Series([1,3,5,"str",6,8.])
print s

# a series can hold elements of the same type:
s = pd.Series([1,3,5,67,6,8])
print s

# series from a single value and an index: the value is repeated for every label
s = pd.Series(1,index=list(range(4)))
print s

# series from a sequence of values and a matching index
s = pd.Series(range(4),index=list(range(4)))
print s

# .values gives the underlying data without the index
print "s.values = ", s.values

print "\n"


# ----------------------------------------------------------------
# create the first DataFrame
# ----------------------------------------------------------------


# five consecutive dates starting at 2018-01-01, used as the row labels
dates = pd.date_range('20180101', periods=5)
print dates, "\n"


columns = ['exp_1', 'exp_2', 'exp_3']

# random data with one row per date and one column per experiment
df = pd.DataFrame(np.random.randn(len(dates),len(columns)), index=dates, columns=columns)

# the order of the keyword arguments does not matter
df = pd.DataFrame(np.random.randn(len(dates),len(columns)), columns=columns, index=dates)

print df

# the following unlabelled (positional) version doesn't work:
#df = pd.DataFrame(np.random.randn(6,4), columns, dates)

# [3] * 4 is list repetition, giving four copies of 3
print "np.array([3] * 4,dtype='int32') = ", np.array([3] * 4, dtype='int32')

print "\n"

# --------------------------------------------------------------------------------------
# note that scalar entries such as "start" and "comment" are repeated for every row;
# the other entries each supply four values
# --------------------------------------------------------------------------------------

df2 = pd.DataFrame({'start' : 1.0,
                    'date' : pd.date_range('20180101', periods=4),
                    'stop' : pd.Series(1,index=range(4),dtype='float32'),
                    'class' : np.array([3] * 4,dtype='int32'),
                    'mark' : pd.Categorical(["pass","fail","fail","pass"]),
                    'comment' : "TBA"})

print df2

### non-MATLAB tutorial: numpy pass by reference

In [None]:
import numpy as np

A = np.mat(np.ones((3,3)))

# ---------------------------------------------------------------------------
# pass by object
# ---------------------------------------------------------------------------

np.array(A)[2]=2
print 'numpy.array(A)[2]=2: A = \n', A

# ---------------------------------------------------------------------------
# pass by reference
# ---------------------------------------------------------------------------
np.asarray(A)[2]=2
print 'numpy.array(A)[2]=2: A = \n', A

### non-MATLAB tutorial: numpy shape conversion

In [None]:
import numpy as np

print "np.array([3] * 4 = \n", np.array([3] * 4), '\n' 
print "np.array([3] * 4 = \n", np.array([3] * 4), '\n' 

# ---------------------------------------------------------------------------
# convert a 3x3 matrix to 1x9 matrix
# ---------------------------------------------------------------------------

ans = np.squeeze(A.flatten())
print "np.squeeze(A.flatten()) = \n", ans, ans.shape, type(ans), '\n'

# ---------------------------------------------------------------------------
# convert a nd matrix to nd numpy array
# ---------------------------------------------------------------------------

arr = np.array(A)
print "nd array = \n", arr, arr.shape, type(arr), '\n'

# ---------------------------------------------------------------------------
# then, convert from nd numpy array to 1-d array,
# ---------------------------------------------------------------------------
ans = arr.ravel()

print "1d array = \n", ans, ans.shape, type(ans), '\n'

