## Starting with Numpy

In [1]:
import numpy as np
np.__version__

'1.11.0'

In [2]:
# creating list of 0-9
L = list(range(10))

In [3]:
#converting integers to strings - list comprehension
[str(c) for c in L ]

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [4]:
[type(c) for c in L]

[int, int, int, int, int, int, int, int, int, int]

## Creating Arrays

In [5]:
# creating arrays
np.zeros(10, dtype='int')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [6]:
# 3 * 5 matrix
np.ones((3,5), dtype='float')

array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])

In [7]:
# matrix filled with a predefined value
np.full((3,5),9.1)

array([[ 9.1,  9.1,  9.1,  9.1,  9.1],
       [ 9.1,  9.1,  9.1,  9.1,  9.1],
       [ 9.1,  9.1,  9.1,  9.1,  9.1]])

In [8]:
# range of values with a step
np.arange(0,20,2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [9]:
# array of N evenly spaced values between two numbers (both endpoints included)
np.linspace(0,1,5)

array([ 0.  ,  0.25,  0.5 ,  0.75,  1.  ])

In [10]:
# 3x3 matrix of samples drawn from a normal distribution with mean 0 and standard deviation 1
np.random.normal(0,1,(3,3))

array([[-0.08115351, -0.65033438, -0.28838965],
       [ 1.09173328, -0.25441249, -0.56536168],
       [ 0.59197555,  1.31857312, -1.36149393]])

In [11]:
# identity matrix
np.eye(3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [12]:
# set a random seed so the random draws in the following cells are reproducible
np.random.seed(0)

In [13]:
# Draw three integer arrays of increasing dimensionality.
# randint(high, size=...) samples integers from 0 up to (but excluding) `high`.
x1 = np.random.randint(15,size=6) # 1D: 6 values, each < 15
print(x1)

x2 = np.random.randint(10, size=(3,3)) # 2D: 3 rows x 3 columns, each value < 10
print(x2)

x3= np.random.randint(10, size=(2,3,5)) # 3D: 2 blocks of 3 rows x 5 columns, each value < 10
print(x3)

[12  5  0  3 11  3]
[[7 9 3]
 [5 2 4]
 [7 6 8]]
[[[8 1 6 7 7]
  [8 1 5 9 8]
  [9 4 3 0 3]]

 [[5 0 2 3 8]
  [1 3 3 3 7]
  [0 1 9 9 0]]]


## Array indexing

In [14]:
x1[-1]

3

In [15]:
# all rows except the last one
x2[:-1]

array([[7, 9, 3],
       [5, 2, 4]])

In [16]:
# rows from the last one onward, i.e. only the last row (the result stays 2D)
x2[-1:]

array([[7, 6, 8]])

In [17]:
# all rows 2nd col
x2[:,1]

array([9, 2, 6])

In [18]:
# 2nd object(matrix), all rows , 2nd and 3rd columns 
x3[1,:,1:3]

array([[0, 2],
       [3, 3],
       [1, 9]])

In [19]:
# all elements in steps of 2
x1[::2]

array([12,  0, 11])

## Array concatenation

In [20]:
x = np.arange(0,5,2)
y = np.array([1,2,3])
np.concatenate([x,y])

array([0, 2, 4, 1, 2, 3])

In [21]:
# create 2D arrays
grid = np.array([[1,2,3],[4,5,6]])
np.concatenate([grid,grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [22]:
# to combine 1D with 2D
x = np.array([1,2,3])
grid = np.array([[5,6,7],[8,9,0]])
np.vstack([x,grid])

array([[1, 2, 3],
       [5, 6, 7],
       [8, 9, 0]])

In [23]:
z =  np.array([[1],[1]])
np.hstack([z,grid])

array([[1, 5, 6, 7],
       [1, 8, 9, 0]])

In [24]:
# splitting arrays: np.split cuts at the given indices, so [3, 6]
# yields the three pieces x[:3], x[3:6] and x[6:]
x = np.arange(10)
# NOTE: this rebinds x1, x2 and x3, replacing the random arrays created earlier
x1,x2,x3 = np.split(x,[3,6])
print(x1,x2,x3)

[0 1 2] [3 4 5] [6 7 8 9]


In [25]:
grid = np.arange(16).reshape((4,4))
print(grid)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]


In [26]:
# np.vsplit cuts along the row axis at index 2: the first piece holds rows 0-1
# and the second holds rows 2-3. Note that `lower` is therefore the top half of
# the printed grid and `upper` the bottom half.
lower, upper = np.vsplit(grid,[2])
print(lower,upper)

[[0 1 2 3]
 [4 5 6 7]] [[ 8  9 10 11]
 [12 13 14 15]]


# Pandas

In [27]:
import pandas as pd

In [28]:
data = pd.DataFrame({'Country' : ['India', 'Japan', 'America', 'England'], 'Rank':[1,2,3,4]})
data

Unnamed: 0,Country,Rank
0,India,1
1,Japan,2
2,America,3
3,England,4


In [29]:
data.describe()

Unnamed: 0,Rank
count,4.0
mean,2.5
std,1.290994
min,1.0
25%,1.75
50%,2.5
75%,3.25
max,4.0


In [30]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4 entries, 0 to 3
Data columns (total 2 columns):
Country    4 non-null object
Rank       4 non-null int64
dtypes: int64(1), object(1)
memory usage: 96.0+ bytes


In [31]:
# Highest rank first; sort_values returns a new frame and leaves `data` unchanged.
data.sort_values('Rank', ascending=False)

Unnamed: 0,Country,Rank
3,England,4
2,America,3
1,Japan,2
0,India,1


In [32]:
# create another dataFrame with duplicates
d = pd.DataFrame({'k1':['one']*3 + ['two']*4, 'k2':[1,2,3,3,4,4,5]})
d

Unnamed: 0,k1,k2
0,one,1
1,one,2
2,one,3
3,two,3
4,two,4
5,two,4
6,two,5


In [33]:
d.sort_values(by=['k2'],ascending=False)

Unnamed: 0,k1,k2
6,two,5
4,two,4
5,two,4
2,one,3
3,two,3
1,one,2
0,one,1


In [34]:
d.drop_duplicates()

Unnamed: 0,k1,k2
0,one,1
1,one,2
2,one,3
3,two,3
4,two,4
6,two,5


In [35]:
d.drop_duplicates(subset='k1')

Unnamed: 0,k1,k2
0,one,1
3,two,3


# Categorizing the Data

In [36]:
data = pd.DataFrame({'food': ['bacon', 'pulled pork', 'bacon', 'Pastrami','corned beef', 'Bacon', 'pastrami', 'honey ham','nova lox'],
                 'ounces': [4, 3, 12, 6, 7.5, 8, 3, 5, 6]})
data

Unnamed: 0,food,ounces
0,bacon,4.0
1,pulled pork,3.0
2,bacon,12.0
3,Pastrami,6.0
4,corned beef,7.5
5,Bacon,8.0
6,pastrami,3.0
7,honey ham,5.0
8,nova lox,6.0


In [39]:
# We want to categorize each dish by the animal it comes from,
# so we first build a lookup table from dish name to animal.
# The keys are lowercase: lowercase the food names before looking them up.
meat_to_animal = {
    'bacon': 'pig',
    'pulled pork': 'pig',
    'pastrami': 'cow',
    'corned beef': 'cow',
    'honey ham': 'pig',
    'nova lox': 'salmon',
}


def meat_2_animal(series):
    """Return the animal that the dish in one row comes from.

    Parameters
    ----------
    series : mapping-like row with a 'food' entry, e.g. a DataFrame row
        passed in by ``DataFrame.apply(..., axis="columns")``.

    Returns
    -------
    The animal listed in ``meat_to_animal`` for ``series['food']``, or
    ``None`` when the food is not in the table. Unknown dishes are left
    unlabelled instead of being silently reported as 'salmon'.
    """
    # Single source of truth: the lookup table above, not a parallel if/elif chain.
    return meat_to_animal.get(series['food'])



In [41]:
# Normalise the food names to lowercase so that entries such as 'Bacon' and
# 'Pastrami' match the lowercase names that meat_2_animal compares against.
data['food'] = data['food'].str.lower()
# Derive the animal for every row by passing each row to meat_2_animal.
data['animal'] = data.apply(meat_2_animal, axis="columns")
data

Unnamed: 0,food,ounces,animal
0,bacon,4.0,pig
1,pulled pork,3.0,pig
2,bacon,12.0,pig
3,pastrami,6.0,cow
4,corned beef,7.5,cow
5,bacon,8.0,pig
6,pastrami,3.0,cow
7,honey ham,5.0,pig
8,nova lox,6.0,salmon
