# NumPy Tutorial

In [1]:
import numpy as np

In [2]:
X = np.array([1, 2, 3])
print(X)

[1 2 3]


In [3]:
type(X)

numpy.ndarray

In [4]:
l = [4, 5, 10]
X = np.asarray(l)
type(X)

numpy.ndarray

In [5]:
X

array([ 4,  5, 10])

In [6]:
# Build the array from the same list, this time requesting a float dtype.
X = np.asarray(l, dtype=float)
print(X)

[ 4.  5. 10.]


In [7]:
X.shape

(3,)

In [8]:
# IPython-only syntax: a trailing `?` shows the help/docstring for np.array.
np.array?

In [9]:
X = np.array([[1, 2, 3], [4, 5, 6]])

In [10]:
X

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
X.shape

(2, 3)

In [12]:
X[1,2]

6

In [13]:
X[0, 0:2]

array([1, 2])

In [14]:
X[0, :]

array([1, 2, 3])

In [15]:
X = np.zeros((4, 5))

In [16]:
X

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [17]:
# 4x4 identity matrix; with a single size argument np.eye returns a square matrix.
X = np.eye(4)
X

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [18]:
# 4x5 array of uniform samples in [0, 1). No seed is set in this notebook up to
# this point, so the values change on every run.
X = np.random.random(size=(4, 5))
X

array([[0.29208173, 0.28572686, 0.89134417, 0.56207201, 0.69766511],
       [0.95568804, 0.75868495, 0.27406654, 0.33180582, 0.85265874],
       [0.39487547, 0.44993378, 0.91475486, 0.4787255 , 0.55360049],
       [0.43937476, 0.45663814, 0.31865925, 0.36266097, 0.33464866]])

In [19]:
Z = X.T

In [20]:
Z

array([[0.29208173, 0.95568804, 0.39487547, 0.43937476],
       [0.28572686, 0.75868495, 0.44993378, 0.45663814],
       [0.89134417, 0.27406654, 0.91475486, 0.31865925],
       [0.56207201, 0.33180582, 0.4787255 , 0.36266097],
       [0.69766511, 0.85265874, 0.55360049, 0.33464866]])

In [21]:
Y = X.reshape(20, 1)

In [22]:
Y

array([[0.29208173],
       [0.28572686],
       [0.89134417],
       [0.56207201],
       [0.69766511],
       [0.95568804],
       [0.75868495],
       [0.27406654],
       [0.33180582],
       [0.85265874],
       [0.39487547],
       [0.44993378],
       [0.91475486],
       [0.4787255 ],
       [0.55360049],
       [0.43937476],
       [0.45663814],
       [0.31865925],
       [0.36266097],
       [0.33464866]])

In [23]:
A = np.arange(5)
print(A)

[0 1 2 3 4]


In [24]:
B = np.arange(5)

In [25]:
C = A + B
D = A - B
E = A * B
print(C, D, E)

[0 2 4 6 8] [0 0 0 0 0] [ 0  1  4  9 16]


In [26]:
print(A, A + 1)

[0 1 2 3 4] [1 2 3 4 5]


In [27]:
A = np.random.random((2, 3))
print(A)
print(A + 1)

[[0.64126591 0.95784939 0.33397944]
 [0.60316023 0.29746593 0.64996124]]
[[1.64126591 1.95784939 1.33397944]
 [1.60316023 1.29746593 1.64996124]]


In [28]:
A = np.floor(np.random.random((2, 3))*10)
print(A)

[[1. 7. 8.]
 [0. 4. 9.]]


In [29]:
u = [1, 2, 3]
v = [-1, 0, 1]

p1 = np.inner(u, v)
print(p1)

2


In [30]:
p2 = np.outer(u, v)
print(p2)

[[-1  0  1]
 [-2  0  2]
 [-3  0  3]]


In [31]:
A = np.ones((2, 3))
B = np.ones((3, 2))
np.dot(A, B)

array([[3., 3.],
       [3., 3.]])

In [32]:
np.dot(B.T, A.T)

array([[3., 3.],
       [3., 3.]])

In [33]:
A = np.ones((2, 3))
print(A)

[[1. 1. 1.]
 [1. 1. 1.]]


In [34]:
A.sum()

6.0

In [35]:
# Sum along axis 0 (one total per column) and along axis 1 (one total per row).
a = A.sum(axis=0)
b = A.sum(axis=1)

In [36]:
a,b

(array([2., 2., 2.]), array([3., 3.]))

In [37]:
A.max()

1.0

In [38]:
# Dot product of two identical 1-D vectors: 1*1 + 2*2 + 3*3 + 4*4.
a = np.array([1, 2, 3, 4])
b = np.array([1, 2, 3, 4])
a_dot_b = a.dot(b)

In [39]:
a_dot_b

30

In [40]:
# Given a 1-D sequence, np.diag builds a square matrix with it on the diagonal.
a = [1, 2, 3, 4, 5]
print(np.diag(a))

[[1 0 0 0 0]
 [0 2 0 0 0]
 [0 0 3 0 0]
 [0 0 0 4 0]
 [0 0 0 0 5]]


In [41]:
# np.arange works like range(): integers 0..4 in an integer-typed array.
d = np.arange(5)
print(d)

# Arrays keep their dtype even if elements are changed: storing 9.7 into an
# integer array truncates it to 9.
d[1] = 9.7
print(d)

# Operations create a new array, with a new (here: float) dtype.
print(d * 0.4)

# Ask for a float array explicitly. The `np.float` alias was deprecated in
# NumPy 1.20 and removed in 1.24; the builtin `float` is the replacement and
# produces the same float64 array.
d = np.arange(5, dtype=float)
print(d)

# Arbitrary start, stop and step (the step may be fractional).
np.arange(3, 7, 0.5)


[0 1 2 3 4]
[0 9 2 3 4]
[0.  3.6 0.8 1.2 1.6]
[0. 1. 2. 3. 4.]


array([3. , 3.5, 4. , 4.5, 5. , 5.5, 6. , 6.5])

In [42]:
np.arange(4.0)

array([0., 1., 2., 3.])

# Pandas

In [43]:
import pandas as pd

In [44]:
# Reads 'mobile_clean.csv' via a relative path, so the file must be reachable
# from the kernel's working directory. The recorded run of this cell raised
# FileNotFoundError because the file was missing.
df = pd.read_csv('mobile_clean.csv')

FileNotFoundError: [Errno 2] File mobile_clean.csv does not exist: 'mobile_clean.csv'

In [None]:
df.head()

In [None]:
type(df)

In [None]:
df.tail(5)

In [None]:
df.loc[0] #loc is used to access any particular row

In [None]:
df_short=df[10:30]
df_short.shape

In [None]:
df_thin = df[['stand_by_time', 'expandable_memory', 'price', 'battery_capacity', 'is_liked']]

In [None]:
df_thin

In [None]:
df_liked = df_thin[df_thin['is_liked'] == 1]
df_liked.shape

In [None]:
df_thin['price'].describe()

In [None]:
df_thin.describe()

In [None]:
# Mean price over the rows where is_liked == 1, selected in a single .loc call.
df_thin.loc[df_thin['is_liked'] == 1, 'price'].mean()

In [None]:
# Mean price over the rows where is_liked == 0, selected in a single .loc call.
df_thin.loc[df_thin['is_liked'] == 0, 'price'].mean()

In [None]:
# Group by a scalar column name rather than a one-element list: with a list
# key, pandas >= 2.0 yields 1-tuples as group keys when the GroupBy is
# iterated, whereas a scalar key yields the plain label.
g = df_thin.groupby('is_liked')

In [None]:
g

In [None]:
# Iterating a GroupBy yields (group key, sub-DataFrame) pairs.
for key, df_key in g:
    print(key)
    print(df_key)

In [None]:
df_thin.groupby(['is_liked']).describe()

In [None]:
# Every row, every column except the last one.
x = df.iloc[:, :-1]

In [None]:
x.head()

In [None]:
type(x)

In [None]:
# Every row, last column only.
y = df.iloc[:, -1]

In [None]:
# Inspect the type of the single-column selection `y`; the type of `x` was
# already displayed right after `x` was created.
type(y)

In [None]:
y.values

# scikit-learn

In [None]:
import sklearn.datasets
import numpy as np

In [None]:
breast_cancer = sklearn.datasets.load_breast_cancer()

In [None]:
X = breast_cancer.data
Y = breast_cancer.target

In [None]:
print(X)
print(Y)

In [None]:
print(X.shape, Y.shape)

In [None]:
data = pd.DataFrame(breast_cancer.data, columns=breast_cancer.feature_names)

In [None]:
data['class'] = breast_cancer.target
data.head()

In [None]:
data.describe()

In [None]:
print(data['class'].value_counts())

In [None]:
print(breast_cancer.target_names)

In [None]:
data.groupby('class').mean()

# Train Test Split


In [None]:
# train_test_split lives in sklearn.model_selection; the former
# sklearn.cross_validation module was removed in scikit-learn 0.20.
from sklearn.model_selection import train_test_split

In [None]:
X = data.drop('class', axis=1)
Y = data['class']

In [None]:
type(X)

In [None]:
# Split with the default test fraction. random_state is fixed so the same rows
# land in train/test on every kernel restart.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=42)

In [None]:
print(Y.shape, Y_train.shape, Y_test.shape)

In [None]:
# Hold out 10% of the rows for testing. random_state is fixed so the split
# (and the class means printed from it) is reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state=42)


In [None]:
print(Y.mean(), Y_train.mean(), Y_test.mean())

In [None]:
# Hold out 10% of the rows, stratified on Y so both parts keep the class ratio.
# random_state is fixed so the split is reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.1, stratify=Y, random_state=42
)