Numpy is used for numerical analysis

This will cover

- Indexing
- Aggregate Function
- Logical Operations
- NPS Case Study

### Additional 
FitBit Case study

In [1]:
import numpy as np

# NPS - Net Promoter Score
1 - 6    detractor
7 - 8    Neutral
9 - 10   Promoter



### When promoter is above than 70% company is performing very good

## How to load text file

`numpy.loadtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding=None, max_rows=None, *, quotechar=None, like=None)`

In [2]:
score = np.loadtxt("Files/survey.txt",dtype=int)

In [3]:
score

array([ 7, 10,  5, ...,  5,  9, 10])

In [4]:
# Starting 5 elemts of the score
score[:5]

array([ 7, 10,  5,  9,  9])

In [5]:
type(score)

numpy.ndarray

In [6]:
score.ndim

1

In [7]:
score.shape

(1167,)

# Aggregators

In [8]:
score.min()

1

In [9]:
score.max()

10

### NPS Case Study Solution

In [10]:
detractors = score[score<=6]

In [12]:
detractors

array([5, 4, 4, 5, 1, 5, 5, 1, 4, 5, 4, 4, 4, 5, 1, 4, 1, 4, 1, 5, 5, 1,
       1, 4, 1, 5, 4, 1, 1, 4, 1, 5, 1, 4, 4, 1, 1, 1, 1, 1, 1, 1, 4, 1,
       1, 5, 5, 5, 4, 4, 1, 4, 1, 4, 1, 5, 1, 1, 5, 4, 4, 4, 4, 1, 4, 5,
       4, 4, 1, 1, 5, 5, 1, 5, 1, 5, 5, 4, 5, 4, 1, 1, 1, 1, 4, 1, 4, 4,
       5, 4, 1, 1, 1, 1, 5, 4, 5, 5, 4, 1, 5, 1, 4, 4, 1, 1, 1, 4, 4, 5,
       5, 4, 5, 5, 5, 1, 4, 1, 5, 5, 1, 5, 1, 1, 5, 5, 4, 4, 1, 4, 4, 4,
       1, 1, 4, 4, 4, 5, 5, 1, 1, 4, 1, 4, 1, 4, 1, 4, 1, 4, 1, 5, 4, 4,
       5, 1, 4, 5, 5, 5, 1, 5, 4, 1, 1, 5, 5, 5, 4, 5, 4, 4, 1, 4, 4, 4,
       4, 5, 1, 5, 5, 1, 4, 4, 5, 1, 1, 4, 5, 5, 5, 1, 4, 5, 5, 4, 1, 5,
       5, 5, 1, 1, 5, 5, 1, 1, 1, 4, 5, 5, 4, 4, 4, 5, 1, 4, 1, 4, 5, 4,
       5, 5, 1, 5, 1, 5, 5, 1, 4, 5, 5, 4, 1, 5, 1, 4, 1, 4, 1, 1, 1, 1,
       1, 1, 4, 1, 5, 4, 5, 1, 5, 1, 5, 4, 4, 4, 4, 5, 5, 1, 4, 1, 5, 5,
       1, 4, 1, 1, 4, 4, 4, 4, 1, 4, 1, 1, 4, 1, 5, 4, 1, 1, 5, 4, 5, 4,
       4, 4, 1, 5, 5, 1, 4, 5, 4, 4, 4, 1, 4, 1, 4,

In [13]:
promoters = score[score>=9]

In [26]:
tot_promoters = promoters.shape[0]

In [24]:
tot_detractors = detractors.shape[0]

In [20]:
total = score.shape[0]

In [25]:
detractor_per = round((tot_detractors/total)*100)

In [27]:
promoter_per = round((tot_promoters/total)*100)

In [29]:
print(f"Total Percentage for promoters is : {promoter_per}% \nTotal Percentage for promoters is : {detractor_per}%")

Total Percentage for promoters is : 52% 
Total Percentage for promoters is : 28%


### 2D Matrix

`arr.reshape(r,c)`

In [32]:
np.arange(16).shape

(16,)

In [34]:
#Reshape
np.arange(16).reshape(4,4)

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [36]:
np.arange(16).reshape(4,5) # the r and c should be according to the data

ValueError: cannot reshape array of size 16 into shape (4,5)

In [37]:
np.arange(16).reshape(4,4).shape

(4, 4)

### Task
- Create an array - 10,20,30,...90
- Reshape 3 x 3

In [43]:
arr = np.arange(10,100,10)

In [44]:
arr.reshape(3,3)

array([[10, 20, 30],
       [40, 50, 60],
       [70, 80, 90]])

### mention -1 to the column it will find out itself how to devide the columns and vice versa but we have to mention either row or column

In [47]:
np.arange(10,110,10).reshape(2,-1)

array([[ 10,  20,  30,  40,  50],
       [ 60,  70,  80,  90, 100]])

In [48]:
np.arange(10,110,10).reshape(-1,5)

array([[ 10,  20,  30,  40,  50],
       [ 60,  70,  80,  90, 100]])

In [49]:
np.arange(10,110,10).reshape(-1,-1)

ValueError: can only specify one unknown dimension

# Indexing

In [50]:
arr = np.arange(9)

In [52]:
arr[3:]

array([3, 4, 5, 6, 7, 8])

In [53]:
arr[:5]

array([0, 1, 2, 3, 4])

In [54]:
arr[[3,6,8]]

array([3, 6, 8])

In [55]:
np.arange(10,101,10)

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

### Indexing with 2D Arrays

In [57]:
arr = np.arange(9).reshape(3,3)

In [58]:
arr

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [59]:
arr[1][1]

4

In [61]:
arr[1,1] #arr[r,c]

4

In [64]:
arr[1:3,0:2]

array([[3, 4],
       [6, 7]])

In [72]:
arr[1:3,1:2]

array([[4],
       [7]])

In [73]:
arr[1:3,1]

array([4, 7])

In [74]:
arr[[1,2]]

array([[3, 4, 5],
       [6, 7, 8]])

In [77]:
arr[[1,2,3]] # will throw error because index 3 is not present

IndexError: index 3 is out of bounds for axis 0 with size 3

In [76]:
arr[[0,1,2],[0,1,2]] # concider as r1,r2,r3 and c1,c2,c3

array([0, 4, 8])

array([[0, 1, 2, 3, 4, 5, 6, 7, 8]])

# Quiz

In [95]:
a = [1,2,3,4,5,6,7,8,9]
b = [8,7,6]
a[3:] = b[::-2]

In [103]:
a #answer

[1, 2, 3, 6, 8]

In [97]:
arr[[0,1,1],[1,0,1]]

array([1, 3, 4])

In [98]:
arr = np.arange(12).reshape(3,4)

In [99]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [102]:
# Every elemnt greater than 6
arr[arr>6]

array([ 7,  8,  9, 10, 11])

# Quiz

In [104]:
a = np.arange(6)

In [105]:
a

array([0, 1, 2, 3, 4, 5])

In [106]:
mask = (a%2 == 0)

In [107]:
a[mask] = -1

In [108]:
a

array([-1,  1, -1,  3, -1,  5])

# Aggregate Functions


### axis 
- 0 = column
- 1 = row

In [122]:
arr = np.arange(1,1000,23/7).reshape(5,-1)

In [123]:
arr

array([[  1.        ,   4.28571429,   7.57142857,  10.85714286,
         14.14285714,  17.42857143,  20.71428571,  24.        ,
         27.28571429,  30.57142857,  33.85714286,  37.14285714,
         40.42857143,  43.71428571,  47.        ,  50.28571429,
         53.57142857,  56.85714286,  60.14285714,  63.42857143,
         66.71428571,  70.        ,  73.28571429,  76.57142857,
         79.85714286,  83.14285714,  86.42857143,  89.71428571,
         93.        ,  96.28571429,  99.57142857, 102.85714286,
        106.14285714, 109.42857143, 112.71428571, 116.        ,
        119.28571429, 122.57142857, 125.85714286, 129.14285714,
        132.42857143, 135.71428571, 139.        , 142.28571429,
        145.57142857, 148.85714286, 152.14285714, 155.42857143,
        158.71428571, 162.        , 165.28571429, 168.57142857,
        171.85714286, 175.14285714, 178.42857143, 181.71428571,
        185.        , 188.28571429, 191.57142857, 194.85714286,
        198.14285714],
       [201.42857

In [124]:
np.min(arr)

1.0

In [125]:
arr.min()

1.0

In [126]:
np.max(arr)

999.8571428571428

In [127]:
np.mean(arr)

500.42857142857133

In [128]:
np.median(arr)

500.4285714285714

In [129]:
np.min(arr,axis=1)

array([  1.        , 201.42857143, 401.85714286, 602.28571429,
       802.71428571])

In [130]:
np.max(arr,axis=0)

array([802.71428571, 806.        , 809.28571429, 812.57142857,
       815.85714286, 819.14285714, 822.42857143, 825.71428571,
       829.        , 832.28571429, 835.57142857, 838.85714286,
       842.14285714, 845.42857143, 848.71428571, 852.        ,
       855.28571429, 858.57142857, 861.85714286, 865.14285714,
       868.42857143, 871.71428571, 875.        , 878.28571429,
       881.57142857, 884.85714286, 888.14285714, 891.42857143,
       894.71428571, 898.        , 901.28571429, 904.57142857,
       907.85714286, 911.14285714, 914.42857143, 917.71428571,
       921.        , 924.28571429, 927.57142857, 930.85714286,
       934.14285714, 937.42857143, 940.71428571, 944.        ,
       947.28571429, 950.57142857, 953.85714286, 957.14285714,
       960.42857143, 963.71428571, 967.        , 970.28571429,
       973.57142857, 976.85714286, 980.14285714, 983.42857143,
       986.71428571, 990.        , 993.28571429, 996.57142857,
       999.85714286])

In [131]:
np.max(arr,axis=1)

array([198.14285714, 398.57142857, 599.        , 799.42857143,
       999.85714286])

In [132]:
np.median(arr,axis=1)

array([ 99.57142857, 300.        , 500.42857143, 700.85714286,
       901.28571429])

In [133]:
np.median(arr,axis=0)

array([401.85714286, 405.14285714, 408.42857143, 411.71428571,
       415.        , 418.28571429, 421.57142857, 424.85714286,
       428.14285714, 431.42857143, 434.71428571, 438.        ,
       441.28571429, 444.57142857, 447.85714286, 451.14285714,
       454.42857143, 457.71428571, 461.        , 464.28571429,
       467.57142857, 470.85714286, 474.14285714, 477.42857143,
       480.71428571, 484.        , 487.28571429, 490.57142857,
       493.85714286, 497.14285714, 500.42857143, 503.71428571,
       507.        , 510.28571429, 513.57142857, 516.85714286,
       520.14285714, 523.42857143, 526.71428571, 530.        ,
       533.28571429, 536.57142857, 539.85714286, 543.14285714,
       546.42857143, 549.71428571, 553.        , 556.28571429,
       559.57142857, 562.85714286, 566.14285714, 569.42857143,
       572.71428571, 576.        , 579.28571429, 582.57142857,
       585.85714286, 589.14285714, 592.42857143, 595.71428571,
       599.        ])

In [134]:
np.sum(arr,axis=1)

array([ 6073.85714286, 18300.        , 30526.14285714, 42752.28571429,
       54978.42857143])

In [136]:
prices = np.array([10,20,25,40,50])

In [137]:
budget = 50

In [145]:
mask = prices>budget

In [142]:
mask

array([False, False, False, False, False])

In [141]:
np.any(mask)

False

In [143]:
budget = 20

In [146]:
mask

array([False, False,  True,  True,  True])

In [148]:
np.any(mask) # If even a single value is true it will give the output as true

True

In [150]:
np.all(mask) # if only all the values are true it will return the true

False

In [155]:
np.where(prices<20,"wohoooo",prices) #np.where(condition,true_action,false_action)

array(['wohoooo', '20', '25', '40', '50'], dtype='<U11')

In [156]:
np.where(prices)

(array([0, 1, 2, 3, 4], dtype=int64),)

In [157]:
prices

array([10, 20, 25, 40, 50])

# FitBit Case Study

In [159]:
data = np.loadtxt("Files/fit.txt",dtype=str)

In [160]:
data

array([['06-10-2017', '5464', 'Neutral', '181', '5', 'Inactive'],
       ['07-10-2017', '6041', 'Sad', '197', '8', 'Inactive'],
       ['08-10-2017', '25', 'Sad', '0', '5', 'Inactive'],
       ['09-10-2017', '5461', 'Sad', '174', '4', 'Inactive'],
       ['10-10-2017', '6915', 'Neutral', '223', '5', 'Active'],
       ['11-10-2017', '4545', 'Sad', '149', '6', 'Inactive'],
       ['12-10-2017', '4340', 'Sad', '140', '6', 'Inactive'],
       ['13-10-2017', '1230', 'Sad', '38', '7', 'Inactive'],
       ['14-10-2017', '61', 'Sad', '1', '5', 'Inactive'],
       ['15-10-2017', '1258', 'Sad', '40', '6', 'Inactive'],
       ['16-10-2017', '3148', 'Sad', '101', '8', 'Inactive'],
       ['17-10-2017', '4687', 'Sad', '152', '5', 'Inactive'],
       ['18-10-2017', '4732', 'Happy', '150', '6', 'Active'],
       ['19-10-2017', '3519', 'Sad', '113', '7', 'Inactive'],
       ['20-10-2017', '1580', 'Sad', '49', '5', 'Inactive'],
       ['21-10-2017', '2822', 'Sad', '86', '6', 'Inactive'],
       ['22-10

In [162]:
# Columns
# Date, Step_Count, Mood, Calories Burnt, Hours Of Sleep, Feeling Active

In [163]:
#EDA -  Explore Data Analysis

In [164]:
data.shape

(96, 6)

In [165]:
data[0]

array(['06-10-2017', '5464', 'Neutral', '181', '5', 'Inactive'],
      dtype='<U10')

In [171]:
# Transpose - Change rows to column and column to rows
arr = np.arange(10,101,10)
arr.reshape(2,-1)
arr.T

array([ 10,  20,  30,  40,  50,  60,  70,  80,  90, 100])

In [173]:
dataT = data.T

In [174]:
# data[0] - Date
# data[1] - step count and so on

In [175]:
Date, Step_Count, Mood, Calories_Burnt, Hours_Of_Sleep, Feeling_Active = dataT

In [176]:
Date

array(['06-10-2017', '07-10-2017', '08-10-2017', '09-10-2017',
       '10-10-2017', '11-10-2017', '12-10-2017', '13-10-2017',
       '14-10-2017', '15-10-2017', '16-10-2017', '17-10-2017',
       '18-10-2017', '19-10-2017', '20-10-2017', '21-10-2017',
       '22-10-2017', '23-10-2017', '24-10-2017', '25-10-2017',
       '26-10-2017', '27-10-2017', '28-10-2017', '29-10-2017',
       '30-10-2017', '31-10-2017', '01-11-2017', '02-11-2017',
       '03-11-2017', '04-11-2017', '05-11-2017', '06-11-2017',
       '07-11-2017', '08-11-2017', '09-11-2017', '10-11-2017',
       '11-11-2017', '12-11-2017', '13-11-2017', '14-11-2017',
       '15-11-2017', '16-11-2017', '17-11-2017', '18-11-2017',
       '19-11-2017', '20-11-2017', '21-11-2017', '22-11-2017',
       '23-11-2017', '24-11-2017', '25-11-2017', '26-11-2017',
       '27-11-2017', '28-11-2017', '29-11-2017', '30-11-2017',
       '01-12-2017', '02-12-2017', '03-12-2017', '04-12-2017',
       '05-12-2017', '06-12-2017', '07-12-2017', '08-12

In [177]:
Step_Count = Step_Count.astype(int)

In [178]:
Step_Count

array([5464, 6041,   25, 5461, 6915, 4545, 4340, 1230,   61, 1258, 3148,
       4687, 4732, 3519, 1580, 2822,  181, 3158, 4383, 3881, 4037,  202,
        292,  330, 2209, 4550, 4435, 4779, 1831, 2255,  539, 5464, 6041,
       4068, 4683, 4033, 6314,  614, 3149, 4005, 4880, 4136,  705,  570,
        269, 4275, 5999, 4421, 6930, 5195,  546,  493,  995, 1163, 6676,
       3608,  774, 1421, 4064, 2725, 5934, 1867, 3721, 2374, 2909, 1648,
        799, 7102, 3941, 7422,  437, 1231, 1696, 4921,  221, 6500, 3575,
       4061,  651,  753,  518, 5537, 4108, 5376, 3066,  177,   36,  299,
       1447, 2599,  702,  133,  153,  500, 2127, 2203])

In [180]:
total_moods = np.unique(Mood)

In [187]:
total_moods

array(['Happy', 'Neutral', 'Sad'], dtype='<U10')

In [181]:
Mood == "Happy"

array([False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False,  True, False,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
       False,  True,  True,  True,  True,  True,  True,  True, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
        True, False,  True, False, False, False, False, False, False,
       False, False,  True, False, False,  True])

In [182]:
Step_Count[Mood=="Happy"]

array([4732,  330, 4550, 4435, 4779, 1831, 2255,  539, 5464, 4068, 4683,
       4033, 6314,  614, 3149, 4005, 4880, 4136,  705,  269, 4275, 5999,
       4421, 6930, 5195,  546,  493,  995, 3608,  774, 1421, 4064, 2725,
       5934, 1867, 7422, 5537, 5376,  153, 2203])

In [183]:
Step_Count[Mood=="Sad"]

array([6041,   25, 5461, 4545, 4340, 1230,   61, 1258, 3148, 4687, 3519,
       1580, 2822,  181, 6676, 3721, 1648,  799, 1696,  221, 4061,  651,
        753,  518,  177,   36,  299,  702,  133])

In [190]:
Calories_Burnt[Mood=="Happy"].shape[0]

40

In [189]:
Calories_Burnt[Mood=="Sad"].shape[0]

29

# Task

When person sleep more - When he is happy or sad?

Solution will be in start of [Numpy3 File](Numpy/Tutorial/Numpy3.ipynb)

In [191]:
import numpy as np
x = np.array([[200,200,200],[300,300,300],[400,400,400]])
v = np.array([200,300,400])
print((x / v[:,None])[1][1]) 

1.0


In [192]:
v[:,None]

array([[200],
       [300],
       [400]])

In [193]:
(x / v[:,None])


array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [194]:
import numpy as np
p = np.array([[0], [10], [20]])
q = np.array([10, 11, 12]) 
print((p + q)[1][1]) 

21


In [195]:
(p + q)

array([[10, 11, 12],
       [20, 21, 22],
       [30, 31, 32]])

In [196]:
q

array([10, 11, 12])

In [197]:
import numpy as np
a = np.array([[16, 5], [81, 6], [33, 1]])
x=np.transpose(a).reshape(2,3)
print(x.flatten())

[16 81 33  5  6  1]


In [198]:
np.transpose(a)

array([[16, 81, 33],
       [ 5,  6,  1]])

In [199]:
a

array([[16,  5],
       [81,  6],
       [33,  1]])

In [200]:
np.transpose(a).reshape(2,3)

array([[16, 81, 33],
       [ 5,  6,  1]])

In [201]:
np.transpose(a).reshape(2,3).flatten()

array([16, 81, 33,  5,  6,  1])