# 1.How to create strides from a given 1D array?

* Q. From the given 1d array arr, generate a 2d matrix using strides, with a window length of 4 and strides of 2, like [[0,1,2,3], [2,3,4,5], [4,5,6,7]..]

* Input:

* arr = np.arange(15)

* arr
* #> array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14])

In [17]:
import numpy as np
def gen_strides(a, stride_len=5, window_len=5):
    """Return overlapping windows of a 1-D array as the rows of a 2-D array.

    Row ``i`` holds ``a[i * stride_len : i * stride_len + window_len]``.
    Only complete windows are produced, so trailing elements that cannot
    fill a whole window are dropped.

    Args:
        a: 1-D array-like to take the windows from.
        stride_len: Offset between the starts of consecutive windows;
            expected to be a positive integer.
        window_len: Number of elements per window; expected to be a
            positive integer.

    Returns:
        A new array of shape ``(n_windows, window_len)`` with the dtype of
        ``a``. When ``a`` is shorter than one window the result has shape
        ``(0, window_len)``.
    """
    a = np.asarray(a)
    # Floor division goes to zero or below when ``a`` is shorter than one
    # window; clamp so the result is a well-formed empty 2-D array.
    n_strides = max((a.size - window_len) // stride_len + 1, 0)
    starts = np.arange(n_strides) * stride_len
    # Broadcasting a column of start offsets against a row of in-window
    # offsets yields one row of indices per window.
    return a[starts[:, None] + np.arange(window_len)]


print(gen_strides(np.arange(15), stride_len=2, window_len=4))


[[ 0  1  2  3]
 [ 2  3  4  5]
 [ 4  5  6  7]
 [ 6  7  8  9]
 [ 8  9 10 11]
 [10 11 12 13]]


# 2.How to fill in missing dates in an irregular series of numpy dates?

* Q. Given an array containing a non-continuous sequence of dates, make it a continuous sequence by filling in the missing dates.

* Input:

* dates = np.arange(np.datetime64('2018-02-01'), np.datetime64('2018-02-25'), 2)
print(dates)
* ['2018-02-01' '2018-02-03' '2018-02-05' '2018-02-07' '2018-02-09' '2018-02-11' '2018-02-13' '2018-02-15' '2018-02-17' '2018-02-19' '2018-02-21' '2018-02-23']

In [18]:
def fill_missing_dates(dates):
    """Return a gap-free date sequence covering every entry of ``dates``.

    Each interval between consecutive entries is expanded one unit at a
    time, so the gaps do not all have to be the same length.

    Args:
        dates: 1-D array of ``numpy.datetime64`` values in ascending order.

    Returns:
        A 1-D ``datetime64`` array running from ``dates[0]`` to
        ``dates[-1]`` inclusive. An empty input gives an empty copy.
    """
    dates = np.asarray(dates)
    if dates.size == 0:
        return dates.copy()
    # One half-open run [start, start + gap) per interval. The runs may have
    # different lengths, so they are concatenated instead of being stacked
    # into a rectangular array and reshaped.
    segments = [
        np.arange(start, start + gap)
        for start, gap in zip(dates[:-1], np.diff(dates))
    ]
    # The half-open runs never include the final date; add it back.
    segments.append(dates[-1:])
    return np.concatenate(segments)


dates = np.arange(np.datetime64('2018-02-01'), np.datetime64('2018-02-25'), 2)
print(dates)

output = fill_missing_dates(dates)
output

['2018-02-01' '2018-02-03' '2018-02-05' '2018-02-07' '2018-02-09'
 '2018-02-11' '2018-02-13' '2018-02-15' '2018-02-17' '2018-02-19'
 '2018-02-21' '2018-02-23']


array(['2018-02-01', '2018-02-02', '2018-02-03', '2018-02-04',
       '2018-02-05', '2018-02-06', '2018-02-07', '2018-02-08',
       '2018-02-09', '2018-02-10', '2018-02-11', '2018-02-12',
       '2018-02-13', '2018-02-14', '2018-02-15', '2018-02-16',
       '2018-02-17', '2018-02-18', '2018-02-19', '2018-02-20',
       '2018-02-21', '2018-02-22', '2018-02-23'], dtype='datetime64[D]')

# 3.How to create a numpy array sequence given only the starting point, length and the step?

* Q. Create a numpy array of length 10, starting from 5, with a step of 3 between consecutive numbers.

In [4]:
length = 10
start = 5
step = 3


def seq(start, length, step):
    """Return ``length`` evenly spaced numbers beginning at ``start``.

    Args:
        start: First value of the sequence.
        length: Number of values to generate.
        step: Difference between consecutive values; may be negative,
            fractional or zero.

    Returns:
        1-D array ``[start, start + step, ..., start + (length - 1) * step]``.
    """
    # Scale an integer ramp instead of handing arange a computed end point:
    # with a fractional step, rounding in ``start + step * length`` can make
    # arange emit one element too many.
    return start + step * np.arange(length)


seq(start, length, step)

array([ 5,  8, 11, 14, 17, 20, 23, 26, 29, 32])

# 4.How to compute the moving average of a numpy array?

* Q. Compute the moving average of window size 3, for the given 1D array.

* Input:

* np.random.seed(100)

* Z = np.random.randint(10, size=10)

In [19]:
def moving_average(a, n=3):
    """Return the simple moving average of ``a`` over windows of ``n`` items.

    Args:
        a: Array-like of numbers; it is flattened by the cumulative sum.
        n: Window size, a positive integer.

    Returns:
        1-D float array with one mean per complete window (empty when ``a``
        holds fewer than ``n`` items).

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        # A non-positive window would otherwise slice from the wrong end and
        # quietly return meaningless numbers.
        raise ValueError('window size n must be at least 1, got {!r}'.format(n))
    ret = np.cumsum(a, dtype=float)
    # Subtracting the running total from n positions earlier leaves the sum
    # of the last n items at every index >= n.
    ret[n:] = ret[n:] - ret[:-n]
    # The first n - 1 entries are partial sums, not full windows.
    return ret[n - 1:] / n


np.random.seed(100)
Z = np.random.randint(10, size=10)
print('array: ', Z)
moving_average(Z, n=3).round(2)

array:  [8 8 3 7 7 0 4 2 5 2]


array([6.33, 6.  , 5.67, 4.67, 3.67, 2.  , 3.67, 3.  ])

# 5.How to convert numpy's datetime64 object to datetime's datetime object?

* Q. Convert numpy's datetime64 object to datetime's datetime object

* Input: a numpy datetime64 object

* dt64 = np.datetime64('2018-02-25 22:10:10')

In [20]:
from datetime import datetime

# Second-resolution numpy timestamp to convert.
dt64 = np.datetime64('2018-02-25 22:10:10')

# Extracting the scalar as a Python object yields a datetime.datetime.
dt64.item()

datetime.datetime(2018, 2, 25, 22, 10, 10)

# 6.How to find the index of n'th repetition of an item in an array?

* Q. Find the index of 5th repetition of number 1 in x.

* x = np.array([1, 2, 1, 1, 3, 4, 3, 1, 1, 2, 1, 1, 2])

In [21]:
# Sample data and the 1-based occurrence of the value 1 to locate.
x = np.array([1, 2, 1, 1, 3, 4, 3, 1, 1, 2, 1, 1, 2])
n = 5

# Solution 1: plain-Python scan collecting the position of every 1.
[pos for pos, item in enumerate(x) if item == 1][n - 1]

# Solution 2: let numpy list the matching positions, then pick the n-th.
np.flatnonzero(x == 1)[n - 1]

8

# 7.How to subtract a 1d array from a 2d array, where each item of 1d array subtracts from respective row?

* Q. Subtract the 1d array b_1d from the 2d array a_2d, such that each item of b_1d subtracts from respective row of a_2d.

a_2d = np.array([[3,3,3],[4,4,4],[5,5,5]])

b_1d = np.array([1,2,3])

In [22]:
a_2d = np.array([[3, 3, 3], [4, 4, 4], [5, 5, 5]])
b_1d = np.array([1, 2, 3])

# Reshape b_1d into a column so broadcasting pairs its i-th item with row i.
print(a_2d - b_1d.reshape(-1, 1))

[[2 2 2]
 [2 2 2]
 [2 2 2]]


# 8.How to find all the local maxima (or peaks) in a 1d array?

* Q. Find all the peaks in a 1D numpy array a. Peaks are points surrounded by smaller values on both sides.

* Input:

* a = np.array([1, 3, 7, 1, 2, 6, 0, 1])

In [9]:
a = np.array([1, 3, 7, 1, 2, 6, 0, 1])
# Sign of every step (+1 rising, -1 falling); a drop of 2 between adjacent
# signs marks a rise immediately followed by a fall.
doublediff = np.diff(np.sign(a[1:] - a[:-1]))
# Shift by one because doublediff[i] describes the element at index i + 1.
peak_locations = np.flatnonzero(doublediff == -2) + 1
peak_locations

array([2, 5], dtype=int64)

# 9.How to compute the euclidean distance between two arrays?

* Q. Compute the euclidean distance between two arrays a and b.

* Input:

a = np.array([1,2,3,4,5])

b = np.array([4,5,6,7,8])

In [23]:
a = np.array([1, 2, 3, 4, 5])
b = np.array([4, 5, 6, 7, 8])

# Euclidean distance: square root of the summed squared differences.
dist = np.sqrt(np.sum(np.square(a - b)))
dist

6.708203932499369

# 10.How to drop all missing values from a numpy array?

* Q. Drop all nan values from a 1D numpy array

* Input:

np.array([1,2,3,np.nan,5,6,7,np.nan])

In [11]:
a = np.array([1, 2, 3, np.nan, 5, 6, 7, np.nan])
# Keep only the entries that are not NaN.
a[np.logical_not(np.isnan(a))]

array([1., 2., 3., 5., 6., 7.])

# 11.How to convert a PIL image to numpy array?

* Q. Import the image from the following URL and convert it to a numpy array.

* URL = 'https://upload.wikimedia.org/wikipedia/commons/8/8b/Denali_Mt_McKinley.jpg'

In [24]:
from io import BytesIO
from PIL import Image
import PIL, requests

# Import image from URL
URL = 'https://upload.wikimedia.org/wikipedia/commons/8/8b/Denali_Mt_McKinley.jpg'
# Bound the wait and fail loudly on an HTTP error, so that an error page is
# never handed to the image decoder.
response = requests.get(URL, timeout=30)
response.raise_for_status()
I = Image.open(BytesIO(response.content))
I = I.resize((150, 150))
# PIL image -> numpy array
arr = np.asarray(I)
# Round-trip back to a PIL image and open it in the default viewer.
im = PIL.Image.fromarray(np.uint8(arr))
im.show()

# 12.How to find the grouped mean in numpy?

* Q. Find the mean of a numeric column grouped by a categorical column in a 2D numpy array

* Input:

* url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

* iris = np.genfromtxt(url, delimiter=',', dtype='object')
* names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

In [25]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')

numeric_column = iris[:, 1].astype('float')  # sepalwidth
grouping_column = iris[:, 4]  # species

# One [group, mean] pair per distinct value of the grouping column; the
# boolean mask selects the numeric entries belonging to that group.
output = [
    [group_val, numeric_column[grouping_column == group_val].mean()]
    for group_val in np.unique(grouping_column)
]

output

[[b'Iris-setosa', 3.418],
 [b'Iris-versicolor', 2.7700000000000005],
 [b'Iris-virginica', 2.974]]

# 13.How to find the duplicate records in a numpy array?

* Q. Find the duplicate entries (2nd occurrence onwards) in the given numpy array and mark them as True. First time occurrences should be False.

* Input

* np.random.seed(100)
* a = np.random.randint(0, 5, 10)
* print('Array: ', a)
* #> Array: [0 0 3 0 2 4 2 2 2 2]

In [26]:
np.random.seed(100)
a = np.random.randint(0, 5, 10)

# Flag every position as a duplicate to begin with ...
out = np.ones(len(a), dtype=bool)
# ... then clear the flag at the first occurrence of each distinct value,
# which np.unique reports through return_index.
unique_positions = np.unique(a, return_index=True)[1]
out[unique_positions] = False

print(out)

[False  True False  True False False  True  True  True  True]


# 14.How to compute the min-by-max for each row for a numpy array 2d?

* Q. Compute the min-by-max for each row for given 2d numpy array.

* np.random.seed(100)

* a = np.random.randint(1,10, [5,3])
* a
* #> array([[9, 9, 4],[8, 8, 1],[5, 3, 6],[3, 3, 3],[2, 1, 9]])

In [27]:
np.random.seed(100)
a = np.random.randint(1, 10, [5, 3])
a
# Ratio of the smallest to the largest entry of every row.
a.min(axis=1) / a.max(axis=1)

array([0.44444444, 0.125     , 0.5       , 1.        , 0.11111111])

# 15.How to find the maximum value in each row of a numpy array 2d?

* Q. Compute the maximum for each row in the given array.

* np.random.seed(100)

* a = np.random.randint(1,10, [5,3])
* a
* #> array([[9, 9, 4],[8, 8, 1],[5, 3, 6],[3, 3, 3],[2, 1, 9]])

In [28]:
np.random.seed(100)
a = np.random.randint(1, 10, [5, 3])
a
# Largest entry of every row.
a.max(axis=1)


array([9, 8, 6, 3, 9])