In [86]:
import numpy as np 

## Classical arrays 
----

In [87]:
a = np.array([1,2,3], dtype='int32')
b = np.array([1,2,4])

#### Masking

In [88]:
a[a == b]

array([1, 2], dtype=int32)

In [89]:
a[a != b]

array([3], dtype=int32)

#### Shapes and sizes

In [90]:
a.ndim 

1

In [91]:
a.shape

(3,)

In [92]:
np.array([[2], [3]]).ndim

2

In [93]:
c = np.array([[[2,2],[3,3]], [[4,4],[5,5]], [[6,6],[7,7]]])

In [94]:
c.itemsize, c.nbytes

(8, 96)

In [95]:
a.itemsize, a.nbytes

(4, 12)

#### Access and replace elements 

In [96]:
a[0]

1

In [97]:
a = np.array([[1,2,3,4], [5,6,7,8]])

In [98]:
a[1][0]

5

In [99]:
a[0, :] # row then columns

array([1, 2, 3, 4])

In [100]:
a[1, :]

array([5, 6, 7, 8])

In [101]:
a[:, 0] # = c[1]

array([1, 5])

In [102]:
a[:, 1]

array([2, 6])

In [103]:
a[0, 1:-1:2]

array([2])

In [104]:
a

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

In [105]:
a[0, 1:-1:2] = 12

In [106]:
a

array([[ 1, 12,  3,  4],
       [ 5,  6,  7,  8]])

In [303]:
np.ones((4,2,2), dtype='int32')

array([[[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]],

       [[1, 1],
        [1, 1]]], dtype=int32)

In [304]:
np.full((2,2), 99)


array([[99, 99],
       [99, 99]])

In [305]:
np.full_like(a, 4)


array([[4, 4, 4, 4],
       [4, 4, 4, 4]])

In [306]:
np.random.randint(-4,8, size=(3,3))

array([[ 4,  6,  0],
       [ 3,  3, -3],
       [ 7, -4,  0]])

In [307]:
a = np.array([1,2,0,0])
b = np.array([1,0,1,0])
a + b

array([2, 2, 1, 0])

In [356]:
a.reshape(2, 2) #indicate the shape to get (multiplcation of all values should be the same)

array([[1, 2],
       [0, 0]])

In [146]:
import pandas as pd

In [357]:
dataset = [["2018-09-19", "98405", "12309888", 1],
           ["2018-09-19", "98402", "12309888", 2], 
           ["2018-09-19", "98402", "12309118", 3]]
df = pd.DataFrame(dataset, columns=["date", "dzip", "sku", "eta"])

In [358]:
df

Unnamed: 0,date,dzip,sku,eta
0,2018-09-19,98405,12309888,1
1,2018-09-19,98402,12309888,2
2,2018-09-19,98402,12309118,3


In [249]:
df["date"] = pd.to_datetime(df.date)

In [250]:
df["day"] = df['date'].dt.day
df["week"] = df['date'].dt.isocalendar().week

In [251]:
sku_dict = {"12309888" : "45000", "12309118": "32000"}

In [252]:
df["ozip"] = df["sku"].map(sku_dict)

In [253]:
zip_dict = {"45000": [32, 65], "32000": [38, 69], "98405": [25, 75], "98402": [26, 75]}

In [254]:
df["dlat"] = df["dzip"].map(zip_dict).apply(lambda x: x[0])
df["dlon"] = df["dzip"].map(zip_dict).apply(lambda x: x[1])

df["olat"] = df["ozip"].map(zip_dict).apply(lambda x: x[0])
df["olon"] = df["ozip"].map(zip_dict).apply(lambda x: x[1])

In [255]:
df

Unnamed: 0,date,dzip,sku,eta,day,week,ozip,dlat,dlon,olat,olon
0,2018-09-19,98405,12309888,1,19,38,45000,25,75,32,65
1,2018-09-19,98402,12309888,2,19,38,45000,26,75,32,65
2,2018-09-19,98402,12309118,3,19,38,32000,26,75,38,69


In [256]:
columns = ["day", "week", "dlat", "dlon", "olat", "olon"]
X = df[columns]
y = df["eta"]

In [257]:
from sklearn.ensemble import RandomForestRegressor

In [258]:
rfr = RandomForestRegressor()
rfr.fit(X, y)

RandomForestRegressor()

In [None]:
rfr.predict()

In [260]:
new_customer = ["2018-09-19", "98405", "12309888"]

In [261]:
def pipeline(array):
    date = pd.to_datetime(array[0])
    day, week = date.day, date.week
    dlat, dlon = zip_dict[array[1]]
    ozip = sku_dict[array[2]]
    olat, olon = zip_dict[ozip]
    return [day, week, dlat, dlon, olat, olon]

In [262]:
customer_processed = pipeline(new_customer)

In [263]:
rfr.predict(pd.DataFrame(customer_processed).T)



array([1.29])

In [264]:
new_customer = [["2018-09-19", "98405", "12309888"], ["2018-09-19", "98405", "12309118"]]

In [265]:
def pipeline_2(array):
    all_arrays = []
    for elem in array:
        date = pd.to_datetime(elem[0])
        day, week = date.day, date.week
        dlat, dlon = zip_dict[elem[1]]
        ozip = sku_dict[elem[2]]
        olat, olon = zip_dict[ozip]
        features = [day, week, dlat, dlon, olat, olon]
        all_arrays.append(features)
    return all_arrays

In [266]:
%%timeit
pipeline_2(new_customer)

139 µs ± 2.03 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [267]:
rfr.predict(pipeline_2(new_customer))



array([1.29, 2.11])

In [310]:
dates = np.array(list(map(np.datetime64, new_customer[:,0])))

In [312]:
new_customer = np.array([["2018-09-19", "98405", "12309888"], ["2018-09-19", "98405", "12309118"]])

In [None]:
dlat, dlon = zip_dict[elem[1]]
        ozip = sku_dict[elem[2]]
        olat, olon = zip_dict[ozip]

In [331]:
all_arrays

[[19, 3], [19, 3]]

In [347]:
def convert_date_pandas(new_customer):
    to_return = []
    for array in new_customer: 
        date = pd.to_datetime(array[0])
        to_return.append([date.day, date.weekday()])
    return to_return

In [349]:
def convert_date_np(new_customer):
    to_return = []
    for date in map(np.datetime64, new_customer[:,0]):
        converted = date.astype(datetime.datetime)
        to_return.append([converted.day, converted.isoweekday() -1])
    return to_return

In [359]:
%%timeit 
convert_date_pandas(new_customer)

150 µs ± 3.27 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [360]:
%%timeit 
convert_date_np(new_customer)

7.24 µs ± 155 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [352]:
convert_date_pandas(new_customer)

[[19, 2], [19, 2]]

In [353]:
convert_date_np(new_customer)

[[19, 2], [19, 2]]

In [327]:
new_customer[:, 1]

array(['98405', '98405'], dtype='<U10')

In [None]:
zip_dict.get

<function dict.get(key, default=None, /)>

In [212]:
def pipeline_3(array):
    np.array(list(map(np.datetime64, new_customer[:,0])))
    [i.astype(datetime.datetime).isoweekday() for i in map(np.datetime64, new_customer[:,0]]
    new_customer[:,0]

IndentationError: expected an indented block (2553754343.py, line 1)

In [281]:
np.array(list(map(np.datetime64, new_customer[:,0])))

ValueError: Error parsing datetime string "<map objec" at position 0

In [271]:
new_customer[:,0].map(np.datetime64)

AttributeError: 'numpy.ndarray' object has no attribute 'map'

In [228]:
pd.to_datetime(new_customer[:,0])

DatetimeIndex(['2019-01-01', '2019-01-01'], dtype='datetime64[ns]', freq=None)

In [242]:
t = '2018-09-19'
t = np.datetime64(t)
day = t.astype(datetime.datetime).isoweekday()

ValueError: Error parsing datetime string "01/01/2019" at position 2

In [243]:
day

3

In [244]:
t = '2018-09-19'
t = np.datetime64(t)
day = t.astype(datetime.datetime).day

19