In [1]:
import pandas as pd
import numpy as np

from matplotlib import pyplot as plt
from scipy.stats import linregress 

In [2]:

# One million consecutive integers (0 .. 999_999) held in a NumPy array;
# this is the vectorized side of the %timeit comparison.
my_arr = np.arange(1_000_000)


In [3]:
# The same one million integers as a plain Python list, for the
# list-comprehension side of the comparison.
my_list = list(range(1_000_000))


In [4]:
# Time element-wise doubling of the NumPy array.
# NOTE(review): an assignment made inside %timeit is not expected to leave
# `my_arr2` defined in the notebook namespace — confirm before relying on it.
%timeit my_arr2 = my_arr * 2

700 µs ± 26 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [5]:
# Time the same doubling done with a pure-Python list comprehension.
# NOTE(review): as with any %timeit statement, `my_list2` is not expected to
# exist in the notebook namespace afterwards — confirm before relying on it.
%timeit my_list2 = [x * 2 for x in my_list]

27.4 ms ± 1.12 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# 2x3 array of uniform random floats in [0, 1). No seed is set in the cells
# shown here, so the values change on every run.
rand_array = np.random.rand(2, 3)

In [7]:
rand_array

array([[0.6585195 , 0.65354961, 0.85015836],
       [0.13434509, 0.19438543, 0.78485353]])

In [8]:
# Build a 3x3 integer array from a nested list (one inner list per row).
b = np.array([[1,2,3], [4,5,6], [7,8,9]])
# A bare name on the last line makes the notebook display the value.
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [9]:
# 3-D array of uniform random floats: 2 blocks, each with 3 rows and 4 columns.
a = np.random.rand(2, 3, 4)
a

array([[[0.12107957, 0.69991933, 0.84163354, 0.58838205],
        [0.13082753, 0.25946093, 0.07128255, 0.09222803],
        [0.24793002, 0.91892033, 0.68870432, 0.7220213 ]],

       [[0.22195696, 0.07475156, 0.70930497, 0.34725914],
        [0.81365285, 0.22305002, 0.75706114, 0.53927159],
        [0.9975453 , 0.35929319, 0.26494649, 0.82442265]]])

In [10]:
# One index per axis: block 1, rows 0-1 (the stop index 2 is excluded),
# columns 1 through the end. The result is a 2x3 array.
a[1, 0: 2, 1:]

array([[0.07475156, 0.70930497, 0.34725914],
       [0.22305002, 0.75706114, 0.53927159]])

In [11]:
# Number of axes (dimensions) of `a`.
a.ndim

3

In [12]:
# Element type shared by every entry of `a`.
a.dtype

dtype('float64')

In [13]:
from timeit import Timer

In [14]:
# Benchmark inputs: the same 10**6 integers as a Python list and as a NumPy
# array. The for_* / vec_* helpers read these two names as globals.
# Note that this rebinds `my_list`, which an earlier cell already defined.
my_list = list(range(10**6))
my_array = np.array(my_list)

In [15]:
def for_add(values=None):
    """Add 1 to every element with a pure-Python list comprehension.

    Parameters
    ----------
    values : iterable of numbers, optional
        Data to process. When omitted, the notebook-level ``my_list`` is
        used, so the zero-argument call made by ``Timer(for_add)`` behaves
        exactly as before.

    Returns
    -------
    list
        A new list holding each input element incremented by 1.
    """
    # Look the global up at call time; passing `values` explicitly avoids
    # depending on whichever cell last rebound `my_list`.
    data = my_list if values is None else values
    return [item + 1 for item in data]

def vec_add(values=None):
    """Add 1 to every element with a single vectorized NumPy operation.

    Parameters
    ----------
    values : array-like, optional
        Data to process; converted with ``np.asarray``. When omitted, the
        notebook-level ``my_array`` is used unchanged, so the zero-argument
        call made by ``Timer(vec_add)`` behaves exactly as before.

    Returns
    -------
    numpy.ndarray
        A new array holding each input element incremented by 1.
    """
    # `my_array` is rebound later in the notebook; passing `values`
    # explicitly avoids depending on cell execution order.
    data = my_array if values is None else np.asarray(values)
    return data + 1

In [16]:
# Timer(fn).repeat(10, 10) makes 10 measurements of 10 calls each; every
# measurement is the total seconds for those 10 calls, and the minimum is
# reported as the least-disturbed run.
for label, benchmark in (
    ('For-loop addition:', for_add),
    ('Vectorized addition:', vec_add),
):
    print(label)
    print(min(Timer(benchmark).repeat(10, 10)))


For-loop addition:
0.2492127499972412
Vectorized addition:
0.004762833999848226


In [17]:
def for_mul(values=None):
    """Double every element with a pure-Python list comprehension.

    Parameters
    ----------
    values : iterable of numbers, optional
        Data to process. When omitted, the notebook-level ``my_list`` is
        used, so the zero-argument call made by ``Timer(for_mul)`` behaves
        exactly as before.

    Returns
    -------
    list
        A new list holding each input element multiplied by 2.
    """
    # Look the global up at call time; passing `values` explicitly avoids
    # depending on whichever cell last rebound `my_list`.
    data = my_list if values is None else values
    return [item * 2 for item in data]
def vec_mul(values=None):
    """Double every element with a single vectorized NumPy operation.

    Parameters
    ----------
    values : array-like, optional
        Data to process; converted with ``np.asarray``. When omitted, the
        notebook-level ``my_array`` is used unchanged, so the zero-argument
        call made by ``Timer(vec_mul)`` behaves exactly as before.

    Returns
    -------
    numpy.ndarray
        A new array holding each input element multiplied by 2.
    """
    # `my_array` is rebound later in the notebook; passing `values`
    # explicitly avoids depending on cell execution order.
    data = my_array if values is None else np.asarray(values)
    return data * 2
# Timer(fn).repeat(10, 10) makes 10 measurements of 10 calls each; every
# measurement is the total seconds for those 10 calls, and the minimum is
# reported as the least-disturbed run.
for label, benchmark in (
    ('For-loop multiplication:', for_mul),
    ('Vectorized multiplication:', vec_mul),
):
    print(label)
    print(min(Timer(benchmark).repeat(10, 10)))

For-loop multiplication:
0.2624412080003822
Vectorized multiplication:
0.006063459000870353


In [18]:
import math
def for_sqrt(values=None):
    """Take the square root of every element with a pure-Python loop.

    Parameters
    ----------
    values : iterable of non-negative numbers, optional
        Data to process. When omitted, the notebook-level ``my_list`` is
        used, so the zero-argument call made by ``Timer(for_sqrt)`` behaves
        exactly as before.

    Returns
    -------
    list of float
        ``math.sqrt`` of each input element, in input order.

    Raises
    ------
    ValueError
        Raised by ``math.sqrt`` if any element is negative.
    """
    # Look the global up at call time; passing `values` explicitly avoids
    # depending on whichever cell last rebound `my_list`.
    data = my_list if values is None else values
    return [math.sqrt(item) for item in data]
def vec_sqrt(values=None):
    """Take the square root of every element with vectorized ``np.sqrt``.

    Parameters
    ----------
    values : array-like, optional
        Data to process; converted with ``np.asarray``. When omitted, the
        notebook-level ``my_array`` is used unchanged, so the zero-argument
        call made by ``Timer(vec_sqrt)`` behaves exactly as before.

    Returns
    -------
    numpy.ndarray
        Element-wise square roots as a floating-point array.
    """
    # `my_array` is rebound later in the notebook; passing `values`
    # explicitly avoids depending on cell execution order.
    data = my_array if values is None else np.asarray(values)
    return np.sqrt(data)
# Timer(fn).repeat(10, 10) makes 10 measurements of 10 calls each; every
# measurement is the total seconds for those 10 calls, and the minimum is
# reported as the least-disturbed run.
for label, benchmark in (
    ('For-loop square root:', for_sqrt),
    ('Vectorized square root:', vec_sqrt),
):
    print(label)
    print(min(Timer(benchmark).repeat(10, 10)))

For-loop square root:
0.6102179589979642
Vectorized square root:
0.005471417000080692


In [19]:
# With no arguments this draws one value from the standard normal
# distribution (mean 0, standard deviation 1) and returns a scalar float.
sample = np.random.normal()

In [20]:
sample

-0.4035199406324069

In [21]:
# 2x3 array of draws from a normal distribution with mean 100 (`loc`) and
# standard deviation 10 (`scale`). This rebinds `sample`, which previously
# held a scalar.
sample = np.random.normal(loc=100, scale=10, size=(2, 3))
sample

array([[ 99.20104182, 100.42716335, 103.20682812],
       [101.59265435, 109.71334889, 116.56911586]])

In [22]:
# 2x3 array of integer draws from a Poisson distribution with rate 100.
# Note the name is `samples` (plural), distinct from `sample` above.
samples = np.random.poisson(lam=100, size=(2, 3))
samples

array([[ 88, 102,  95],
       [104, 100,  92]])

In [23]:
# Column-oriented input: each key becomes a column label, and list and
# ndarray values can be mixed as long as they all have the same length.
# NOTE(review): 'coll1' (double "l") looks like a typo for 'col1' — confirm
# before renaming, since the key is the column label shown in the output.
my_dict = {
    'coll1': [2, 1, 4],
    'col2': np.array([3, 4, 2]),
    'col3': [4, 5, 6],
    'col4': np.array([7, 3, 4]),
}

df = pd.DataFrame(data=my_dict)
df

Unnamed: 0,coll1,col2,col3,col4
0,2,3,4,7
1,1,4,5,3
2,4,2,6,4


In [24]:
# Label-based lookup: returns the row whose index label is 2 as a Series
# keyed by column name.
df.loc[2]

coll1    4
col2     2
col3     6
col4     4
Name: 2, dtype: int64

In [25]:
# Row label 2, restricted to the listed columns; still returned as a Series.
df.loc[2, ['col2', 'col3']]

col2    2
col3    6
Name: 2, dtype: int64

In [26]:
import pandas as pd

In [27]:
# Smaller two-row example mixing list and ndarray columns.
# This rebinds `my_dict`, replacing the four-column dict defined earlier.
my_dict = {'col1': [1, 2], 'col2': np.array([3, 4]),'col3': [5, 6]}

In [28]:
# Rebinds `df` to a new frame built from the current `my_dict`; the earlier
# three-row frame is no longer reachable under this name.
df = pd.DataFrame(my_dict)

In [29]:
df

Unnamed: 0,col1,col2,col3
0,1,3,5
1,2,4,6


In [31]:
# Alternative construction: a 2-D array supplies the rows and the column
# labels are passed separately.
# NOTE: this rebinds `my_array`, the global that vec_add / vec_mul / vec_sqrt
# read when called, so re-running those benchmarks after this cell would use
# this 2x3 array instead of the 10**6-element one.
my_array = np.array([
    [1, 3, 5],
    [2, 4, 6],
])
alt_df = pd.DataFrame(data=my_array, columns=['col1', 'col2', 'col3'])
alt_df

Unnamed: 0,col1,col2,col3
0,1,3,5
1,2,4,6


In [32]:
# Label-based lookup: the row whose index label is 0.
df.loc[0]

col1    1
col2    3
col3    5
Name: 0, dtype: int64

In [33]:

# Position-based lookup: the first row. It matches df.loc[0] here only
# because the index labels happen to equal the positions.
df.iloc[0]

col1    1
col2    3
col3    5
Name: 0, dtype: int64

In [36]:
# Row label 0, restricted to the listed columns.
df.loc[0, ['col2', 'col3']]

col2    3
col3    5
Name: 0, dtype: int64

In [37]:
# Adds a new column in place; the list needs one value per existing row.
# Because this mutates `df`, earlier displayed outputs of `df` are now stale.
df['col4'] = [10,10]

In [39]:
# Assigning to a label that does not exist appends a new row with that label
# (one value per column). The existing labels are 0 and 1, so using 3 leaves
# the index as 0, 1, 3 with no label 2.
# NOTE(review): if the intent was to append at the next position,
# `df.loc[len(df)]` would give label 2 — confirm which was meant.
df.loc[3] = [1,2,3,4]

In [40]:
df

Unnamed: 0,col1,col2,col3,col4
0,1,3,5,10
1,2,4,6,10
3,1,2,3,4
