<h1>Numpy Section From Book</h1>
<br>
<h3>Array Creation, Indexing and Operations</h3>

In [15]:
import numpy as np
# Element wise operations
data = np.arange(11)
print(data * data) # Each element is multiplied by 2, this is an element wise operation not cross product
data2 = np.arange(10)
# print(data + data2) this won't work because the arrays don't have the same shape

[  0   1   4   9  16  25  36  49  64  81 100]


In [11]:
# Its possible to create numpy arrays from any sequence, so what about a dictionary? 
data = np.array({
    1:[i for i in range(5)],
    2: [i for i in range(5, 10)],
    3: "Hello"
})
print(data.ndim) # 0D array? so it is just an element of an array
print(data.dtype)
print(data)
# You can't create array from a dict it will just create a single array element, maybe create from a list of dicts

0
object
{1: [0, 1, 2, 3, 4], 2: [5, 6, 7, 8, 9], 3: 'Hello'}


In [12]:
# Methods for creating arrays - not exhaustive but a good amount
arr1 = np.arange(1, 15, 2)
arr2 = np.ones_like(arr1)
arr3 = np.random.rand(5)
arr4 = np.identity(4)
arr5 = np.empty_like(arr4)
print(arr1, arr2, arr3, arr4, arr5, sep='\n')

[ 1  3  5  7  9 11 13]
[1 1 1 1 1 1 1]
[0.532781   0.10809884 0.6862789  0.92068998 0.27868183]
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]
[[2.11312272e-316 0.00000000e+000 6.92455134e-310 6.92455134e-310]
 [6.92455135e-310 6.92455134e-310 6.92455134e-310 6.92455135e-310]
 [6.92455134e-310 6.92455134e-310 6.92455134e-310 6.92455133e-310]
 [6.92455134e-310 6.92455134e-310 6.92455133e-310 6.92455134e-310]]


In [13]:
# You can change types of numpy arrays
print(f"Type of arr1 before change {arr1.dtype}")
arr1 = arr1.astype(np.uint8)
print(f"Type of arr1 after changing is {arr1.dtype}")
# One very useful case is to convert a list of strings representing numbers into a list of numbers

Type of arr1 before change int64
Type of arr1 after changing is uint8


In [25]:
# Indexing arrays
arr2D = np.random.randint(10, size=(3,5))
print(arr2D)
print(arr2D[-1, 2])
# For multidimensional arrays a slice slices along the arrays in that dimension e.g
# Perform a slice on top dimension
print(arr2D[:-1])
# Perform a slice on bottom dimension
print(arr2D[0, :-1])

names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])
data = np.array([[4, 7], [0, 2], [-5, 6], [0, 0], [1, 2],
                [-12, -4], [3, 4]])
# Suppose that each name in names is represented by a vector in data, if you want to get all the vectors that represent Bob
# You could use a condition to index data
bob_vectors = data[names == 'Bob'] # No need to do .copy() since it already creates a copy
data < 0
# If I wanted to get the values in the last diagonal I'd use lists to index
print(arr2D[[0, 1, 2], [2, 3, 4]])

[[5 7 1 9 9]
 [4 0 2 8 9]
 [8 3 7 4 8]]
7
[[5 7 1 9 9]
 [4 0 2 8 9]]
[5 7 1 9]
[1 8 8]


<h3>Numpy Random Module</h3>

In [30]:
# Can generate arrays of random values, distribution values e.t.c
rng = np.random.default_rng(seed=42) # Used to create a random number generator with a specific seed
arr1 = rng.standard_normal(size=(4, 4)) # Generates a 4 x 4 matrix of values following a normal distribution
print(arr1)
arr2 = rng.uniform(low=5, high=10, size=(4, 4))
print(arr2)

[[ 0.30471708 -1.03998411  0.7504512   0.94056472]
 [-1.95103519 -1.30217951  0.1278404  -0.31624259]
 [-0.01680116 -0.85304393  0.87939797  0.77779194]
 [ 0.0660307   1.12724121  0.46750934 -0.85929246]]
[[7.77292394 5.31908628 9.13815586 8.158322  ]
 [8.7904387  6.77262984 9.85349012 9.46560561]
 [8.89191749 5.97319354 7.33360502 5.21901883]
 [5.77144746 8.41524477 8.72381078 9.83754866]]


<h3>Universal Functions</h3>

In [35]:
# Seed random number generator
gen = np.random.default_rng(seed=42)
arr1 = gen.integers(0, high=10, size=(20))
print(np.isnan(arr1))
print(np.square(arr1))

[False False False False False False False False False False False False
 False False False False False False False False]
[ 0 49 36 16 16 64  0 36  4  0 25 81 49 49 49 49 25  1 64 16]


<h2>Array Oriented Programming</h2>

In [36]:
# Use np.meshgrid to create 2 matrices from lists
arr1 = np.arange(11)
A, B = np.meshgrid(arr1, arr1)
print(A)
print(B)

[[ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]
 [ 0  1  2  3  4  5  6  7  8  9 10]]
[[ 0  0  0  0  0  0  0  0  0  0  0]
 [ 1  1  1  1  1  1  1  1  1  1  1]
 [ 2  2  2  2  2  2  2  2  2  2  2]
 [ 3  3  3  3  3  3  3  3  3  3  3]
 [ 4  4  4  4  4  4  4  4  4  4  4]
 [ 5  5  5  5  5  5  5  5  5  5  5]
 [ 6  6  6  6  6  6  6  6  6  6  6]
 [ 7  7  7  7  7  7  7  7  7  7  7]
 [ 8  8  8  8  8  8  8  8  8  8  8]
 [ 9  9  9  9  9  9  9  9  9  9  9]
 [10 10 10 10 10 10 10 10 10 10 10]]


In [39]:
# Building arrays based on conditions
# Python Way (alower and hard to do for multidimensional arrays)
arr1 = [i for i in range(11)]
arr2 = [x * 2 if x % 2 == 0 else x for x in arr1]
# Numpy way
arr1 = np.array(arr1)
arr2 = np.where(arr1 % 2 == 0, arr1 * 2, arr1)
arr2

array([ 0,  1,  4,  3,  8,  5, 12,  7, 16,  9, 20])

In [43]:
# You can also do mathematical and statistical functions on all the data in the array or on an axis in array
arr1 = np.reshape(np.arange(1, 21), newshape=(2, 10))
print(arr1)
# Get mean of all values
print(f"Mean of all items is {arr1.mean()}")
# Get a list of the means of each column
print(f"Mean of each row is {np.mean(arr1, axis=0)}")
# Get a list of the means of each row
print(f"Mean of each row is {np.mean(arr1, axis=1)}")

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]]
Mean of all items is 10.5
Mean of each row is [ 6.  7.  8.  9. 10. 11. 12. 13. 14. 15.]
Mean of each row is [ 5.5 15.5]


In [51]:
rng = np.random.default_rng(42)
# Sorting
arr1 = np.arange(20, 0, -1).reshape((10, 2))
rng.shuffle(arr1)
print(arr1)
# Sort the entire arr
arr1.sort(axis=1)
print(arr1)

[[10  9]
 [ 8  7]
 [20 19]
 [ 6  5]
 [14 13]
 [16 15]
 [12 11]
 [ 2  1]
 [18 17]
 [ 4  3]]
[[ 9 10]
 [ 7  8]
 [19 20]
 [ 5  6]
 [13 14]
 [15 16]
 [11 12]
 [ 1  2]
 [17 18]
 [ 3  4]]


In [56]:
# Simulate a random walk using gained knowledge
rng = np.random.default_rng(42) # Set seed

# We are standing in a building with 60 floors, we go up a floor if we get heads on a coin flip and go down if we flip tails
# Let's see the highest we can go with 100 coin flips
no_coin_flips = 100
# 0 will be tails and 1 will be head
coin_flips = rng.integers(0, 2, size=no_coin_flips)

initial_floor = 0
walk = np.where(coin_flips == 1, 1, -1)
walk = np.cumsum(walk) # Walk

highest_floor = walk.max()
print(highest_floor)
lowest_floor = walk.min()
print(lowest_floor)


9
-2


<h1>Pandas Section From Book</h1>
<h2>Introduction</h2>

In [6]:
from pandas import Series
# Pandas has 2 main data structures - Series and Dataframes
# Series - 1D array like object with the same datatype and an array of indexes
ser1 = Series([1, 2, 3, -4, 5])
print(ser1)
# How to create a series with a specific index
ser2 = Series([100, 100, 50, 75, 40], index=['Roman', 'Rico', 'Oktombo', 'Ankantele', 'Uvuvwevwevwe'])
print(ser2[['Roman', 'Rico']])

0    1
1    2
2    3
3   -4
4    5
dtype: int64
Roman    100
Rico     100
dtype: int64


In [17]:
# You can still use numpy functions with Series objects
import numpy as np
ser3 = Series(np.arange(1, 21)) # Different ways to create 1D arrays can be used to make Series
print(ser3)
# Element Wise Operations
ser4 = ser3 + 2 # Do element wise operations with scalars
# print(ser4)
ser5 = ser3 * ser3  # Do element wise operations with other arrays
# print(ser5)
ser6 = np.cumsum(ser3)
# print(ser6)  # Use universal functions with Series
ser7 = np.mean(ser1)
# print(ser7) # Do array operations that don't result in an array
# ser8 = Series(np.identity(4)) # Can't make series from multidimensional array
arr1 = np.unique(ser1)
# print(arr1)
ser8 = np.where(ser1 % 2 == 0, ser1 * 2, ser1)
print(ser8) # Returned an array

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
dtype: int64
[ 1  4  3 -8  5]


In [20]:
# Series objects can also be used like dictionaries
from re import I


print('Roman' in ser2)
# Create a series from a dictionary
ser9 = Series({'Luffy': 3_000_000_000, 'Zoro': 1_100_000_000, 'Sanji': 1_050_000_000, 'Jinbe': 1_110_000_000, 'Robin': 1_000_000_000})
print(ser9)
# If I wanted to make sure ser9 had values in a certain order I can pass the index parameter with the order of values I want
ser9 = Series({'Luffy': 3_000_000_000, 'Zoro': 1_100_000_000, 'Sanji': 1_050_000_000, 'Jinbe': 1_110_000_000, 'Robin': 1_000_000_000}, index=['Jinbe', 'Robin', 'Sanji', 'Luffy', 'Robin'])
ser9


True
Luffy    3000000000
Zoro     1100000000
Sanji    1050000000
Jinbe    1110000000
Robin    1000000000
dtype: int64


Jinbe    1110000000
Robin    1000000000
Sanji    1050000000
Luffy    3000000000
Robin    1000000000
dtype: int64

In [22]:
print(np.isnan(ser9))

Jinbe    False
Robin    False
Sanji    False
Luffy    False
Robin    False
dtype: bool


In [31]:
from pandas import DataFrame
df1 = DataFrame({
    'name': ['Luffy', 'Zoro', 'Jinbe', 'Robin', 'Sanji'],
    'bounties': [3_000_000_000, 1_100_000_000, 1_110_000_000, 1_000_000_000, 1_050_000_000]
})
df1.set_index('name' ,inplace=True)
df1 


Unnamed: 0_level_0,bounties
name,Unnamed: 1_level_1
Luffy,3000000000
Zoro,1100000000
Jinbe,1110000000
Robin,1000000000
Sanji,1050000000
