# A QUICK NUMPY TUTORIAL FOR ML

In [2]:
import numpy as np

### **np.array()** is used to create arrays

In [3]:
# create a 1D array, one set of [] square brackets
one_dimensional_array = np.array([1.2, 2.4, 3.5, 4.7, 6.1, 7.2, 8.3, 9.5])
print("1D arr:\n", one_dimensional_array)
print("\n")

# create a 2D array, two sets of [] square brackets
two_dimensional_array = np.array([[6, 5], [11, 7], [4, 8]])
print("2D arr:\n", two_dimensional_array)

1D arr:
 [1.2 2.4 3.5 4.7 6.1 7.2 8.3 9.5]


2D arr:
 [[ 6  5]
 [11  7]
 [ 4  8]]


### **np.zeros()** is used to create an array of 0's
### **np.ones()** is used to create an array of 1's

In [4]:
zeros = np.zeros([10,6])
print("ZEROS:\n", zeros)
print("\n")

ones = np.ones([10])
print("ONES:\n", ones)

ZEROS:
 [[0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0.]]


ONES:
 [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


### **np.arange()** is used to generated a sequence of numbers
note the upper value is not included

In [5]:
sequence_of_integers = np.arange(5, 12)
print(sequence_of_integers)

[ 5  6  7  8  9 10 11]


### **np.random.randint()** is used to generate an array of random number

In [6]:
random_integers_between_50_and_100 = np.random.randint(low=50, high=101, size=(6))
print(random_integers_between_50_and_100)

[85 68 51 52 72 80]


### **np.random.random()** is used to generate a random floating points

In [7]:
random_floats_between_0_and_1 = np.random.random([6])
print(random_floats_between_0_and_1)

[0.69359033 0.16475824 0.21607692 0.92720434 0.7155198  0.20828534]


## Mathematical Operations on NumPy Operands

In mathematics an **operand** is the object of a mathematical operation, i.e., it is the object or quantity that is operated on.

Linear algebra requires that the two operands have the same dimensions during addition and subtraction. 
Furthermore, if you want to multiply two vectors or matrices, linear algebra imposes strict rules on the dimensional compatibility of operands.

However NumPy uses **broadcasting** which expands the smaller operand to a compatible dimension

In [8]:
random_floats_between_2_and_3 = random_floats_between_0_and_1 + 2.0
print(random_floats_between_2_and_3)

[2.69359033 2.16475824 2.21607692 2.92720434 2.7155198  2.20828534]


In [9]:
random_integers_between_150_and_300 = random_integers_between_50_and_100 * 3
print(random_integers_between_150_and_300)

[255 204 153 156 216 240]


## Task 1: Create a Linear Dataset

1. create feature
2. create label
3. add noise to lable (random between -2 and 2)

In [37]:
feature = np.arange(6,21)
print(feature)

[ 6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]


In [38]:
label = 3 * feature + 4
print(label)

[22 25 28 31 34 37 40 43 46 49 52 55 58 61 64]


In [39]:
noise = np.random.random(len(label)) * 4 - 2
print(noise)

[-0.54842865 -0.46971847  0.3391865  -1.61815153 -0.63142177 -1.14986973
  1.18697561  0.855927   -1.27041099  1.84178136 -0.20201872  0.45064489
  1.40188636  0.65861068  1.24391303]


In [42]:
label = label + noise
print(label)

[20.9031427  24.06056306 28.678373   27.76369695 32.73715647 34.70026053
 42.37395122 44.71185401 43.45917802 52.68356271 51.59596255 55.90128977
 60.80377271 62.31722135 66.48782606]


# A QUICK PANDAS TUTORIAL FOR ML

In [43]:
import pandas as pd

### **pd.DataFrame()** is used to create a pandas data frame

In [44]:
# Create and populate a 5x2 NumPy array.
my_data = np.array([[0, 3], [10, 7], [20, 9], [30, 14], [40, 15]])

# Create a Python list that holds the names of the two columns.
my_column_names = ['temperature', 'activity']

# Create a DataFrame.
my_dataframe = pd.DataFrame(data=my_data, columns=my_column_names)

# Print the entire DataFrame
print(my_dataframe)

   temperature  activity
0            0         3
1           10         7
2           20         9
3           30        14
4           40        15


### **dataframe["new_column"] =** for adding a new column

In [45]:
# Create a new column named adjusted.
my_dataframe["adjusted"] = my_dataframe["activity"] + 2

# Print the entire DataFrame
print(my_dataframe)

   temperature  activity  adjusted
0            0         3         5
1           10         7         9
2           20         9        11
3           30        14        16
4           40        15        17


### subsetting a dataframe

In [47]:
print("Rows #0, #1, and #2:")
print(my_dataframe.head(3), '\n')

print("Row #2:")
print(my_dataframe.iloc[[2]], '\n')

print("Rows #1, #2, and #3:")
print(my_dataframe[1:4], '\n')

print("Column 'temperature':")
print(my_dataframe['temperature'])

Rows #0, #1, and #2:
   temperature  activity  adjusted
0            0         3         5
1           10         7         9
2           20         9        11 

Row #2:
   temperature  activity  adjusted
2           20         9        11 

Rows #1, #2, and #3:
   temperature  activity  adjusted
1           10         7         9
2           20         9        11
3           30        14        16 

Column 'temperature':
0     0
1    10
2    20
3    30
4    40
Name: temperature, dtype: int32


## Task 1: Create a DataFrame

1. Create an 3x4 (3 rows x 4 columns) pandas DataFrame 
2. columns are named Eleanor, Chidi, Tahani, and Jason. 
3. Populate each of the 12 cells in the DataFrame with a random integer between 0 and 100
4. print Eleanor row 1
5. create Janet = Tahani + Jason and print

In [65]:
df = np.random.randint(100, size=(3, 4))
df = pd.DataFrame(df)
df.columns = ["Eleanor","Chidi","Tahani","Jason"]

print("The dataframe")
print(df, "\n")

print("The Eleanor row 1")
print(df["Eleanor"][1],"\n")

df["Janet"] = df["Tahani"] + df["Jason"]
print("The Janet")
print(df["Janet"])

The dataframe
   Eleanor  Chidi  Tahani  Jason
0       82     91      23     34
1        3     37      67     21
2       83     72      28     97 

The Eleanor row 1
3 

The Janet
0     57
1     88
2    125
Name: Janet, dtype: int32


In [57]:
print(df["Eleanor"][1])

79


## Copying a DataFrame

1. **Referencing.** If you assign a DataFrame to a new variable, any change to the DataFrame or to the new variable will be reflected in the other.
2. **Copying.** If you call the pd.DataFrame.copy method, you create a true independent copy. Changes to the original DataFrame or to the copy will not be reflected in the other.

In [66]:
print(df)

   Eleanor  Chidi  Tahani  Jason  Janet
0       82     91      23     34     57
1        3     37      67     21     88
2       83     72      28     97    125


In [72]:
reference_to_df = df

print("Starting value of df: %d" % df['Jason'][1])
print("Starting value of reference_to_df: %d\n" % reference_to_df['Jason'][1])

# Modify a cell in df.
df.at[1, 'Jason'] = df['Jason'][1] + 5
print("Updated df: %d" % df['Jason'][1])
print("Updated reference_to_df: %d\n\n" % reference_to_df['Jason'][1])

Starting value of df: 31
Starting value of reference_to_df: 31

Updated df: 36
Updated reference_to_df: 36




In [74]:
copy_of_my_dataframe = my_dataframe.copy()

# Print the starting value of a particular cell.
print("Starting value of my_dataframe: %d" % my_dataframe['activity'][1])
print("Starting value of copy_of_my_dataframe: %d\n" % copy_of_my_dataframe['activity'][1])

# Modify a cell in df.
my_dataframe.at[1, 'activity'] = my_dataframe['activity'][1] + 3

print("Updated my_dataframe: %d" % my_dataframe['activity'][1])
print("copy_of_my_dataframe does not get updated: %d" % copy_of_my_dataframe['activity'][1])

Starting value of my_dataframe: 10
Starting value of copy_of_my_dataframe: 10

Updated my_dataframe: 13
copy_of_my_dataframe does not get updated: 10
