# Data Generation

In [3]:
import numpy as np
np.set_printoptions(precision=2)

# Seed the global NumPy RNG so that "Restart Kernel -> Run All" regenerates
# exactly the same data (and therefore the same printed statistics).
SEED = 42
np.random.seed(SEED)

# data parameters: number of students / classes and the per-class mean and
# standard deviation that the scores are sampled with
n_student, n_class = 100, 5
means = np.random.randint(40, 60, (n_class, ))
stds = np.random.uniform(4, 10, (n_class, ))

# generate data: `means` and `stds` have shape (n_class,) and are broadcast
# against the requested (n_student, n_class) output shape
scores = np.random.normal(loc=means, scale=stds, size=(n_student, n_class))

# print the configured score parameters
print(f"mean setting: \n{means}")
print(f"std setting: \n{stds}\n")

# print the statistics measured on the sampled scores (per class, i.e. over
# the student axis)
print(f"means: \n{scores.mean(axis=0)}")
print(f"stds: \n{scores.std(axis=0)}")
print(f"max value: \n{scores.max(axis=0)}")
print(f"min value: \n{scores.min(axis=0)}")

mean setting: 
[59 59 50 46 43]
std setting: 
[8.37 8.92 9.95 9.56 7.56]

means: 
[59.15 59.4  48.41 45.91 43.91]
stds: 
[7.81 8.15 9.82 9.16 6.74]
max value: 
[76.51 81.42 73.27 69.56 58.26]
min value: 
[36.34 39.14 20.43 22.71 22.68]


In [4]:
def get_scores(n_student=100, n_class=5, seed=None):
  """Sample a random score matrix of shape (n_student, n_class).

  Each class (column) gets its own mean, an integer drawn from [40, 60),
  and its own standard deviation, drawn uniformly from [4, 10). The scores
  of that class are then sampled from the corresponding normal distribution.

  Args:
    n_student: number of rows (students) to generate. Defaults to 100.
    n_class: number of columns (classes) to generate. Defaults to 5.
    seed: optional seed. When None (default) the global NumPy RNG is used,
      exactly as before; otherwise a private RandomState seeded with this
      value is used, so the result is reproducible and the global random
      stream is left untouched.

  Returns:
    A float ndarray of shape (n_student, n_class).
  """
  # np.random (module) and RandomState expose the same randint/uniform/normal
  # API, so either can serve as the source of randomness.
  rng = np.random if seed is None else np.random.RandomState(seed)

  # per-class distribution parameters
  means = rng.randint(40, 60, (n_class, ))
  stds = rng.uniform(4, 10, (n_class, ))

  # loc/scale of shape (n_class,) are broadcast across the student rows
  scores = rng.normal(loc=means, scale=stds, size=(n_student, n_class))

  return scores

# Min-max Normalization

In [9]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

# draw a fresh score matrix and take the per-class extrema
# (reduction over axis 0, the student axis)
scores = get_scores()
M_scores, m_scores = np.max(scores, axis=0), np.min(scores, axis=0)

# report the max / min values before min-max normalization
print(colored("Before m-M normalization", 'blue'))
print(f"M: {M_scores}")
print(f"m: {m_scores}\n")

# shapes of the operands that take part in the broadcast below
print(f"scores.shape: {scores.shape}")
print(f"M_scores.shape: {M_scores.shape}")
print(f"m_scores.shape: {m_scores.shape}\n")

# min-max normalization: the per-class vectors are broadcast over every row,
# then the extrema are recomputed on the rescaled scores
scores = (scores - m_scores) / (M_scores - m_scores)
M_scores, m_scores = np.max(scores, axis=0), np.min(scores, axis=0)

# report the max / min values after min-max normalization
print(colored("After m-M normalization", 'blue'))
print(f"M: {M_scores}")
print(f"m: {m_scores}")

[34mBefore m-M normalization[0m
M: [60.41 58.94 60.22 62.59 68.4 ]
m: [25.99 37.99 38.16 21.76 17.47]

scores.shape: (100, 5)
M_scores.shape: (5,)
m_scores.shape: (5,)

[34mAfter m-M normalization[0m
M: [1. 1. 1. 1. 1.]
m: [0. 0. 0. 0. 0.]


# Standard Deviation

In [11]:
import numpy as np
np.set_printoptions(precision=2)

scores = get_scores()

# calculate the variances / stds with element-wise operations, using
# Var[X] = E[X^2] - (E[X])^2 per class (axis 0 is the student axis).
# The result is deliberately NOT called `vars`: that name would shadow the
# builtin vars() for the rest of the kernel session.
variances_manual = (scores**2).mean(axis=0) - (scores.mean(axis=0))**2
stds_manual = variances_manual**0.5
print(f"vars: {variances_manual}")
print(f"stds: {stds_manual}\n")

# calculate the variances / stds with the ndarray APIs
variances = scores.var(axis=0)
stds = scores.std(axis=0)
print(f"vars: {variances}")
print(f"stds: {stds}")

# both routes must agree up to floating-point round-off
np.testing.assert_allclose(variances_manual, variances)
np.testing.assert_allclose(stds_manual, stds)

vars: [70.5  67.7  81.56 48.   82.62]
stds: [8.4  8.23 9.03 6.93 9.09]

vars: [70.5  67.7  81.56 48.   82.62]
stds: [8.4  8.23 9.03 6.93 9.09]


# Standardization

In [15]:
import numpy as np
from termcolor import colored
np.set_printoptions(precision=2)

# draw a fresh score matrix and measure its per-class mean / std
# (reduction over axis 0, the student axis)
scores = get_scores()
means, stds = np.mean(scores, axis=0), np.std(scores, axis=0)

# report the statistics before standardization
print(colored("Before standardization", 'blue'))
print(f"means: {means}")
print(f"stds: {stds}\n")

# standardization: the per-class vectors are broadcast over every row,
# then the statistics are re-measured on the standardized scores
scores = (scores - means) / stds
means, stds = np.mean(scores, axis=0), np.std(scores, axis=0)

# report the statistics after standardization
print(colored("After standardization", 'blue'))
print(f"means: {means}")
print(f"stds: {stds}")

[34mBefore standardization[0m
means: [41.34 57.68 49.22 56.75 53.07]
stds: [4.97 7.25 8.63 6.46 4.88]

[34mAfter standardization[0m
means: [-1.76e-15 -7.65e-16 -2.57e-15  1.86e-15  1.33e-15]
stds: [1. 1. 1. 1. 1.]


# Scores to Grades

In [23]:
import numpy as np

# draw one random integer score per student
n_student = 10
scores = np.random.randint(0, 100, size=n_student)
print(f"scores: {scores.shape}\n{scores}\n")

# grade boundaries
cutoffs = np.array([20, 40, 60, 80])
print(f"cutoffs: {cutoffs.shape}\n{cutoffs}\n")

# turn scores into a column and cutoffs into a row so that comparing them
# broadcasts to one row per student and one column per cutoff
scores = scores[:, np.newaxis]
cutoffs = cutoffs[np.newaxis, :]
print(scores)
print(cutoffs,'\n')

# a student's grade is the number of cutoffs the score strictly exceeds
grades = np.sum(scores > cutoffs, axis=1)
print(grades)

scores: (10,)
[81  8 97 63 50 77 42 21 65 13]

cutoffs: (4,)
[20 40 60 80]

[[81]
 [ 8]
 [97]
 [63]
 [50]
 [77]
 [42]
 [21]
 [65]
 [13]]
[[20 40 60 80]] 

[4 0 4 3 2 3 2 1 3 0]
