# Section 09 03 Numpy.mean()

In [1]:
# in most cases mean is the same as average but the difference is the usage. 
# with average you can compute weighted averages. mean is faster than average


In [2]:
#np.mean(arr, axis=None, dtype=None, keepdims=False)

In [3]:
import numpy as np

In [4]:
data = np.array([1,2,3,4,5])
avg = np.mean(data)
print(avg)

3.0


In [5]:
#Faster than sum()/len()

In [6]:
arr2D = np.array([[1,2,3], [4,5,6]])

In [8]:
col_mean = np.mean(arr2D, axis=0) 
row_mean = np.mean(arr2D, axis=1)

In [9]:
print(col_mean)

[2.5 3.5 4.5]


In [10]:
print(row_mean)

[2. 5.]


In [11]:
arr2D

array([[1, 2, 3],
       [4, 5, 6]])

In [12]:
#keep dimensions keepdims

In [13]:
rowkd= np.mean(arr2D, axis=1, keepdims=True)
print(rowkd)

[[2.]
 [5.]]


In [14]:
rowkd= np.mean(arr2D, axis=0, keepdims=True)
print(rowkd)

[[2.5 3.5 4.5]]


In [19]:
#control data type:
d1 = np.mean(arr2D, axis=0, dtype=np.float16)

In [20]:
d1

array([2.5, 3.5, 4.5], dtype=float16)

In [21]:
#handling NAN values

In [22]:
arr_nan = np.array([1,2, np.nan, 4])

In [23]:
print(np.mean(arr_nan))

nan


In [24]:
print(np.nanmean(arr_nan))

2.3333333333333335


#### Weighted Mean

In [25]:
weights = np.array([0.1, 0.3, 0.6])
values = np.array([10, 20, 30])

In [26]:
weighted_avg = np.average(values, weights=weights)

In [27]:
print(weighted_avg)

25.0


In [28]:
10*.1 + 0.3*20 + 0.6*30 

25.0

In [29]:
sum(values * weights)/sum(weights)

25.0

#### Complex Arrays

In [30]:
complx = np.array([1+2j, 3+4j])
print(np.mean(complx))

(2+3j)


#### Empty Array

In [31]:
emp = np.array([])

In [32]:
print(np.mean(emp))

nan


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


# Section 09 04 np.median

In [33]:
#The median is the middle value in a sorted dataset. Unlike the mean (average) it's robust to outliers.

In [34]:
import numpy as np

In [35]:
data = np.array([5,1,2,4,3])

In [36]:
median = np.median(data)
print(median)

3.0


In [37]:
sorted(data)

[1, 2, 3, 4, 5]

In [38]:
data2 = np.array([5,1,2,4,3, 80])

In [40]:
print(np.median(data2))

3.5


In [41]:
sorted(data2)

[1, 2, 3, 4, 5, 80]

In [43]:
data2 = np.array([5,1,2,10,3, 80])
print(np.median(data2))

4.0


In [44]:
sorted(data2)

[1, 2, 3, 5, 10, 80]

In [45]:
#multi-dimensional array

In [46]:
arr2D = np.array([[1,2,3],
                  [4,5,6],
                  [7,8,9]])

In [48]:
col_median = np.median(arr2D, axis=0)
print(col_median)

[4. 5. 6.]


In [49]:
row_median = np.median(arr2D, axis=1)
print(row_median)

[2. 5. 8.]


In [50]:
#keepdims

In [51]:
row_median = np.median(arr2D, axis=1, keepdims=True)
print(row_median)

[[2.]
 [5.]
 [8.]]


In [52]:
arr_nan = np.array([1,2, np.nan, 4])

In [53]:
median = np.nanmedian(arr_nan)

In [54]:
print(median)

2.0


# Section 09 05 numpy.corrcoef()

In [55]:
#Basic Correlation: measures how two variables are related to one another (-1 to 1)
# +1 perfect positive linear relationship
# 0 No linear relationship
# -1: Perfect negative relationship

In [56]:
import numpy as np
x= [1,2,3,4,5]
y= [2,4,6,8,10]
#Pearson correlation coefficient
corr_matrix = np.corrcoef(x,y)
print(corr_matrix)

[[1. 1.]
 [1. 1.]]


In [57]:
data = np.array([[1, 2, 3], 
                 [2, 4, 6], 
                 [3, 6, 9]])  # Columns = variables

corr_matrix = np.corrcoef(data, rowvar=False)  # Columns are variables
print(corr_matrix)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


In [58]:
x = [1, 2, np.nan, 4]
y = [2, 4, 6, 8]

# Mask NaN values
valid_mask = ~np.isnan(x) & ~np.isnan(y)
corr = np.corrcoef(np.array(x)[valid_mask], np.array(y)[valid_mask])[0, 1]
print(corr)  # 1.0

1.0


In [59]:
from scipy.stats import spearmanr

x_ranks = np.argsort(np.argsort(x))
y_ranks = np.argsort(np.argsort(y))
spearman_corr = np.corrcoef(x_ranks, y_ranks)[0, 1]
 


In [60]:
print(spearman_corr)

0.7999999999999999
