In [None]:
# Mount Google drive folder
# `drive` is the Colab-specific helper module used for the mount below
from google.colab import drive
# Expose the drive under /content/drive; the data-loading cell builds its
# working directory underneath this mount point
drive.mount('/content/drive')

In [None]:
# Load essential libraries
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
plt.style.use('seaborn-whitegrid')
%matplotlib inline

In [None]:
# Setup working directory and load data
# DIR lives under the /content/drive mount point created by the drive-mount cell
DIR = '/content/drive/My Drive/Colab Notebooks/IntroPythonAndR'
# FILENAME is relative to DIR, so it only resolves after the chdir below
FILENAME = 'Data/president_heights.csv'
# Changes the working directory for the whole session (affects every later relative path)
os.chdir(DIR)
df = pd.read_csv(FILENAME) # read the csv file into a dataframe

In [None]:
# Display info about the dataframe
# A cell only echoes its last expression, so the shape has to be printed
# explicitly; the head is left as the final expression for the rich table view
print(df.shape)
df.head(5)

In [None]:
# Create a new column with heights in inches (1 in = 2.54 cm)
df['height(in)'] = df['height(cm)'] * (1/2.54)
# Create a new column with randomly generated values of weight in pounds
# (normal with mean 170 and standard deviation 15, rounded to whole pounds).
# A locally seeded generator makes the column identical on every rerun
# without touching numpy's global random state.
weight_rng = np.random.default_rng(42)
df['weight(lbs)'] = np.round(weight_rng.normal(170, 15, len(df)))
df.head(5)

In [None]:
# Histogram of heights in cm (swap the column for 'height(in)' to plot inches)
fig, ax = plt.subplots()
ax.hist(df['height(cm)'])
# Label the figure so it can be read on its own
ax.set(xlabel='height (cm)', ylabel='count', title='Histogram of heights')
# Render the figure without echoing the raw return value of the plotting call
plt.show()

In [None]:
# x.copy() allocates an independent array, so writing into y must leave x
# unchanged (a plain `y = x` would only bind a second name to the same data)
x = np.array([-1.1, 0.0, 3.6, -7.2])
y = x.copy()

# Both arrays before the modification
for _arr in (x, y):
    print(_arr)

y[0] = 1.1
print('----')

# Both arrays after the modification: only y has changed
for _arr in (x, y):
    print(_arr)

In [None]:
# Create vectors corresponding to height values in cm and inches
x = np.array(df['height(cm)'])
y = np.array(df['height(in)'])
# Only the last expression of a cell is echoed, so print the type explicitly
print(type(x))
x.shape

In [None]:
# Dot product, average, norm, rms, standard deviation
a = np.array([-1, 2, 2])
b = np.array([1, 0, -3])
u = np.ones(len(a)) # vector full of ones

n = len(a) # length of the vector

# Bare expressions in the middle of a cell are evaluated and thrown away,
# so the two dot products are printed explicitly
print('Dot product = %f\n'%(np.dot(a, b))) # dot product between vectors a and b
print('Sum = %f\n'%(np.dot(u, a))) # sum of the components of vector a

print('Average = %f\n'%((1/n) * np.dot(u, a))) # average of the components of vector a
print('Average = %f\n'%(np.mean(a))) # average of the components of vector a using built-in function

print('Norm = %f\n'%(np.sqrt(np.dot(a, a)))) # norm of vector a
print('Norm = %f\n'%(np.linalg.norm(a))) # norm of vector a using built-in function

print('RMS = %f\n'%((1/np.sqrt(n))*np.sqrt(np.dot(a, a)))) # rms of the components of vector a
print('RMS = %f\n'%((1/np.sqrt(n))*np.linalg.norm(a))) # rms of the components of vector a using built-in norm

atilde = a - np.mean(a) # decentred version of vector a calculated using broadcasting
print('SD = %f\n'%((1/np.sqrt(n))*np.linalg.norm(atilde))) # standard deviation = rms of the decentred vector
print('SD = %f\n'%(np.std(a))) # standard deviation of the components of vector a using built-in function

In [None]:
# lambda functions: small one-expression helpers bound to names
# decenter(x): subtract the mean of x from every component of x
decenter = lambda x: x - np.mean(x) # decentering a vector
# rms(x): Euclidean norm of x divided by the square root of its length
rms = lambda x: np.linalg.norm(x)/np.sqrt(len(x)) # rms of a vector

In [None]:
# Only the last bare expression of a cell is echoed, so both results are printed
print(rms(a)) # rms of vector a
print(decenter(a)) # decentred version of vector a
# Average of the decentered version of vector a
print('Mean of the decentred version of a = %f\n'%(np.mean(decenter(a)))) 
# Standard deviation of the decentered version and of the original vector are the same
print('Standard deviation of the decentred version of a = %f\n'%(np.std(decenter(a)))) 

In [None]:
# User-defined function for standardizing a vector
def standardize(x):
    """Return the standardized (z-score) version of a vector.

    The mean of the components is subtracted from every component and the
    result is divided by the population standard deviation (``np.std`` with
    its default ``ddof=0``).

    Parameters
    ----------
    x : array-like
        Numeric vector to standardize.

    Returns
    -------
    numpy.ndarray
        Decentred vector divided by the standard deviation. When the standard
        deviation is exactly zero (all components equal) the decentred
        vector, which is all zeros, is returned as is instead of dividing by
        zero and producing NaN.
    """
    x = np.asarray(x)
    centred = x - np.mean(x)
    spread = np.std(x)
    # A constant vector has no spread to rescale by; skip the division
    if spread == 0:
        return centred
    return centred / spread

In [None]:
# Print, in order: vector a, its decentred version, its standardized version
z = standardize(a)
for _vec in (a, decenter(a), z):
    print(_vec)

In [None]:
# Indicator vector: ones for the first half of the rows, zeros for the rest.
# The zero part is sized as "whatever is left" so that len(u) == len(df) even
# when the number of rows is odd; otherwise the dot product with x would fail
# with a shape mismatch.
n_first_half = len(df) // 2
u = np.concatenate((np.ones(n_first_half), np.zeros(len(df) - n_first_half)))
o = np.ones(len(df))
print(np.dot(x, u)) # Sum of the heights for the first half of presidents
print(np.dot(x, o)) # Sum of the heights

In [None]:
# Mean and standard deviation of the height vector in both units
# (x holds the heights in cm, y the same heights in inches)
mean_cm, mean_in = np.mean(x), np.mean(y)
sd_cm, sd_in = np.std(x), np.std(y)
print('Average of heights in cm = %f cm\n' % mean_cm)
print('Average of heights in inches = %f in\n' % mean_in)
print('Standard deviation of heights in cm = %f cm\n' % sd_cm)
print('Standard deviation of heights in inches = %f in\n' % sd_in)

# Standardized versions of both vectors; since y is just x rescaled, z1 and
# z2 should agree up to rounding
z1, z2 = standardize(x), standardize(y)

print('Average of z1 = %f, average of z2 = %f' % (np.mean(z1), np.mean(z2)))
print('Standard deviation of z1 = %f, standard deviation of z2 = %f' % (np.std(z1), np.std(z2)))
for _z in (z1, z2):
    print(_z)