# This notebook contains all my notes and serves as a testbed for my curiosities as I make my way through chapter 2 in the d2l.ai textbook

In [199]:
# Importing the necessary libraries
import torch
import pandas as pd
import numpy as np
from sklearn import datasets

## Section 2.1: Data Manipulation

In [200]:
# Broadcasting dimensionality tests 

X = torch.arange(2).reshape(2,1)
print(X)

Y = torch.arange(6).reshape(2,3)
print(Y)

print(X+Y)

Z = torch.arange(8).reshape(4,2)

#print(X+Z) <- Will throw an error as the first dimensions are not compatible. 

tensor([[0],
        [1]])
tensor([[0, 1, 2],
        [3, 4, 5]])
tensor([[0, 1, 2],
        [4, 5, 6]])


## Section 2.2: Data Preprocessing.  


## Loading Iris using scikit-learn

In [201]:
iris_sk = datasets.load_iris()
iris_df = pd.DataFrame(iris_sk.data, columns=iris_sk.feature_names)
print(iris_df) # Print the data frame to see that we have loaded the data correctly. 

iris_tens = torch.tensor(iris_df.values) # Transform the data frame to a torch tensor 
print(iris_tens)

     sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)
0                  5.1               3.5                1.4               0.2
1                  4.9               3.0                1.4               0.2
2                  4.7               3.2                1.3               0.2
3                  4.6               3.1                1.5               0.2
4                  5.0               3.6                1.4               0.2
..                 ...               ...                ...               ...
145                6.7               3.0                5.2               2.3
146                6.3               2.5                5.0               1.9
147                6.5               3.0                5.2               2.0
148                6.2               3.4                5.4               2.3
149                5.9               3.0                5.1               1.8

[150 rows x 4 columns]
tensor([[5.1000, 3.5000, 1.4000, 0.2000]

## Loading Iris using pandas

In [202]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
col_names = ['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)', 'Target'] 
iris_pd = pd.read_csv(url, names=col_names)
# Converting the pandas datafram to a tensor
iris_array = np.array(iris_pd.values[:, :4], dtype=float)
iris_tens = torch.from_numpy(iris_array)

### Exercise 1: Counting the fraction of NaN entries in the dataframe and finding the datatypes

In [203]:
total = iris_pd.size
non_na = np.sum(iris_pd.count(0))
fraction_not_na = non_na/total
print('Fraction not NaN: ', fraction_not_na)
fraction_na = (total - non_na)/total
print('Fraction NaN: ', fraction_na)
print(iris_pd.dtypes.value_counts())

Fraction not NaN:  1.0
Fraction NaN:  0.0
float64    4
object     1
Name: count, dtype: int64


### Exercise 2: Accessing the columns of the dataframe by name rather than by index

In [204]:
# If we want to access the column with attribute 'petal length (cm)' we can just do

petal_length = iris_pd['petal length (cm)']
print(petal_length)

0      1.4
1      1.4
2      1.3
3      1.5
4      1.4
      ... 
145    5.2
146    5.0
147    5.2
148    5.4
149    5.1
Name: petal length (cm), Length: 150, dtype: float64


## Section 2.3

### Froebenius Norm 

The square root of the sum of the squares of each matrix entry

In [205]:
t = torch.ones(4,9)
print(t)
print(t.norm())

tensor([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor(6.)


## Summation Tests

In [206]:
x = torch.arange(24).resize(2,3,4)
print(x)
print(x.sum(axis=0))
print(x.sum(axis=1))
print(x.sum(axis=2))

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])
tensor([[12, 14, 16, 18],
        [20, 22, 24, 26],
        [28, 30, 32, 34]])
tensor([[12, 15, 18, 21],
        [48, 51, 54, 57]])
tensor([[ 6, 22, 38],
        [54, 70, 86]])




## Resize method curiosities        

In [215]:
w=torch.tensor([2, -3.4])
print(w.reshape(-1,1))
w.shape
print(w)

x = [1,2,3,4,5,6,7]
y = slice(0,10)
j = slice(0,None)
print(x[j])

w = [1,2,3]
w = torch.tensor(w)

x = [1,2,3,4,5]
x = torch.tensor(x)

z = torch.empty(x.shape[0] - w.shape[0] + 1)

for i in range(z.shape[0]):
    print(x[i: i + w.shape[0]])

w @ x[0: w.shape[0]]
w.shape, x.shape

k = torch.randn(2,3)
j = torch.randn(3,4)

k @ j, torch.mm(k, j)

tensor([[ 2.0000],
        [-3.4000]])
tensor([ 2.0000, -3.4000])
[1, 2, 3, 4, 5, 6, 7]
tensor([1, 2, 3])
tensor([2, 3, 4])
tensor([3, 4, 5])


(tensor([[-1.2412,  0.9427,  0.3037, -2.8729],
         [-1.8832,  1.1898, -2.2801,  2.3551]]),
 tensor([[-1.2412,  0.9427,  0.3037, -2.8729],
         [-1.8832,  1.1898, -2.2801,  2.3551]]))