# Modules examples


### In this notebook I will demonstrate the usage of several modules from this folder, using a randomly generated dataset

January 2018


In [1]:
import pandas as pd
import numpy as np

**Dummy dataset to work with**

In [2]:
# Seed NumPy's legacy global generator so the random dataset (and every random
# draw in the cells that follow) is identical on each Restart & Run All.
# NOTE: outputs saved before this seed was added will differ until the
# notebook is re-run.
np.random.seed(42)

# 400 rows x 5 columns of integers drawn uniformly from [-100, 100).
data = np.random.randint(-100, 100, size=(400, 5))

In [3]:
# Wrap the random integer matrix in a DataFrame (columns are labelled 0-4).
df = pd.DataFrame(data)
# A 0/1 flag column.
df['boolean'] = np.random.randint(low=0, high=2, size=400)
# Draw code points 100-119 and turn each one into its character ('d' to 'w').
df['text'] = pd.Series(np.random.randint(low=100, high=120, size=400)).map(chr)
df.head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,66,70,-27,-67,60,0,p
1,-53,-47,65,-17,-68,1,r
2,53,-35,-98,62,-32,1,o
3,-43,-67,80,92,25,1,u
4,13,-95,9,-9,-1,0,n


<br>
### Scaler object

In [4]:
from scaler import Scaler

In [10]:
# Fit a 'standard' scaler on df, then display the first rows of the scaled frame.
sc = Scaler()
sc.fit(df,scaler='standard')
# In the output below the 'boolean' and 'text' columns come back unchanged.
sc.scale(df).head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,1.118633,1.252969,-0.493762,-1.183031,0.999311,0,p
1,-0.929641,-0.707347,1.076446,-0.303944,-1.131132,1,r
2,0.894872,-0.506289,-1.705553,1.085013,-0.531945,1,o
3,-0.757517,-1.042444,1.332458,1.612465,0.416768,1,u
4,0.206376,-1.511579,0.120667,-0.16329,-0.015978,0,n


**Columns whose scaling factors are (0, 0) were skipped, i.e. left unscaled**

In [11]:
# Per-column factors stored by fit(). For the 'standard' scaler the pairs are
# consistent with (mean, standard deviation) — TODO confirm against scaler.py.
sc.factors

{0: (1.01, 58.097704413731904),
 1: (-4.7825, 59.684258565817665),
 2: (1.93, 58.59097503788211),
 3: (0.2875, 56.87721325116101),
 4: (-0.04, 60.08138507125315),
 'boolean': (0, 0),
 'text': (0, 0)}

In [12]:
# Same as before, but skip_boolean=False: in the output below the 'boolean'
# column is scaled as well, while 'text' is still left alone.
sc = Scaler()
sc.fit(df,scaler='standard',skip_boolean=False)
sc.scale(df).head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,1.118633,1.252969,-0.493762,-1.183031,0.999311,-1.008787,p
1,-0.929641,-0.707347,1.076446,-0.303944,-1.131132,0.988811,r
2,0.894872,-0.506289,-1.705553,1.085013,-0.531945,0.988811,o
3,-0.757517,-1.042444,1.332458,1.612465,0.416768,0.988811,u
4,0.206376,-1.511579,0.120667,-0.16329,-0.015978,-1.008787,n


In [13]:
sc = Scaler()
# Despite its name, skip_row lists column labels: in the output below,
# columns 1 and 2 keep their original values.
sc.fit(df,scaler='standard',skip_row=[1,2])
sc.scale(df).head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,1.118633,70,-27,-1.183031,0.999311,0,p
1,-0.929641,-47,65,-0.303944,-1.131132,1,r
2,0.894872,-35,-98,1.085013,-0.531945,1,o
3,-0.757517,-67,80,1.612465,0.416768,1,u
4,0.206376,-95,9,-0.16329,-0.015978,0,n


In [17]:
sc = Scaler()
# 'normalize' scaling: the displayed numeric values all fall between 0 and 1
# (presumably min-max scaling — TODO confirm against scaler.py).
sc.fit(df,scaler='normalize')
# Keep the scaled frame so it can be passed to unscale() further down.
scaled = sc.scale(df)
scaled.head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,0.834171,0.854271,0.366834,0.167513,0.80402,0,p
1,0.236181,0.266332,0.829146,0.42132,0.160804,1,r
2,0.768844,0.326633,0.01005,0.822335,0.341709,1,o
3,0.286432,0.165829,0.904523,0.974619,0.628141,1,u
4,0.567839,0.025126,0.547739,0.461929,0.497487,0,n


In [16]:
# For the 'normalize' scaler the pairs are consistent with (minimum, range),
# e.g. (66 - (-100)) / 199 = 0.834 — TODO confirm against scaler.py.
sc.factors

{0: (-100, 199),
 1: (-100, 199),
 2: (-100, 199),
 3: (-100, 197),
 4: (-100, 199),
 'boolean': (0, 0),
 'text': (0, 0)}

**unscale and compare to original dataframe**

In [18]:
# Reverse the scaling; the displayed values come back as floats equal to the
# original integers in df.
sc.unscale(scaled).head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,66.0,70.0,-27.0,-67.0,60.0,0,p
1,-53.0,-47.0,65.0,-17.0,-68.0,1,r
2,53.0,-35.0,-98.0,62.0,-32.0,1,o
3,-43.0,-67.0,80.0,92.0,25.0,1,u
4,13.0,-95.0,9.0,-9.0,-1.0,0,n


In [19]:
# The original frame, for comparison with the unscaled values.
df.head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,66,70,-27,-67,60,0,p
1,-53,-47,65,-17,-68,1,r
2,53,-35,-98,62,-32,1,o
3,-43,-67,80,92,25,1,u
4,13,-95,9,-9,-1,0,n


<br>

### Scaling function
**A plain function, not an object.** <br>
**Otherwise it takes exactly the same arguments as the Scaler object above.**

In [24]:
from scaling import scale

In [27]:
# Function form: a single call instead of fit() + scale(). With no scaler
# argument, the output matches the 'standard' scaling shown for Scaler;
# skip_row=[3] leaves column 3 unscaled (skip_row takes column labels).
scale(df,skip_row=[3]).head()

Unnamed: 0,0,1,2,3,4,boolean,text
0,1.118633,1.252969,-0.493762,-67,0.999311,0,p
1,-0.929641,-0.707347,1.076446,-17,-1.131132,1,r
2,0.894872,-0.506289,-1.705553,62,-0.531945,1,o
3,-0.757517,-1.042444,1.332458,92,0.416768,1,u
4,0.206376,-1.511579,0.120667,-9,-0.015978,0,n


<br>
## Linear Algebra tools

**Note: these are slow and inefficient implementations.**

This implementation was written for practice and to gain intuition.

In [31]:
from linear_algebra import Vector, Matrix

### Vector

In [36]:
# Vector is built here from a list, a tuple, bare numbers, and mixtures of
# sequences, numbers and another Vector. The printed u4 further down shows
# the arguments flattened, in order, into a single vector.
v = Vector([1,3,4])
u = Vector((1,2,3))
u2 = Vector(1,2,3)
u3 = Vector([1,2],3)
u4 = Vector([1,2],(3,4),u)

In [38]:
# present() prints the vector's components; they are shown as floats.
u.present()
u4.present()

[1.0, 2.0, 3.0]
[1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0]


**Size (Euclidean norm) of the vector, |v|**

In [40]:
# Euclidean norm of v: sqrt(1^2 + 3^2 + 4^2) = sqrt(26).
v.size()

5.0990195135927845

**Length of the vector (number of components)**

In [41]:
# Two ways to ask for the number of components. Only the last expression of a
# cell is displayed, so the single 3 in the output is the value of len(v);
# v.length() presumably returns the same number — TODO confirm.
v.length()
len(v)

3

**Other operations:**

In [50]:
# Unit vector in the direction of v: each component divided by |v| (~5.099).
norm = v.normalize()
norm.present()

[0.19611613513818404, 0.5883484054145521, 0.7844645405527362]


In [51]:
# A vector orthogonal to v. The printed result has unit length and a zero
# dot product with v: 0.577 * (1 + 3 - 4) = 0.
ortho = v.orthogonal()
ortho.present()

[0.5773502691896258, 0.5773502691896258, -0.5773502691896258]


`orthogonal()` returns the easiest-to-calculate vector that is orthogonal to `v`.
 
 <br>
 <br>
 
 
**Arithmetic:**


In [52]:
# Vector + Vector adds component-wise: [1,3,4] + [1,2,3].
a = v + u
a.present()

[2.0, 5.0, 7.0]


In [55]:
# Vector + scalar adds the scalar to every component.
a = v + 3
a.present()

[4.0, 6.0, 7.0]


In [56]:
# Scalar multiplication followed by vector subtraction: [1,3,4] - 2*[1,2,3].
a = v - u*2
a.present()

[-1.0, -1.0, -2.0]


In [57]:
# Vector / scalar divides every component by the scalar.
a = v/1.2
a.present()

[0.8333333333333334, 2.5, 3.3333333333333335]


In [53]:
# Vector * Vector is the dot product: 1*1 + 3*2 + 4*3 = 19.
v * u

19.0

In [54]:
# Explicit method form of the dot product; gives the same 19.0 as v * u.
v.dot(u)

19.0

### Matrix

In [84]:
# Rows are passed as separate positional arguments; lists, tuples and Vectors
# are all used as rows here.
m = Matrix([1,2,3],[4,5,6],[7,8,9])
m2 = Matrix((1,2,3),[4,5,6],[7,8,9])
m3 = Matrix((1,2,3),[4,5,6],u)
m4 = Matrix(u,v,u3)

In [85]:
# present() prints the matrix one row per line.
m.present()

[1.0, 2.0, 3.0]
[4.0, 5.0, 6.0]
[7.0, 8.0, 9.0]


In [86]:
# Dimensions of the matrix as a tuple — presumably (rows, columns); a square
# matrix cannot show the order, TODO confirm.
m.shape()

(3, 3)

In [87]:
# transpose() returns a Matrix with rows and columns swapped; the later cells
# still see the original m.
a = m.transpose()
a.present()

[1.0, 4.0, 7.0]
[2.0, 5.0, 8.0]
[3.0, 6.0, 9.0]


In [89]:
# minor(1,1) returns the sub-matrix left after dropping row 1 and column 1
# (0-based indices) — the sub-matrix itself, not its determinant.
a = m.minor(1,1)
a.present()

[1.0, 3.0]
[7.0, 9.0]


In [90]:
# Determinant of m; it is 0.0 here, so this m is singular.
m.det()

0.0

The original `m` has determinant 0 and is therefore not invertible, so `m` is redefined:

In [93]:
# Replace m with a matrix that has a non-zero determinant so it can be inverted.
m = Matrix((1,12,3),[2,5,6],[-7,8,9])
m.det()

-570.0

In [94]:
# Inverse of the redefined m. The printed entries equal the transposed
# cofactor matrix divided by det(m) = -570, e.g. -3 / -570 = 0.00526.
a = m.inverse()
a.present()

[0.005263157894736842, 0.14736842105263157, -0.1]
[0.10526315789473684, -0.05263157894736842, 0.0]
[-0.08947368421052632, 0.16140350877192983, 0.03333333333333333]


In [95]:
# Matrix of cofactors of m, e.g. top-left entry = 5*9 - 6*8 = -3.
a = m.cofactor()
a.present()

[-3.0, -60.0, 51.0]
[-84.0, 30.0, -92.0]
[57.0, -0.0, -19.0]


**Arithmetic:**

In [96]:
# Matrix + scalar adds the scalar to every entry.
a = m + 2
a.present()

[3.0, 14.0, 5.0]
[4.0, 7.0, 8.0]
[-5.0, 10.0, 11.0]


In [97]:
# Matrix - Vector subtracts the vector from every row: each row minus [1,2,3].
a = m - u
a.present()

[0.0, 10.0, 0.0]
[1.0, 3.0, 3.0]
[-8.0, 6.0, 6.0]


In [101]:
# Operator form and explicit method form of the matrix-vector product.
# The second assignment overwrites the first, so only the by_vector() result
# is printed.
a = m * u
a = m.by_vector(u)

a.present()

[34.0, 30.0, 36.0]


In [103]:
# Operator form and explicit method form of the matrix-matrix product.
# The second assignment overwrites the first, so only the by_matrix() result
# is printed.
a = m * m2
a = m.by_matrix(m2)

a.present()

[70.0, 86.0, 102.0]
[64.0, 77.0, 90.0]
[88.0, 98.0, 108.0]


In [104]:
# Matrix + Matrix adds entry by entry.
a = m + m2
a.present()

[2.0, 14.0, 6.0]
[6.0, 10.0, 12.0]
[0.0, 16.0, 18.0]
