# Arrays

In [None]:
# NumPy user guide: https://numpy.org/doc/stable/user/index.html
import numpy as np

# One-dimensional array built from a plain Python list.
x = np.array([12, 3, 6, 14])

print("x: ", x)
print(type(x))                # numpy.ndarray
print("x.ndim: ", x.ndim)     # number of axes -> 1
print("x.shape: ", x.shape)   # length along each axis -> (4,)
print(x[0])                   # first element -> 12

In [None]:
# Two-dimensional array: a list of three rows with four columns each.
x = np.array([
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
])

print("x: ", x)
print(type(x))
print("x.ndim: ", x.ndim)     # two axes: rows and columns
print("x.shape: ", x.shape)   # (3, 4)
print(x[0, 1])                # row 0, column 1 -> 2

In [None]:
# Three-dimensional array: two blocks, each holding three rows of four values.
x = np.array([
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [9, 10, 11, 12],
    ],
    [
        [21, 22, 23, 24],
        [25, 26, 27, 28],
        [29, 30, 31, 32],
    ],
])

print("x.shape: ", x.shape)   # (2, 3, 4)
print(x[0, 1, 2])             # block 0, row 1, column 2 -> 7

## Array Slice

In [None]:
import numpy as np

# Values 0..10, so every element equals its own index.
arr = np.arange(11)

print(arr[3:8])     # index 3 up to, but not including, 8
print(arr[3:8:2])   # same range, every second element
print(arr[3:-1])    # index 3 up to, but not including, the last element
print(arr[3:])      # index 3 through the end
print(arr[:5])      # the first five elements

In [None]:
# Take the first three elements of arr with a basic slice.
new = arr[0:3]
print(len(new))
print(new)

In [None]:
# Write through the slice. A basic NumPy slice is a view onto the original
# array's data, not a copy, so the change is visible through arr as well.
new[0] = 100
print(new[0])
print(arr[0])
print(arr)

## Array Reshape

In [None]:
import numpy as np

# Twelve values, 0..11.
arr = np.arange(12)

# .copy() makes b independent of arr. Swap in the commented line to compare
# with a plain reshape, which here shares arr's data, so the write below
# would then show up in arr too.
# b = arr.reshape(3, 4)
b = arr.reshape(3, 4).copy()
print(b)

b[0, 0] = 100
print(b)
print(arr)   # unchanged, because b is a copy

In [None]:
# Rearrange arr (12 elements in the Array Reshape cell above) into
# 2 blocks of 2 rows x 3 columns; 2 * 2 * 3 = 12, so the sizes match.
b = arr.reshape(2,2,3)
print(b)

In [None]:
# -1 asks NumPy to infer that dimension from the total element count;
# with the 12-element arr defined above this gives 12 / (2 * 2) = 3.
b = arr.reshape(2,2,-1)
print(b)

In [None]:
# Deliberate failure: a (2, 5) array needs exactly 10 elements, but arr
# (12 elements in the Array Reshape cell above) does not have that many, so
# NumPy raises ValueError. The error is caught and its message printed so the
# demonstration still shows what went wrong without aborting "Run All".
try:
    b = arr.reshape(2, 5)
except ValueError as err:
    print("reshape failed:", err)
else:
    print(b)

# Dataframes

In [2]:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.html

import pandas as pd

# Load the iris CSV; the relative path is resolved against the notebook's
# working directory.
df = pd.read_csv('../../data/iris.csv')
print(type(df))

<class 'pandas.core.frame.DataFrame'>


In [None]:
# First rows of the frame (head() defaults to 5) as plain text.
print(df.head())

In [None]:
# Same rows, but left as the cell's last expression so the notebook displays
# the returned DataFrame itself instead of its printed text form.
df.head()

In [None]:
# Column labels of the frame.
print(df.columns)

In [None]:
# (number of rows, number of columns).
print(df.shape)

In [None]:
# Row labels; read_csv was called without an index column, so this is the
# default integer range index.
print(df.index)

In [None]:
# Row at integer position 1 (the second row), returned as a Series.
print(df.iloc[1])

In [None]:
# Single value at row position 1, column position 1.
print(df.iloc[1,1])

In [3]:
# Work on an independent copy. With plain assignment (the commented line)
# df2 would just be a second name for df and the write below would show up
# through both names.
df2 = df.copy()
# df2 = df

df2.iloc[1, 1] = 100
print(df2.iloc[1, 1])   # the copy holds the new value
print(df.iloc[1, 1])    # the original is untouched

100.0
3.0


In [None]:
# Data type of each column.
print(df.dtypes)

## Dataframes - Selecting Columns

In [None]:
# Features: indexing with a list of column names selects a DataFrame (2-D).
X = df[[
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
]]
# Target: indexing with a single column name selects a Series (1-D).
y = df["species"]

print(type(X))
print(type(y))
print(X.shape)
print(y.shape)
# print(X.head())
# print(y.head())

In [None]:
# Same selection as before, converted to plain NumPy arrays.
X = df[[
    "sepal_length",
    "sepal_width",
    "petal_length",
    "petal_width",
]].to_numpy()
y = df["species"].to_numpy()

print(type(X))
print(type(y))

# print(y)

In [None]:
# Alternative split: every column except the label is a feature.
X = df.drop(columns="species")
y = df["species"]
X.head()

## Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
# Binary example: 0 is treated as the negative class, 1 as the positive class.
actual = [0, 1, 0, 1]
predicted = [1, 1, 1, 0]

# Rows of the result correspond to actual classes, columns to predicted ones.
cm = confusion_matrix(actual, predicted)
print(type(cm))

# Legend for reading the matrix: "P" labels the predicted axis (columns) and
# "A" labels the actual axis (rows).
print(" ", "0 ", "1 ", "P ")
print(0, "TN", "FP")
print(1, "FN", "TP")
print("A")
print()

print("confusion matrix")
print(cm)
print()

In [None]:
# ravel - returns a flattened array in row major style
# For a 2x2 matrix that order is [0,0], [0,1], [1,0], [1,1], which matches
# TN, FP, FN, TP in the legend printed with the matrix.
# print(type(cm.ravel()))
tn, fp, fn, tp = cm.ravel()
print("TN", tn, "FP", fp, "FN", fn, "TP", tp)

In [None]:
from sklearn.metrics import confusion_matrix
actual = ['B', 'M', 'B', 'M']
predicted = ['M', 'M', 'M', 'B']

# Without an explicit `labels` argument the classes are taken in sorted
# (alphabetical) order, so B is index 0 (negative) and M is index 1 (positive).

cm = confusion_matrix(actual, predicted)
print(type(cm))

# Legend: "P" labels the predicted axis (columns), "A" the actual axis (rows).
print(" ", "B ", "M ", "P ")
print("B", "TN", "FP")
print("M", "FN", "TP")
print("A")
print()

# Label on its own line: print("CM", cm) would prefix only the first matrix
# row and push it out of alignment with the second row.
print("CM")
print(cm)
print()

tn, fp, fn, tp = cm.ravel()
print("TN", tn, "FP", fp, "FN", fn, "TP", tp)

### Specifying Labels

In [None]:
from sklearn.metrics import confusion_matrix

actual = ['W', 'M', 'W', 'M']
predicted = ['M', 'M', 'M', 'W']

# labels forces W to 0 (negative) and M to 1 (positive); the default sorted
# order would have put M first.
cm = confusion_matrix(actual, predicted, labels=["W", "M"])
print(type(cm))

# Legend: "P" labels the predicted axis (columns), "A" the actual axis (rows).
print(" ", "W ", "M ", "P ")
print("W ", "TN", "FP")
print("M ", "FN", "TP")
print("A")
print()

# Label on its own line: print("CM", cm) would prefix only the first matrix
# row and push it out of alignment with the second row.
print("CM")
print(cm)
print()

tn, fp, fn, tp = cm.ravel()
print("TN", tn, "FP", fp, "FN", fn, "TP", tp)

### Multiple Classes

In [None]:
from sklearn.metrics import confusion_matrix

# Three classes, so the matrix has one row and one column per class.
actual = ["cat", "ant", "cat", "cat", "ant", "bird"]
predicted = ["ant", "ant", "cat", "cat", "ant", "cat"]
# Default label order is sorted: ant, bird, cat.
cm = confusion_matrix(actual, predicted)
print(cm)
print()

# Passing that same order explicitly yields the same matrix.
cm = confusion_matrix(actual, predicted, labels=["ant", "bird", "cat"])
print(cm)

# Uncomment to see the rows/columns reordered as ant, cat, bird.
# cm = confusion_matrix(actual, predicted, labels=["ant", "cat", "bird"])
# print(cm)

### Attack Data

In [None]:
from sklearn.metrics import confusion_matrix

actual = ['attack', 'normal', 'normal',  'attack', 'normal', 'normal', 'attack', 'normal', 'attack']
predicted = ['attack', 'normal', 'attack', 'attack', 'normal', 'normal', 'normal',  'normal', 'normal']

# The labels order makes "normal" index 0 (negative) and "attack" index 1
# (positive), so ravel() below yields TN, FP, FN, TP in that order.
cm = confusion_matrix(actual, predicted, labels=["normal", "attack"])
print(cm)

# For the lists above: TN 4, FP 1, FN 2, TP 2.
tn, fp, fn, tp = cm.ravel()
print("TN", tn, "FP", fp, "FN", fn, "TP", tp)


## Builtin Datasets

In [None]:
# NOTE(review): LinearRegression, OneHotEncoder and np are not used by the
# cells visible in this section; they are kept in case other cells rely on them.
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

# Load the iris dataset that ships with scikit-learn (no file needed).
iris = load_iris()

print(type(iris))
print(iris.feature_names)   # names of the feature columns
print(iris.target_names)    # class names

In [None]:
# Dump everything load_iris() returned (long output).
print(iris)

In [None]:
# Conventional split: X takes the dataset's feature data, y its target labels.
X, y = iris.data, iris.target
print("X.shape", X.shape)
print("y.shape", y.shape)

print(y)

In [None]:
# Print the description text stored with the dataset.
print(iris.DESCR)

# One-Hot Encoding

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

# NOTE(review): the Dataframes section reads this file from
# '../../data/iris.csv', while this cell uses 'data/iris.csv' — confirm which
# relative path is correct for this notebook's location.
df = pd.read_csv('data/iris.csv')

In [None]:
# normally
# X = df.drop("species", axis='columns')
# y = df.species

# One-hot encode the whole frame instead: get_dummies replaces each
# string/categorical column with one indicator column per distinct value and
# passes numeric columns through unchanged.
# NOTE(review): because df is passed whole, the species label column is
# encoded too (assuming it holds strings) — confirm that is the intent.
X = pd.get_dummies(df)
X.head()

### Mushroom Dataset

In [None]:
# NOTE(review): the Dataframes section reads its CSV from '../../data/...',
# while this path starts at 'data/' — confirm which location is correct.
df = pd.read_csv('data/mushrooms.csv')
df.head()

In [None]:
# Separate the label column ("type") from the feature columns.
X = df.drop(columns="type")
y = df["type"]

# One-hot encode the features: each string/categorical column becomes one
# indicator column per distinct value.
X = pd.get_dummies(X)
X.head()