In [None]:
import numpy as np
# all elements of a single NumPy array must share the same type
x = np.array([[1, 2, 3],
              [4, 5, 6]])
print(f"x:\n{x}")

In [None]:
from scipy import sparse
# Create a 2D NumPy array with a diagonal of ones, and zeros everywhere else
eye = np.eye(4)
print(f"numpy array:\n{eye}")

# convert the NumPy array to a SciPy sparse matrix in CSR (compressed sparse row) format;
# only the non-zero entries are stored
sparse_matrix = sparse.csr_matrix(eye)
print(f"\nSciPy sparse CSR matrix:\n{sparse_matrix}")

# build the same matrix directly in COOrdinate format:
# entry k has value data[k] and sits at position (row_indices[k], col_indices[k])
row_indices = np.arange(4)
col_indices = np.arange(4)
data = np.ones(4)
eye_coo = sparse.coo_matrix((data, (row_indices, col_indices)))
print(f"COO representation:\n{eye_coo}")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# 100 evenly spaced sample points covering [-10, 10], and the sine of each one
x = np.linspace(start=-10, stop=10, num=100)
y = np.sin(x)
# draw the curve, marking every sample point with an "x"
plt.plot(x, y, marker="x")

In [None]:
# pandas' main data structure is the DataFrame, which is like a table where each column can have its own type (unlike a NumPy array)
import pandas as pd

# small example table: one list per column, one position per person
data = dict(
    Name=['John', 'Anna', 'Peter', 'Linda'],
    Location=['NY', 'Paris', 'Berlin', 'London'],
    Age=[24, 13, 53, 33],
)
data_pandas = pd.DataFrame(data)
display(data_pandas)

# boolean mask on the Age column: keep only the rows with an age greater than 30
display(data_pandas.loc[data_pandas['Age'] > 30])

In [None]:
# first example: classification problem
# the data is a table with 150 rows and 4 columns
from sklearn.datasets import load_iris
# load the iris data; this cell reads its 'data', 'target' and 'target_names' entries
iris_dataset = load_iris()

# walk the samples and their labels in lockstep and print one line per flower
for measurements, label in zip(iris_dataset['data'], iris_dataset['target']):
    # each sample holds four measurements, in this column order
    sepal_length, sepal_width, petal_length, petal_width = measurements
    # translate the numeric label into the species name
    target = iris_dataset['target_names'][label]
    print(f"{sepal_length} sepal length, {sepal_width} sepal width, {petal_length} petal length, {petal_width} petal width ==> {target}")

In [None]:
# shuffle and split the data into train and test sets
from sklearn.model_selection import train_test_split
# random_state=0 pins the shuffle, so re-running the cell yields the same split
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'],
    iris_dataset['target'],
    random_state=0,
)
# report the size of the held-out portion
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

In [None]:
# draw a scatter plot to analyze the input data and detect any peculiarities
!pip install mglearn
import mglearn
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)
print("iris_dataframe: {}\n".format(iris_dataframe.shape))
grr = pd.plotting.scatter_matrix(iris_dataframe, c=y_train, figsize=(15,15), marker='o', hist_kwds={'bins':20}, s=60,alpha=.8, cmap=mglearn.cm3)

In [None]:
# first model! k-nearest neighbors
from sklearn.neighbors import KNeighborsClassifier
# build the classifier (it looks at only the single nearest neighbor) and train it;
# fit() returns the estimator itself, so construction and training can be chained
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# a single new sample must still be 2D: 1 row, 4 columns
X_new = np.array([[5, 2.9, 1, 0.2]])
print(f"X_new.shape: {X_new.shape}")

# make a prediction for this new iris and map the numeric label to its name
prediction = knn.predict(X_new)
print(f"Prediction: {iris_dataset['target_names'][prediction]}")

In [None]:
# check the model against the test set that was held out earlier
y_pred = knn.predict(X_test)
print(f"Test set predictions: \n {y_pred}")

# accuracy computed two ways: by hand as the fraction of predictions that match
# the true labels, and through the classifier's own score() method
accuracy = (y_pred == y_test).mean()
accuracy2 = knn.score(X_test, y_test)
print(f"Model accuracy: {accuracy:.2f}")
print(f"Model accuracy: {accuracy2:.2f}")