# Pandas 

### Very briefly 

https://pandas.pydata.org/docs/reference/frame.html

In [None]:
import pandas as pd

In [None]:
# Load the Lorenz data from ./lorenz.csv (path is relative to the current
# working directory) into a DataFrame.
lorenz_df = pd.read_csv('./lorenz.csv')

In [None]:
# Show the freshly loaded frame as the cell output.
lorenz_df

In [None]:
# Drop the 'Unnamed: 0' column in place (presumably the index that was written
# out when the CSV was saved -- confirm against the file).
# Unlike `del`, errors='ignore' keeps the cell safe to re-run: it does not
# raise a KeyError when the column has already been removed.
lorenz_df.drop(columns='Unnamed: 0', errors='ignore', inplace=True)

In [None]:
# Show the frame again, now without the 'Unnamed: 0' column.
lorenz_df

In [None]:
# Boolean row mask: True where the sigma column equals 10.
mask = lorenz_df['sigma'] == 10
# Plot the x column of the selected rows only.
lorenz_df.loc[mask, 'x'].plot()

In [None]:
# (mean, standard deviation) of the x column.
lorenz_df['x'].mean(), lorenz_df['x'].std()

In [None]:
# Summary statistics of the frame, as produced by DataFrame.describe().
lorenz_df.describe()

In [None]:
# iterrows() yields (index, row) pairs; restrict the frame to its first row
# so that only a single example line is printed.
for i, value in lorenz_df.head(1).iterrows():
    print(i, value.sigma, value.x, value.y, value.z)

## Plotly (with data frame)

https://plotly.com/python/

In [None]:
import plotly.express as px      # nice by default minimal code required


import plotly.graph_objects as go  # ok by default a bit more code required compared to matplotlib


# Dash (from the same company as Plotly): maximum flexibility, but a lot of code to write

# Machine Learning in Python

### (without theory)

My picks:
- Shallow machine learning: https://scikit-learn.org/stable/index.html
- Deep learning: https://pytorch.org/

## Sklearn 


https://scikit-learn.org/stable/index.html

# Create a random dataset 

In [None]:
import numpy as np
import matplotlib.pyplot as plt


def create_random_class(class_id, std, offset, number_sample=500):
    """Build a labelled 2-D point cloud as a DataFrame.

    Points are drawn from a standard normal distribution (global NumPy
    random state), scaled by ``std`` and shifted by ``offset``.

    Args:
        class_id: Class label; converted with ``int`` and stored in every row.
        std: Scale factor applied to the standard-normal samples.
        offset: Shift added to every sample; must broadcast against an
            array of shape ``(number_sample, 2)``.
        number_sample: Number of points to generate.

    Returns:
        DataFrame with the columns ``x``, ``y`` and ``label``.
    """
    points = offset + std * np.random.randn(number_sample, 2)
    # Transposing gives one row per coordinate, ready for unpacking.
    x_coords, y_coords = points.T
    return pd.DataFrame({'x': x_coords, 'y': y_coords, 'label': int(class_id)})

# Two classes with the same spread, centred left and right of the origin.
std1, offset1 = 1, np.array([1.5, 0])
std2, offset2 = 1, np.array([-1.5, 0])
class_specs = ((1, std1, offset1), (2, std2, offset2))

# Training data: one frame per class, concatenated and then shuffled
# (sample(frac=1) returns every row in random order).
data_df1, data_df2 = [create_random_class(*spec) for spec in class_specs]
dataset = pd.concat([data_df1, data_df2]).sample(frac=1)

# Test data: a fresh draw from the same two distributions, left unshuffled.
data_df1, data_df2 = [create_random_class(*spec) for spec in class_specs]
test_dataset = pd.concat([data_df1, data_df2])

In [None]:
# Plot the training set, coloured by class. The label is passed as a string
# so that plotly treats it as a discrete category rather than a number.
# The conversion is done on a temporary copy (assign returns a new frame), so
# `dataset` keeps its integer labels even if plotting raises.
fig = px.scatter(dataset.assign(label=dataset.label.astype('str')),
                 x='x', y='y', color='label')
fig.show()


In [None]:
from sklearn import neighbors

# Feature matrix (x and y columns) and label vector as NumPy arrays.
x_train = dataset.loc[:, ['x', 'y']].to_numpy()
y_train = dataset.loc[:, 'label'].to_numpy()

# k-nearest-neighbours classifier with k = 10, fitted on the training set.
classifier = neighbors.KNeighborsClassifier(n_neighbors=10)
classifier.fit(x_train, y_train)

In [None]:
# Predict a label for every test point.
x_test = test_dataset.loc[:, ['x', 'y']].to_numpy()
predictions = classifier.predict(x_test)

# Accuracy: fraction of test points whose prediction equals the true label.
np.mean(predictions == test_dataset.label.to_numpy())

In [None]:
# Evaluation

In [None]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, confusion_matrix

# Text report comparing the true test labels with the predictions.
report = classification_report(y_true=test_dataset.label, y_pred=predictions)
print(report)

In [None]:
# Confusion matrix of true vs. predicted test labels.
# from_predictions computes the matrix and ticks the axes with the actual
# class values (1 and 2); building the display from a bare matrix would
# label them 0 and 1. The result is not stored in a variable called
# `display`, because that would shadow IPython's display() helper for the
# rest of the notebook session.
cm_display = ConfusionMatrixDisplay.from_predictions(test_dataset.label, predictions)


In [None]:
# Same API for all classifiers: construct, fit, predict.

from sklearn import svm

# Support-vector classifier with default settings; fit() returns the
# estimator itself, so construction and training can be chained.
classifier = svm.SVC().fit(x_train, y_train)

x_test = test_dataset.loc[:, ['x', 'y']].to_numpy()
predictions = classifier.predict(x_test)

print(classification_report(y_true=test_dataset.label, y_pred=predictions))

# Regression

In [None]:
# Noise-free reference line y = 2x + 1, sampled at 100 points on [-1, 2].
x = np.linspace(-1, 2, num=100)
slope, intercept = 2, 1
y = slope * x + intercept
plt.plot(x, y)

In [None]:
noise = 0.2          # scale of the Gaussian noise added to every sample
num_outiliers = 0    # how many samples receive extra noise (0: none)
noise_outiliers = 2  # scale of the extra noise added to those samples

# Same line as before (y = 2x + 1), now with Gaussian noise on every point.
x = np.linspace(-1, 2, num=100)
y = 2 * x + 1 + noise * np.random.randn(x.size)

# Pick distinct sample indices and perturb only those with the larger noise.
ourliers = np.random.choice(x.size, size=num_outiliers, replace=False)
y[ourliers] += noise_outiliers * np.random.randn(num_outiliers)

plt.scatter(x, y)

In [None]:
from sklearn import linear_model

# The estimator takes a 2-D feature matrix, so x is reshaped to one column.
features = x.reshape(-1, 1)
reg = linear_model.LinearRegression().fit(features, y)

# Fitted slope and intercept; the data was generated from y = 2x + 1.
reg.coef_[0], reg.intercept_

# Skimage

In [None]:
from skimage import data

In [None]:
# Load the `astronaut` sample from skimage.data and display it.
image = data.astronaut()
plt.imshow(image)

In [None]:
from skimage.color import rgb2gray
# Grayscale version of the image; the Sobel edge filters further down run on it.
image_gray = rgb2gray(image)

# Filters

In [None]:
from skimage.filters import gaussian, sobel, sobel_v, sobel_h

# (title, filtered image) pairs, shown as one figure each in this order.
# The Gaussian blur runs on the colour image (channel_axis=2 marks the last
# axis as the channel axis); the Sobel variants run on the grayscale image.
filtered_images = [
    ('Smoothing', gaussian(image, sigma=3, channel_axis=2)),
    ('Edges', sobel(image_gray)),
    ('Edges horizontal', sobel_h(image_gray)),
    ('Edges vertical', sobel_v(image_gray)),
]

for title, filtered in filtered_images:
    plt.title(title)
    plt.imshow(filtered)
    plt.show()