# Pandas 

### Very briefly 

https://pandas.pydata.org/docs/reference/frame.html

In [None]:
import pandas as pd

In [None]:
# Read ./data/lorenz.csv (path relative to the working directory) into a DataFrame.
lorenz_df = pd.read_csv('./data/lorenz.csv')

In [None]:
# A bare expression on the last line of a cell is rendered as the cell output.
lorenz_df

In [None]:
# Remove the 'Unnamed: 0' column. errors='ignore' keeps the cell re-runnable:
# a plain `del` raises KeyError once the column is already gone.
lorenz_df = lorenz_df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Show the frame again to inspect its current columns.
lorenz_df

In [None]:
# Boolean row selector: True where the sigma column equals 10.
mask = lorenz_df['sigma'] == 10
# Plot the x column of the selected rows.
lorenz_df.loc[mask, 'x'].plot()

In [None]:
import numpy as np

# Only the last expression of a cell is displayed, so the pandas results and
# the NumPy result are gathered into one tuple instead of two separate lines
# (the first of which would be computed and silently discarded).
# Order: pandas mean, pandas std, NumPy mean.
lorenz_df.x.mean(), lorenz_df.x.std(), np.mean(lorenz_df.x)

In [None]:
# Summary statistics of the frame's columns, shown as the cell output.
lorenz_df.describe()

In [None]:
# iterrows() yields (index, row) pairs; each row exposes its columns as attributes.
for i, value in lorenz_df.iterrows():
    print(i, value.sigma, value.x, value.y, value.z)
    # Stop after the first row: this only demonstrates the iteration pattern.
    break

In [None]:
# 1 - build a DataFrame from a dict mapping column name -> column values.
# The scalar 'europe' is repeated for every row; the lists give one value per row.
# NOTE(review): 'contry' looks like a misspelling of 'country'. The same key is
# used when building df2, so any rename must be done in both places at once.
df1 = pd.DataFrame({'contry': ['germany', 'italy', 'uk'], 
              'continent': 'europe',
              'currency': ['euro', 'euro', 'pound']})

In [None]:
# 2 - build a DataFrame from a list of records (one dict per row).
list_dict = [
    {'contry': 'Germany', 'continent': 'europe'},
    {'contry': 'Germany', 'continent': 'europe'},
    {'contry': 'Germany', 'continent': 'europe'},
]
# from_records is a classmethod: call it on the class rather than on a
# throwaway empty DataFrame instance.
df2 = pd.DataFrame.from_records(list_dict)

# Stack both frames row-wise; df2 has no 'currency' column, so those cells
# are filled with NaN in the result.
pd.concat([df1, df2])

## Plotly (with data frame)

https://plotly.com/python/

In [None]:
import plotly.express as px      # nice by default minimal code required


import plotly.graph_objects as go  # ok by default a bit more code required compared to matplotlib


# Dash from same company as plotly - Maximum flexibility / a lot of code to write

In [None]:
# Line plot of column 'x' against column 't', colored by the 'sigma' column.
# Columns are referenced by name inside the DataFrame.
px.line(lorenz_df, x='t', y='x', color='sigma')

In [None]:
# Same data, but passing the columns themselves instead of a DataFrame plus column names.
px.line(x=lorenz_df['t'], y=lorenz_df['x'], color=lorenz_df['sigma'])

In [None]:
# 3-D line plot using the 'x', 'y' and 'z' columns as coordinates, colored by 'sigma'.
px.line_3d(lorenz_df, x='x', y='y', z='z', color='sigma')

# Machine Learning in Python

### (without theory)

My picks:
- shallow machine learning: https://scikit-learn.org/stable/index.html
- deep learning: https://pytorch.org/
- image processing: https://scikit-image.org/

## Sklearn 


https://scikit-learn.org/stable/index.html

# Create a random dataset 

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def create_random_class(class_id, std, offset, number_sample=500):
    """Draw a random 2-D point cloud and tag every point with one class label.

    Points are standard-normal samples scaled by ``std`` and shifted by
    ``offset``.

    Args:
        class_id: Class identifier; converted with ``int`` and stored in the
            ``label`` column of every row.
        std: Scale factor applied to the standard-normal samples.
        offset: Shift added to the scaled samples (broadcast against an array
            of shape ``(number_sample, 2)``).
        number_sample: Number of points to draw.

    Returns:
        A DataFrame with columns ``x``, ``y`` and ``label``, one row per point.
    """
    points = np.random.randn(number_sample, 2)
    points = std * points + offset
    x_values, y_values = points[:, 0], points[:, 1]
    label = int(class_id)
    return pd.DataFrame({'x': x_values, 'y': y_values, 'label': label})

# Class 1: narrow cloud (std 0.5) centred at (1.5, 0).
std1 = 0.5
offset1 = np.array([1.5, 0])
# Class 2: wide cloud (std 2.0) centred at (-2, 0).
std2 = 2.0
offset2 = np.array([-2, 0])

# Training set: one batch per class, stacked and then shuffled
# (sample(frac=1) returns every row in random order).
data_df1 = create_random_class(1, std1, offset1)
data_df2 = create_random_class(2, std2, offset2)
dataset = pd.concat([data_df1, data_df2]).sample(frac=1)

# Test set: a fresh draw with the same parameters, left unshuffled.
data_df1 = create_random_class(1, std1, offset1)
data_df2 = create_random_class(2, std2, offset2)
test_dataset = pd.concat([data_df1, data_df2])

In [None]:
# Plot with the label as a string column so each class is drawn as its own
# discrete color group. The cast is applied to a temporary copy made by
# assign(), so `dataset` itself keeps its integer labels even if plotting
# fails or the cell is interrupted (no cast-and-restore round trip needed).
fig = px.scatter(dataset.assign(label=dataset.label.astype('str')),
                 x='x', y='y', color='label')
fig.show()


In [None]:
# Extract the two feature columns as a NumPy array and display its shape.
data = dataset[['x', 'y']].to_numpy()
data.shape

In [None]:
from sklearn import neighbors

# Features: the 'x' and 'y' columns; target: the 'label' column.
x_train = dataset[['x', 'y']].to_numpy()
y_train = dataset['label'].to_numpy()


# k-nearest-neighbours classifier using 10 neighbours, fitted on the training set.
classifier = neighbors.KNeighborsClassifier(n_neighbors=10)
classifier.fit(x_train, y_train)

In [None]:
# Same feature columns as used for training, taken from the test set.
x_test = test_dataset[['x', 'y']].to_numpy()

predictions = classifier.predict(x_test)

# Accuracy: fraction of test points whose predicted label equals the true label.
np.sum(predictions == test_dataset.label)/len(predictions)

# Evaluation

In [None]:
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, confusion_matrix
# Text report comparing the true test labels (first argument) with the predictions.
print(classification_report(test_dataset.label, predictions))

In [None]:
# Build and draw the confusion matrix in one step.
# - from_predictions labels the axes with the actual class values (1 and 2);
#   constructing ConfusionMatrixDisplay from a bare matrix labels them
#   0..n_classes-1 instead, which does not match this dataset's labels.
# - The result is deliberately not bound to the name `display`, which would
#   shadow IPython's built-in display() helper for the rest of the session.
cm_display = ConfusionMatrixDisplay.from_predictions(test_dataset.label, predictions)

In [None]:
# Same API for all classifiers: construct, fit(), predict().

from sklearn import svm
# Support-vector classifier with default settings; rebinds `classifier`.
classifier = svm.SVC()
classifier.fit(x_train, y_train)

x_test = test_dataset[['x', 'y']].to_numpy()
predictions = classifier.predict(x_test)

# Same report as for the previous classifier, now for the SVC predictions.
print(classification_report(test_dataset.label, predictions))

# Regression

In [None]:
# Noise-free reference line y = 2x + 1, sampled at 100 points on [-1, 2].
x = np.linspace(-1, 2, 100)
y = 2 * x + 1
plt.plot(x, y)

In [None]:
# Scale of the noise added to every point, number of points turned into
# outliers, and scale of the extra noise added to those outliers.
noise = 0.2
num_outiliers = 10
noise_outiliers = 50

# Samples of the line y = 2x + 1 with small Gaussian noise on every point.
x = np.linspace(-1, 2, 100)
y = 2 * x + 1 + noise * np.random.randn(100)

# Pick distinct indices (no replacement) and perturb those points strongly.
ourliers = np.random.choice(100, size=num_outiliers, replace=False)
y[ourliers] += noise_outiliers * np.random.randn(num_outiliers)

plt.scatter(x, y)

In [None]:
# Indexing with None appends an axis: the 1-D x becomes a single-column 2-D array.
x[:, None].shape

In [None]:
from sklearn import linear_model
# Ordinary least-squares fit; x is passed as a single-column 2-D feature array.
reg = linear_model.LinearRegression()
reg.fit(x[:, None], y)
# Fitted slope and intercept (the underlying line is y = 2x + 1).
reg.coef_[0], reg.intercept_

In [None]:
from sklearn import linear_model
# Same fit with HuberRegressor, which scikit-learn documents as robust to outliers.
reg = linear_model.HuberRegressor()
reg.fit(x[:, None], y)
# Fitted slope and intercept, for comparison with the true line y = 2x + 1.
reg.coef_[0], reg.intercept_

# Skimage

In [None]:
from skimage import data

In [None]:
# Load the astronaut sample image bundled with scikit-image and show it.
image = data.astronaut()
plt.imshow(image)

In [None]:
from skimage.color import rgb2gray
# Convert the RGB image to grayscale and show it with a gray colormap.
image_gray = rgb2gray(image)
plt.imshow(image_gray, cmap='gray')

# Filters

In [None]:
from skimage.filters import gaussian, sobel, sobel_v, sobel_h

# Each entry pairs a plot title with the filtered image shown under it.
# The Gaussian blur runs on the colour image (channels on axis 2); the
# Sobel filters run on the grayscale image.
filtered_images = (
    ('Smoothing', gaussian(image, 3, channel_axis=2)),
    ('Edges', sobel(image_gray)),
    ('Edges horizontal', sobel_h(image_gray)),
    ('Edges vertical', sobel_v(image_gray)),
)

# One figure per filter, in the order listed above.
for title, filtered in filtered_images:
    plt.title(title)
    plt.imshow(filtered)
    plt.show()

In [None]:
# 50x50 image of zeros with two square regions set to 1; the squares
# overlap at row/column index 29. Note that this rebinds `x`.
x = np.zeros((50, 50))
x[20:30, 20:30] = 1
x[29:45, 29:45] = 1

plt.imshow(x)

In [None]:
from skimage.morphology import disk
from skimage.morphology import erosion, dilation, closing

In [None]:
# Morphological erosion of x using a disk(2) footprint.
plt.imshow(erosion(x, disk(2)))

In [None]:
# Morphological dilation of x using a disk(2) footprint.
plt.imshow(dilation(x, disk(2)))

In [None]:
# Load the coins sample image as float64 and rescale so its maximum is 1.
image_gray = data.coins().astype('float64')
image_gray /= image_gray.max()
# Show the binary image of pixels brighter than 0.55.
plt.imshow(image_gray > 0.55)
plt.colorbar()

In [None]:
# Binary mask of pixels above the 0.55 threshold. Note that this rebinds `mask`.
mask = image_gray > 0.55

# Morphological closing of the mask with a disk(3) footprint.
plt.imshow(closing(mask, disk(3)))

In [None]:
from skimage.morphology import label

In [None]:
# Close the mask, then label its connected regions. The 'prism' colormap with
# nearest-neighbour interpolation is used when showing the label image.
close_mask = closing(mask, disk(3))
plt.imshow(label(close_mask), cmap='prism', interpolation='nearest')