# Epicenter computation

In [None]:
from PyAWD.AcousticWaveDataset import *
import numpy as np
import sklearn
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
import seaborn as sns
sns.set_theme()

In [None]:
# Load the training split and build the regression inputs and targets.
train = torch.load("../../local/datasets/AWD/train_small.pt")
# Convert to arrays once, exactly as the test split does, so the estimators
# below do not have to re-convert a Python list on every fit/transform call.
X = np.array([train[i][1].flatten() for i in range(train.size)])
Y = np.array([train[i][0][0] for i in range(train.size)])

In [None]:
# Load the held-out split and extract inputs/targets as arrays.
test = torch.load("../../local/datasets/AWD/test_small.pt")
n_test = test.size
# Element [1] of each sample is flattened into one feature row.
X_test = np.array([test[k][1].flatten() for k in range(n_test)])
# Element [0][0] of each sample is used as the regression target.
Y_test = np.array([test[k][0][0] for k in range(n_test)])

## Data presentation

We work with pairs of data of the form:
- Epicenter $(x, y)$
- Wave measured every $2$ seconds for $10$ seconds.

As an example, sample $0$ starts at this epicenter:

In [None]:
# First element of training sample 0, shown as the epicenter.
first_epicenter = train[0][0]
print("Epicenter:", first_epicenter)

and yields this propagation measurement:

In [None]:
# Plot the same sample whose epicenter was printed above.
sample_index = 0
train.plot_item(sample_index)

## Linear regression

In [None]:
# Baseline: ordinary least squares on the raw flattened measurements.
linear = LinearRegression()
linear.fit(X, Y)
Y_hat = linear.predict(X_test)

# Mean absolute error divided by (nx // 2) * sx of the test dataset.
# NOTE(review): presumably half the spatial extent of the grid - confirm.
abs_error = np.mean(np.abs(Y_test - Y_hat))
norm_factor = (test.nx // 2) * test.sx
print("Normalized MAE:", abs_error / norm_factor)

## Principal Component Analysis

In [None]:
# Fit a 125-component PCA on the training inputs only.
pca = PCA(n_components=125)
pca.fit(X)
# fit() returns the estimator itself, so both names refer to the fitted PCA.
pca_model = pca

### Linear regression on PCA

In [None]:
# Linear regression on the PCA-projected inputs.
train_components = pca_model.transform(X)
linear_pca = LinearRegression()
linear_pca.fit(train_components, Y)

test_components = pca_model.transform(X_test)
Y_hat = linear_pca.predict(test_components)

# Mean absolute error divided by (nx // 2) * sx of the test dataset.
abs_error = np.mean(np.abs(Y_test - Y_hat))
norm_factor = (test.nx // 2) * test.sx
print("Normalized MAE:", abs_error / norm_factor)

### Polynomial regression on PCA

In [None]:
# Degree-2 polynomial regression on the PCA-projected inputs.
polynomial = PolynomialFeatures(degree=2)
# Fit the feature expansion on the training projection only.
feat_poly = polynomial.fit_transform(pca_model.transform(X))
poly_pca = LinearRegression().fit(feat_poly, Y)
# Reuse the already-fitted expansion for the test data: preprocessing steps
# must never be re-fitted on the test set, so use transform(), not
# fit_transform().
feat_poly_test = polynomial.transform(pca_model.transform(X_test))
Y_hat = poly_pca.predict(feat_poly_test)
# Mean absolute error divided by (nx // 2) * sx of the test dataset.
print("Normalized MAE:", (np.mean(np.abs(Y_test-Y_hat))/((test.nx//2)*test.sx)))