In [1]:
from sklearn import datasets, linear_model, metrics
import matplotlib.pyplot as plt
import numpy as np
import scipy
import time

In [2]:
#ustawienia
time_multiplicator = 100
seed = 244810
samples = 2410
noise = 16

In [3]:
#tworzenie dataset
x,Y  = datasets.make_regression(n_features=1,noise=noise,n_samples=samples, random_state=seed)

In [4]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(1)
X = poly.fit_transform(x)

In [5]:
#podział dataset na dane treningowe i testowe
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=seed)


### Metoda numeryczna

### $$ L(θ) = \frac{1}{N}(Y - X^T θ)^{T} (Y - X^T θ) $$

In [23]:
def func(w, X, Y):
    xt_w = np.matmul(X, w)
    y_xt_w = np.subtract(Y, xt_w)
    return np.matmul(np.transpose(y_xt_w),(y_xt_w))*(1/len(X))

In [24]:
start_num = time.time()
for i in range(time_multiplicator):
    
    ans_num = scipy.optimize.minimize(func,[1,1], args=(x,Y),method="Powell")
    
num_ex_time = (time.time() - start_num)/time_multiplicator


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 2410)

In [None]:
print(ans_num.x)

### Metoda analityczna

### $$ θ = (X^T X)^{-1} X^T Y $$

In [None]:
#rozbite na części
#X^T Y
xty = np.matmul(np.transpose(X),Y)
#X^T X
xtx = np.matmul(np.transpose(X),X)
#(X^T X)^-1
xtxinv =np.linalg.inv(xtx)
#answer
ans = np.matmul(xtxinv,xty)

In [None]:
start_ana = time.time()
for i in range(time_multiplicator):
    ans_ana = np.matmul(np.linalg.inv(np.matmul(np.transpose(X),X)),np.matmul(np.transpose(X),Y))
ana_ex_time = (time.time() - start_ana)/time_multiplicator 

In [None]:
print(ans_ana)

### Wykres

In [None]:
X_train_plot = np.delete(X_train, 0,1)
X_test_plot = np.delete(X_test, 0,1)

In [None]:
plt.figure(figsize=(12,6))

plt.scatter(X_train_plot,y_train, color='red', alpha=0.5, marker='+', label="Train data")

plt.scatter(X_test_plot,y_test, color='green', alpha=0.5, marker='+', label="Test data")

plt.plot([min(x),max(x)], [ans_num.x[1]*min(x)+ans_num.x[0] , ans_num.x[1]*max(x)+ans_num.x[0]], label="Numeryczna", linestyle=":" )

plt.plot([min(x),max(x)], [ans_ana[1]*min(x)+ans_ana[0] , ans_ana[1]*max(x)+ans_ana[0]], label="Analityczna", linestyle=":" )

plt.legend()



### MSE dla danych testowych

In [None]:
y_pred_an = np.empty(len(y_test))

for i in range(len(y_test)):
    y_pred_an[i] = ans_ana[1]*X_test_plot[i][0]+ans_ana[0]

MSE_an = np.square(np.subtract(y_test, y_pred_an )).mean()

In [None]:
y_pred_num = np.empty(len(y_test))

for i in range(len(y_test)):
    y_pred_num[i] = ans_num.x[1]*X_test_plot[i][0]+ans_num.x[0]

MSE_num = np.square(np.subtract(y_test, y_pred_num )).mean()

### Porównanie metody numerycznej i analitycznej

In [None]:
print("MSE dla metody numerycznej: ",MSE_num)
print("MSE dla metody analitycznej:",MSE_an)

In [None]:
print("Czas wykonania dla metody numerycznej: ",'{:f}'.format(num_ex_time))
print("Czas wykonania dla metody analitycznej:",'{:f}'.format(ana_ex_time))

In [None]:
#print("Czas wykonania dla metody numerycznej: ",'{:.3e}'.format(num_ex_time))
#print("Czas wykonania dla metody analitycznej:",'{:.3e}'.format(ana_ex_time))

# FlapPy Bird

In [None]:
flappy1 = np.genfromtxt('data/1.csv', delimiter=',')

In [None]:
X= np.reshape(flappy1[:,0],(len(flappy1),1))
y= np.reshape(flappy1[:,1],(len(flappy1),1))

In [None]:
pre_pro = PolynomialFeatures(degree = 9)
X_poly = pre_pro.fit_transform(X)


In [None]:
pr_model = linear_model.LinearRegression()
pr_model.fit(X_poly,y)

In [None]:
y_pred = pr_model.predict(X_poly)

In [None]:
X= np.reshape(flappy1[:,0],(len(flappy1),1))
y= np.reshape(flappy1[:,1],(len(flappy1),1))


plt.scatter(X,y)
plt.plot(X, y_pred)
