# Studio Projektowe

## Pobranie danych z API NBP

In [16]:
import requests
import pandas as pd
import numpy as np
import datetime

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split


def loadData():
    col_names = ['date', 'price']
    tmp_data = []
    for i in range(2013, 2020):
        tmp_df = pd.DataFrame(columns=col_names)
        response = requests.get(f'http://api.nbp.pl/api/cenyzlota/{i}-01-01/{i}-12-31?format=json')
        response = response.json()
        response = pd.DataFrame(response)
        tmp_df['date'] = response['data']
        tmp_df['price'] = response['cena']
        tmp_data.append(tmp_df)
    data = pd.concat(tmp_data, ignore_index=True)
    
    today = datetime.date.today()
    tmp_df = pd.DataFrame(columns=col_names)
    response = requests.get(f'http://api.nbp.pl/api/cenyzlota/2020-01-01/{today}?format=json')
    response = response.json()
    response = pd.DataFrame(response)
    tmp_df['date'] = response['data']
    tmp_df['price'] = response['cena']
    data = pd.concat([data, tmp_df], ignore_index=True)
    return data

def saveDataToFile(data):
    data.to_csv('data.csv', index=False)

data = loadData()
saveDataToFile(data)

## Przetwarzanie danych

In [17]:
def addTimeColumns(data):
    data['date'] = data['date'].astype('datetime64')
    data['year'] = data['date'].dt.year
    data['month'] = data['date'].dt.month
    data['day'] = data['date'].dt.day
    data['weekday'] = data['date'].dt.weekday
    return data

data = addTimeColumns(data)

years = [data[data.year == i] for i in range(2013, 2021)]

months = [[years[i][data.month == j] for j in range(1,13)] for i in range(8)]


  months = [[years[i][data.month == j] for j in range(1,13)] for i in range(8)]


## Podział danych na zbiory treningowe i testowe

In [18]:
data.index = data['date']
df = data[['price']]
print(df)

# a variable for predicting 30 days out into the future
forecast_out = 30

df['Prediction'] = df[['price']].shift(-forecast_out)
data.tail(40)

# Convert the dataframe to a numpy array
X = np.array(df.drop(['Prediction'],1))

# Remove the last '30' rows
X = X[:-forecast_out]

# Convert the dataframe to a numpy array 
y = np.array(df['Prediction'])

# Get all of the y values except the last '30' rows
y = y[:-forecast_out]

# Split the data into training set and testing set
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.015 )

x_forecast = np.array(df.drop(['Prediction'], 1))[-forecast_out:]

             price
date              
2013-01-02  165.83
2013-01-03  166.97
2013-01-04  167.43
2013-01-07  167.98
2013-01-08  167.26
...            ...
2020-11-17  229.03
2020-11-18  230.06
2020-11-19  226.93
2020-11-20  226.17
2020-11-23  227.23

[1990 rows x 1 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Prediction'] = df[['price']].shift(-forecast_out)


In [22]:
# Support Vector Machine (Regression)
svr_rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
svr_rbf.fit(x_train, y_train)

svm_confidence = svr_rbf.score(x_test, y_test)
print("SVM Confidence: ", svm_confidence) # the best possible score is 1.0

svm_prediction = svr_rbf.predict(x_forecast)

# SVM Prediction output
print("SVM Prediction Output")
print(svm_prediction)

# Linear Regression
lr = LinearRegression()
lr.fit(x_train, y_train)

lr_confidence = lr.score(x_test, y_test)
print("LR Confidence: ", lr_confidence)

lr_prediction = lr.predict(x_forecast)

# Linear Regression Prediction Output
print("Linear Regression Prediction Output")
print(lr_prediction)

SVM Confidence:  0.9616039302997276
SVM Prediction Output
[230.62008023 230.13168907 225.67634861 230.8737589  238.19948818
 233.00687372 237.90107742 237.52018109 232.15651133 238.42576214
 238.14085428 237.98139414 235.3700387  237.03070769 237.73477067
 232.16211005 232.55205416 232.2292076  234.79637357 232.41981377
 234.37580757 232.12676334 230.89224646 228.80846683 226.64106055
 228.80846683 225.92413106 232.1951819  231.12553884 232.40981178]
LR Confidence:  0.9398547683145247
Linear Regression Prediction Output
[236.81469724 237.21156733 232.98846247 237.61861358 239.08398009
 241.2006206  239.93877722 240.05071494 241.92312769 239.17556549
 239.06362778 239.01274699 240.55952275 238.7786954  239.989658
 241.84171844 242.84915791 242.1775316  240.69181278 241.49572913
 240.79357434 229.13169924 230.64794652 231.329749   233.42603719
 231.329749   232.37789309 229.19275617 228.4193683  229.49804086]
