<a href="https://colab.research.google.com/github/jovanape/Bitcoint-Price-Prediction/blob/main/Preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Pretprocesiranje podataka**

### **Učitavanje biblioteka**

In [None]:
# Ucitavanje biblioteka

import numpy as np 
import pandas as pd 
from matplotlib import pyplot as plt
import os

In [None]:
# NOTE(review): not referenced anywhere else in the visible part of this file.
# determine_x_and_y uses a literal 60 as well - presumably the same quantity,
# but confirm before tying the two together.
DAYS_TO_PREDICT = 60

### **Učitavanje skupa podataka i uklanjanje NaN vrednosti**

In [None]:
def preprocess_data(file_name = 'bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv'):
    """Load a CSV file, drop incomplete rows and add a calendar-date column.

    Args:
        file_name: Path of the CSV file to read. The file must contain a
            'Timestamp' column, which is interpreted as seconds since the
            Unix epoch.

    Returns:
        The loaded DataFrame without any row that contained a NaN, extended
        with a 'Date' column of ``datetime.date`` values (time of day
        discarded). The original row labels are kept.
    """
    # Read the file and discard every row with at least one missing value.
    cleaned = pd.read_csv(file_name).dropna()

    # Epoch seconds -> timestamps -> date only (no time component).
    timestamps = pd.to_datetime(cleaned['Timestamp'], unit='s')
    cleaned['Date'] = timestamps.dt.date

    return cleaned

In [None]:
# Load the CSV, drop rows with missing values and add the 'Date' column.
data = preprocess_data(file_name = 'bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv')
# Bare expression: shown as the cell output when run in a notebook.
data

### Filtriranje podataka

In [None]:
def filter_data(data, date_str = '2021-03-28'):
    """Return the rows of ``data`` whose 'Date' is strictly after ``date_str``.

    Args:
        data: DataFrame with a 'Date' column holding ``datetime.date``
            objects (as produced by ``.dt.date``) or datetime-like values.
        date_str: Cut-off date, in any form ``pd.to_datetime`` can parse.
            Rows dated on or before the cut-off are dropped.

    Returns:
        The filtered DataFrame; the original row labels are kept.
    """
    cutoff = pd.to_datetime(date_str)

    # Normalise the column to datetime64 before comparing. Ordering a
    # ``datetime.date`` against a ``Timestamp`` raises TypeError on
    # pandas >= 2.0 and only worked with a deprecation warning before that.
    row_dates = pd.to_datetime(data['Date'])

    return data.loc[row_dates > cutoff]

In [None]:
# Keep only the rows dated after 2021-03-28; this rebinds the global `data`.
data = filter_data(data, date_str = '2021-03-28')
# Bare expression: shown as the cell output when run in a notebook.
data

### Podela skupa podataka

In [None]:
def data_split(data, test_size = 0):
    """Split ``data`` into a leading training part and a trailing test part.

    Args:
        data: Sliceable sequence (anything supporting ``len`` and
            ``data[a:b]``).
        test_size: Fraction of ``data`` that goes into the test part; must
            be strictly greater than 0.

    Returns:
        ``(train, test)``. When ``test_size`` is not positive, a message is
        printed and ``None`` is returned instead.
    """
    # Guard clause: a non-positive fraction is reported, not raised.
    if test_size <= 0:
        print('Vrednost parametra test_size mora biti strogo veca od 0.\n')
        return None

    total = len(data)
    # The test part is the last int(test_size * total) items.
    split_at = total - int(test_size * total)

    return data[:split_at], data[split_at:]

### Odredjivanje ciljne promenljive

In [None]:
def determine_x_and_y(dataset, window = 60):
    """Build sliding-window samples and their targets from a 2-D array.

    For every row index ``i`` from ``window`` up to the last row, the sample
    is the ``window`` rows immediately before ``i`` and the target is
    column 0 of row ``i``.

    Args:
        dataset: 2-D array that exposes ``.shape`` and supports
            ``dataset[a:b]`` row slicing and ``dataset[i, 0]`` indexing
            (for example a NumPy array).
        window: Number of preceding rows in each sample. Defaults to 60,
            the value that was previously hard-coded.

    Returns:
        Tuple ``(X, y)`` of two lists of equal length; both are empty when
        ``dataset`` has no more than ``window`` rows.

    Raises:
        ValueError: If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError('window must be a positive integer')

    row_indices = range(window, dataset.shape[0])
    X = [dataset[i - window:i] for i in row_indices]
    y = [dataset[i, 0] for i in row_indices]

    return X, y

### Vizuelni prikaz trening i validacionog skupa

In [None]:
def plot_train_and_validation_data(train, validation):
    """Draw the training and validation series on the current figure and show it.

    Args:
        train: Values passed to ``plt.plot`` and drawn in 'lime'.
        validation: Values passed to ``plt.plot`` and drawn in 'orchid'.
    """
    plt.title('Podaci za trening i validaciju')
    plt.xlabel('vreme')
    plt.ylabel('vrednost bitkoina')

    # Training series first, then validation, each in its own colour.
    for series, colour in ((train, 'lime'), (validation, 'orchid')):
        plt.plot(series, c = colour)

    plt.show()

### Skaliranje podataka - RNN

In [None]:
def scale_columns(X_train_df, X_validation_df, X_test_df):
    """Min-max scale columns 0-3 of the three frames to the range [0, 1].

    One scaler per column is fitted on the training frame only and then
    reused to transform the validation and test frames. The frames are
    modified in place and also returned.

    NOTE(review): ``MinMaxScaler`` is not imported in the visible part of
    this file - presumably ``sklearn.preprocessing.MinMaxScaler``; confirm
    that it is in scope before calling this function.

    Args:
        X_train_df: Training frame with columns labelled 0, 1, 2 and 3.
        X_validation_df: Validation frame with the same columns.
        X_test_df: Test frame with the same columns.

    Returns:
        Tuple ``(X_train_df, X_validation_df, X_test_df)`` - the same
        objects that were passed in, with the four columns rescaled.
    """
    # One independent scaler per column, keyed by the column label.
    column_scalers = {col: MinMaxScaler(feature_range=(0, 1)) for col in range(4)}

    def _as_column_vector(frame, col):
        # The scalers expect a 2-D input of shape (n_rows, 1).
        return frame[col].to_numpy().reshape(-1, 1)

    # Fit on the training data only.
    for col, scaler in column_scalers.items():
        X_train_df[col] = scaler.fit_transform(_as_column_vector(X_train_df, col))

    # Reuse the fitted scalers for the validation frame, then the test frame.
    for frame in (X_validation_df, X_test_df):
        for col, scaler in column_scalers.items():
            frame[col] = scaler.transform(_as_column_vector(frame, col))

    return X_train_df, X_validation_df, X_test_df

### **Informacije o skupu podataka**

In [None]:
#data.info()

### **Statistike o atributima skupa podataka**

In [None]:
#data.describe()