**Outline:**

1. [Libraries and settings](#1-bullet)
2. [Train and Evaluate Model](#2-bullet)
3. [Predictions](#3-bullet)

## 1. [Libraries and settings](#1-bullet)

In [1]:
from DataPreprocessing import *

ModuleNotFoundError: No module named 'DataPreprocessing'

## 2. [Train and Evaluate Model](#2-bullet)

### Create dataset

In [None]:
# Look-back window length: how many consecutive past values form the feature
# vector of one sample (passed to create_dataset as its `time_step` argument).
time_step = 30

In [None]:
def create_dataset(dataset, time_step=1):
    """Turn a series into supervised (window, next value) samples.

    Only column 0 of ``dataset`` is read. Sample ``i`` uses rows
    ``i .. i + time_step - 1`` as features and row ``i + time_step`` as target.

    Parameters
    ----------
    dataset : 2-D array supporting ``dataset[rows, 0]`` indexing
        The series to window (one observation per row).
    time_step : int, default 1
        Number of consecutive observations in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` where each row of ``X`` is one window of ``time_step``
        values and ``y`` holds the value that immediately follows each window.
        There are ``len(dataset) - time_step`` samples.
    """
    dataX, dataY = [], []
    # The last valid start index is len(dataset) - time_step - 1 (its target is
    # the final row), so the range must stop at len(dataset) - time_step.
    # Subtracting an extra 1 here would silently drop the most recent sample.
    for i in range(len(dataset) - time_step):
        dataX.append(dataset[i:i + time_step, 0])
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)

In [None]:
# Window the complete scaled series (before any train/test split).
x_for_train, y_for_train = create_dataset(scaled_data, time_step)

# Features keep their (samples, window) layout; targets become one column.
x_for_train = np.reshape(x_for_train, (x_for_train.shape[0], x_for_train.shape[1]))
y_for_train = np.reshape(y_for_train, (y_for_train.shape[0], 1))

In [None]:
# Chronological split: the first 80% of the rows go to training, the rest to test.
training_size = int(len(dataset) * 0.8)
test_size = len(dataset) - training_size

train_data = scaled_data[:training_size]
test_data = scaled_data[training_size:len(dataset)]

print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

In [None]:
# Window each split on its own so that no training sample is built from rows
# belonging to the test split.
x_train, y_train = create_dataset(train_data, time_step)
x_test, y_test = create_dataset(test_data, time_step)

# The feature reshapes keep the existing 2-D layout (they fail early if the
# windows are not 2-D); the targets are turned into column vectors, the 2-D
# form later handed to scaler.inverse_transform.
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1])
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Shape report; all four labels use the same "name: " format.
print("x_train: ", x_train.shape)
print("y_train: ", y_train.shape)
print("x_test: ", x_test.shape)
print("y_test: ", y_test.shape)

### Building model

Retrain the model using the best hyperparameters found during tuning.

In [None]:
# Create and train the XGBoost regressor with the tuned hyperparameters.
# The test split is passed as an evaluation set so its per-round error is
# recorded during training (available afterwards via XgB_model.evals_result()).
# It is used for monitoring only: no early stopping is configured, so the
# fitted trees are the same as without it. verbose=False silences the
# per-round evaluation log.
eval_set = [(x_test, y_test)]
XgB_model = XGBRegressor(learning_rate=0.01, max_depth=10, n_estimators=500)
XgB_model.fit(x_train, y_train, eval_set=eval_set, verbose=False)

In [None]:
# Predict on both splits and store each result as a single-column 2-D array.
predictions = XgB_model.predict(x_test).reshape(-1, 1)
predictions_train = XgB_model.predict(x_train).reshape(-1, 1)

In [None]:
# Undo the scaling on model outputs and on the targets via the `scaler` object
# (defined outside this cell), so the values below are no longer in scaled space.
# NOTE: all four names are rebound to their converted versions. Running this
# cell a second time would apply inverse_transform to already-converted values.
predictions = scaler.inverse_transform(predictions)
predictions_train = scaler.inverse_transform(predictions_train)
y_test = scaler.inverse_transform(y_test)
y_train = scaler.inverse_transform(y_train)

In [None]:
# Column labels for the predicted series, the actual series, and both together.
column_names_pred = ['Close_predict']
column_names_test = ['Close_test']
column_names_concat = column_names_pred + column_names_test

# One single-column frame per series.
df_predictions = pd.DataFrame(predictions, columns=column_names_pred)
df_test = pd.DataFrame(y_test, columns=column_names_test)

# Side-by-side table: predictions in the first column, actual values in the second.
concatenated = np.concatenate((df_predictions, df_test), axis=1)
df_concatenated = pd.DataFrame(concatenated, columns=column_names_concat)

In [None]:
# Overlay predicted (red) and actual (blue) values for the test split.
plt.figure(figsize=(16, 6))
plt.title('Close Price History')
plt.plot(df_predictions['Close_predict'], color='red', label='Predicted Close Price')
plt.plot(df_test['Close_test'], color='blue', label='Actual Close Price')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
# Without this call the `label=` arguments above are never rendered, leaving
# the two curves unidentified in the figure.
plt.legend()
plt.show()

### Evaluate model

In [None]:

# Error metrics on the training split.
mae_train = mean_absolute_error(y_train, predictions_train)
# Square root of the MSE, i.e. the root mean squared error.
rmse_train = np.sqrt(mean_squared_error(y_train, predictions_train))
r2_train = r2_score(y_train, predictions_train)

print("Train Mean Absolute Error:", mae_train)
# The printed value is the RMSE, so the label must say "Root".
print("Train Root Mean Squared Error:", rmse_train)
print("Train R-squared:", r2_train)

In [None]:
# Error metrics on the test split.
mae_test = mean_absolute_error(y_test, predictions)
mse_test = mean_squared_error(y_test, predictions)
rmse_test = np.sqrt(mse_test)  # root of the mean squared error
r2_test = r2_score(y_test, predictions)

print("Test Mean Absolute Error:", mae_test)
print("Test Root Mean Squared Error:", rmse_test)
print("Test R-squared:", r2_test)

## 3. [Predictions](#3-bullet)

In [None]:
# Rolling multi-step forecast: repeatedly predict one step ahead and feed the
# prediction back in as the newest value of the input window.

# Start from the last window of x_test; the [-1:] slice keeps it 2-D (one row).
last_30_days = x_test[-1:, :]  
next_days = []  

# 101 iterations: the first prediction is discarded after the loop, which
# leaves 100 forecast values.
for _ in range(101):
    # Reshape to the window's own (rows, columns) shape before predicting.
    input_data = np.reshape(last_30_days, (last_30_days.shape[0], last_30_days.shape[1]))

    predicted_price = XgB_model.predict(input_data)
    next_day = predicted_price
    next_days.append(next_day)

    # Slide the window: drop its oldest column and append the new prediction
    # as the last column, so the window width stays constant.
    last_30_days = np.append(last_30_days[:, 1:], np.expand_dims(next_day, axis=0), axis=1)
    
# Remove the first prediction from the forecast list.
# NOTE(review): presumably dropped because it corresponds to the target that is
# already paired with the last x_test window (y_test[-1]) — confirm. Note that
# later windows then contain the model's estimate of that value rather than the
# observed one.
pop_price = next_days.pop(0)
print("Predicted prices for the next days:")
    
for i, price in enumerate(next_days):
    # Predictions were made in scaled space; undo the scaling before printing.
    price = scaler.inverse_transform(price.reshape(-1,1))
    print(f"Day {i+1}: {price}")
    
# Replace the list of per-step arrays with one single-column array of
# inverse-transformed forecasts.
next_days = scaler.inverse_transform(np.array(next_days).reshape(-1,1))

In [None]:
# Append the forecast values after the actual test targets so both can be drawn
# on one axis.
# NOTE: this rebinds y_test. After it runs, y_test has len(next_days) more rows
# than `predictions`, and running this cell again appends the forecast a second
# time — re-create y_test before re-running cells that depend on it.
y_test = np.concatenate([y_test, next_days], axis=0)


In [None]:
# The trailing `forecast_len` rows of y_test are drawn as the forecast segment;
# everything before them is drawn as the actual series.
forecast_len = 100
series_len = len(y_test)

plt.figure(figsize=(16, 6))
plt.title('Close Price History')
plt.plot(y_test[:-forecast_len], color='red', label='Actual Close Price')
# Explicit x positions place the forecast directly after the actual values.
plt.plot(
    range(series_len - forecast_len, series_len),
    y_test[-forecast_len:],
    color='blue',
    label='Predicted Close Price in next days',
)
plt.xlabel('Date', fontsize=18)
plt.ylabel('Close Price USD ($)', fontsize=18)
plt.legend()
plt.show()
