In [1]:
import warnings

from matplotlib import pyplot
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima_model import ARIMA

training = 0.7
testing = 1 - training
past_datapoints = 3
length_full_dataset = 0
length_training_data = 0 

In [2]:
def training_testing(raw_datapoints, training, testing):
    global length_training_data, length_test_data
    length_training_data = int((length_full_dataset) * training)
    length_test_data = length_full_dataset - length_training_data
    training_datapoints, testing_datapoints = raw_datapoints[0:length_training_data], raw_datapoints[length_training_data:length_full_dataset]
    return training_datapoints, testing_datapoints

In [3]:
def arima_evaluation(actualdata_testing, actualdata_prediction):
    return mean_squared_error(actualdata_testing, actualdata_prediction)

In [4]:
def arima_plot(currency, actualdata_testing, actualdata_prediction, file_name):
    actual_datapoints = pyplot.plot(actualdata_testing, label="Actual values", color="yellow")
    testing_datapoints = pyplot.plot(actualdata_prediction, label="Predicted values", color="red")

    pyplot.ylabel('Currency values for 1 USD')
    pyplot.xlabel('Number of days')
    pyplot.title('USD/' + currency + ' : Actual vs Predicted values using ARIMA')

    pyplot.legend()
    pyplot.savefig(file_name)
    pyplot.clf()

In [5]:
def dataset_loading(currency):
    data_frame = read_csv('currency_prediction_data_set.csv', header=0,
                              index_col=0, squeeze=True)
    column_headers = data_frame.columns.values.tolist()
    currency_index = column_headers.index('USD/' + currency.upper()) + 1

    data = read_csv("currency_prediction_data_set.csv", usecols=[currency_index], engine='python')
    raw_datapoints = []
    for data_point in data.values.tolist():
        raw_datapoints.append(data_point[0])
    global length_full_dataset
    length_full_dataset= len(raw_datapoints)
    return raw_datapoints

In [6]:
def arima_prediction(actualdata_training, actualdata_testing):
    test_prediction = list()
    training_prediction = list(actualdata_training)
    for testing_set_index in range(length_test_data):
        arima = ARIMA(training_prediction, order=(5, 1, 0))
        model_arima = arima.fit(disp=0)
        forecasting = model_arima.forecast()[0].tolist()[0]
        test_prediction.append(forecasting)
        training_prediction.append(actualdata_testing[testing_set_index])
        
    print('Predicting...')
    print('\t The prediction for the next day:', model_arima.forecast()[0])
    return test_prediction

In [7]:
def arima_model(currency):
    print('\nARIMA Model')

    print('Dataset Loading...')
    raw_datapoints = dataset_loading(currency)

    print('Train-test split...')
    actualdata_training, actualdata_testing = training_testing(raw_datapoints, training, testing)

    print('Training the model...')
    arima_test_prediction = arima_prediction(actualdata_training, actualdata_testing)

    print('Performance Evaluation...')
    arima_mse = arima_evaluation(actualdata_testing, arima_test_prediction)
    print('\t Saving Mean Square Error for Test Data:', arima_mse)

    with open("arima_mse.txt", 'w') as meansq_error:
        meansq_error.write(str(arima_mse) + '\n')

    print('Graph Plotting...')
    arima_plot(currency, actualdata_testing, arima_test_prediction, "arima_test_predictions.pdf")

    print('Predictions available now...')
    return raw_datapoints, arima_test_prediction

In [8]:
if __name__ == '__main__':
    warnings.filterwarnings("ignore")
    data_frame = read_csv('currency_prediction_data_set.csv', header=0,
                              index_col=0, squeeze=True)
    column_headers = str([cur[4:] for cur in data_frame.columns.values.tolist()])
    currency = input('Which one of ' + column_headers + ' currencies, do you want to predict for?\n').strip()
    arima_model(currency)

Which one of ['INR', 'PKR', 'CNY', 'KWD', 'AED', 'LKR', 'CHF', 'EUR', 'ALL', 'DZD', 'AOA', 'XCD', 'ARS', 'AWG', 'SHP', 'AUD', 'BSD', 'BHD', 'BDT', 'BZD', 'XOF', 'BTN', 'BOB', 'BWP', 'BRL', 'BND', 'BGN', 'BIF', 'CVE'] currencies, do you want to predict for?
EUR

ARIMA Model
Dataset Loading...
Train-test split...
Training the model...
Predicting...
	 The prediction for the next day: [0.93868167]
Performance Evaluation...
	 Saving Mean Square Error for Test Data: 1.2490802611927058e-05
Graph Plotting...
Predictions available now...


<Figure size 432x288 with 0 Axes>