visualize.py simultaneous raw-scale-denoise output; separate scale/denoise_periods func
timothyyu committed Mar 3, 2019
1 parent 8073c42 commit b715d88
Showing 27 changed files with 2,118 additions and 67 deletions.
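
The headline change in `visualize.py` — rendering the raw, scaled, and denoised series in one figure — is collapsed in this view. A minimal sketch of what such a plot helper could look like (the function name, signature, and `close` column are assumptions, not the repository's actual code):

```python
import matplotlib.pyplot as plt

def plot_raw_scaled_denoised(raw, scaled, denoised, column="close"):
    # Hypothetical helper mirroring the commit message: three stacked
    # panels showing the raw, scaled, and wavelet-denoised series.
    fig, axes = plt.subplots(3, 1, sharex=True, figsize=(10, 8))
    series = [("raw", raw), ("scaled", scaled), ("denoised", denoised)]
    for ax, (name, df) in zip(axes, series):
        ax.plot(df.index, df[column])
        ax.set_title(f"{name} {column}")
    fig.tight_layout()
    return fig
```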
10 changes: 5 additions & 5 deletions README.md
@@ -4,6 +4,9 @@ Repository that aims to implement the WSAE-LSTM model and replicate the results

https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0180944

This implementation of the WSAE-LSTM model aims to address potential issues in the implementation of the model as defined by Bao et al. (2017), while also addressing issues in previous attempts to implement and replicate its results (i.e. [mlpanda/DeepLearning_Financial](https://github.com/mlpanda/DeepLearning_Financial)).


## Source journal (APA)

Bao W, Yue J, Rao Y (2017). "A deep learning framework for financial time series using stacked autoencoders and long-short term memory". PLOS ONE 12(7): e0180944. https://doi.org/10.1371/journal.pone.0180944
@@ -24,8 +27,5 @@ Repository package requirements/dependencies are defined in `requirements.txt` f

### `mlpanda/DeepLearning_Financial`:

Repository of an existing attempt to replicate above paper in PyTorch: [mlpanda/DeepLearning_Financial:](https://github.com/mlpanda/DeepLearning_Financial)
, checked out as a `git-subrepo` for reference in `submodules` folder.



Repository of an existing attempt to replicate above paper in PyTorch: [mlpanda/DeepLearning_Financial](https://github.com/mlpanda/DeepLearning_Financial)
, checked out as a `git-subrepo` for reference in `submodules` folder.
Binary file modified data/interim/cdii_tvt_split.pickle
Binary file not shown.
Binary file modified data/interim/cdii_tvt_split_scaled_denoised.pickle
Binary file not shown.
Binary file modified data/interim/clean_data_index_interval.pickle
Binary file not shown.
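
The modified pickles above carry the per-index train/validation/test splits that the notebook cells below inspect. A minimal sketch of loading one for inspection (assuming `cdii_tvt_split.pickle` stores the nested dict that the notebook calls `dict_dataframes_index`):

```python
import pickle

# Path taken from the file list above; the stored structure is assumed
# to be the nested dict explored in the notebook cells later in this diff.
with open("data/interim/cdii_tvt_split.pickle", "rb") as f:
    dict_dataframes_index = pickle.load(f)

print(dict_dataframes_index.keys())
```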
2 changes: 1 addition & 1 deletion notebooks/3a scaling exploration.ipynb
@@ -455,7 +455,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.8"
}
},
"nbformat": 4,

Large diffs are not rendered by default.

@@ -948,7 +948,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.8"
}
},
"nbformat": 4,
@@ -2046,7 +2046,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.8"
}
},
"nbformat": 4,
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -33,7 +33,14 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
@@ -42,7 +49,7 @@
"dict_keys(['csi300 index data', 'nifty 50 index data', 'hangseng index data', 'nikkei 225 index data', 's&p500 index data', 'djia index data'])"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@@ -52,6 +59,261 @@
"dict_dataframes_index.keys()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"dict"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(dict_dataframes_index['djia index data'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"24"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(dict_dataframes_index['djia index data'])"
]
},
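(The two cells above show that each index maps to a plain dict of 24 walk-forward periods; the cells that follow index each period with `[1]`, `[2]`, and `[3]`. A sketch of that assumed layout — keys 1 = train, 2 = validation, 3 = test, inferred from the loops below:)

```python
# Inferred layout: index name -> 24 periods -> {1: train, 2: val, 3: test}.
djia = dict_dataframes_index['djia index data']
for period in djia:
    train_df, val_df, test_df = djia[period][1], djia[period][2], djia[period][3]
    print(period, train_df.index[0], val_df.index[0], test_df.index[0])
```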
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TRAIN: 2008-07-01 00:00:00\n",
"TRAIN: 2008-10-01 00:00:00\n",
"TRAIN: 2009-01-01 00:00:00\n",
"TRAIN: 2009-04-01 00:00:00\n",
"TRAIN: 2009-07-01 00:00:00\n",
"TRAIN: 2009-10-01 00:00:00\n",
"TRAIN: 2010-01-04 00:00:00\n",
"TRAIN: 2010-04-01 00:00:00\n",
"TRAIN: 2010-07-01 00:00:00\n",
"TRAIN: 2010-10-01 00:00:00\n",
"TRAIN: 2011-01-03 00:00:00\n",
"TRAIN: 2011-04-01 00:00:00\n",
"TRAIN: 2011-07-01 00:00:00\n",
"TRAIN: 2011-10-03 00:00:00\n",
"TRAIN: 2012-01-02 00:00:00\n",
"TRAIN: 2012-04-02 00:00:00\n",
"TRAIN: 2012-07-02 00:00:00\n",
"TRAIN: 2012-10-01 00:00:00\n",
"TRAIN: 2013-01-01 00:00:00\n",
"TRAIN: 2013-04-01 00:00:00\n",
"TRAIN: 2013-07-01 00:00:00\n",
"TRAIN: 2013-10-01 00:00:00\n",
"TRAIN: 2014-01-01 00:00:00\n",
"TRAIN: 2014-04-01 00:00:00\n"
]
}
],
"source": [
"for item in dict_dataframes_index['djia index data']:\n",
" print(\"TRAIN:\",dict_dataframes_index['nifty 50 index data'][item][1].index[0])\n",
" #print(\"VAL:\",dict_dataframes_index['nifty 50 index data'][item][2].index[0])\n",
" #print(\"TEST:\",dict_dataframes_index['nifty 50 index data'][item][3].index[0])"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VAL: 2010-07-01 00:00:00\n",
"VAL: 2010-10-01 00:00:00\n",
"VAL: 2011-01-03 00:00:00\n",
"VAL: 2011-04-01 00:00:00\n",
"VAL: 2011-07-01 00:00:00\n",
"VAL: 2011-10-03 00:00:00\n",
"VAL: 2012-01-04 00:00:00\n",
"VAL: 2012-04-02 00:00:00\n",
"VAL: 2012-07-02 00:00:00\n",
"VAL: 2012-10-01 00:00:00\n",
"VAL: 2013-01-03 00:00:00\n",
"VAL: 2013-04-01 00:00:00\n",
"VAL: 2013-07-01 00:00:00\n",
"VAL: 2013-10-03 00:00:00\n",
"VAL: 2014-01-02 00:00:00\n",
"VAL: 2014-04-02 00:00:00\n",
"VAL: 2014-07-02 00:00:00\n",
"VAL: 2014-10-01 00:00:00\n",
"VAL: 2015-01-01 00:00:00\n",
"VAL: 2015-04-01 00:00:00\n",
"VAL: 2015-07-01 00:00:00\n",
"VAL: 2015-10-01 00:00:00\n",
"VAL: 2016-01-01 00:00:00\n",
"VAL: 2016-04-01 00:00:00\n"
]
}
],
"source": [
"for item in dict_dataframes_index['djia index data']:\n",
" #print(\"TRAIN:\",dict_dataframes_index['nifty 50 index data'][item][1].index[0])\n",
" print(\"VAL:\",dict_dataframes_index['djia index data'][item][2].index[0])\n",
" #print(\"TEST:\",dict_dataframes_index['nifty 50 index data'][item][3].index[0])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"TEST: 2010-10-01 00:00:00\n",
"TEST: 2011-01-03 00:00:00\n",
"TEST: 2011-04-01 00:00:00\n",
"TEST: 2011-07-01 00:00:00\n",
"TEST: 2011-10-03 00:00:00\n",
"TEST: 2012-01-02 00:00:00\n",
"TEST: 2012-04-04 00:00:00\n",
"TEST: 2012-07-02 00:00:00\n",
"TEST: 2012-10-01 00:00:00\n",
"TEST: 2013-01-01 00:00:00\n",
"TEST: 2013-04-03 00:00:00\n",
"TEST: 2013-07-01 00:00:00\n",
"TEST: 2013-10-01 00:00:00\n",
"TEST: 2014-01-03 00:00:00\n",
"TEST: 2014-04-02 00:00:00\n",
"TEST: 2014-07-02 00:00:00\n",
"TEST: 2014-10-07 00:00:00\n",
"TEST: 2015-01-01 00:00:00\n",
"TEST: 2015-04-01 00:00:00\n",
"TEST: 2015-07-01 00:00:00\n",
"TEST: 2015-10-01 00:00:00\n",
"TEST: 2016-01-01 00:00:00\n",
"TEST: 2016-04-01 00:00:00\n",
"TEST: 2016-07-01 00:00:00\n"
]
}
],
"source": [
"for item in dict_dataframes_index['djia index data']:\n",
" #print(\"TRAIN:\",dict_dataframes_index['nifty 50 index data'][item][1].index[0])\n",
" #print(\"VAL:\",dict_dataframes_index['nifty 50 index data'][item][2].index[0])\n",
" print(\"TEST:\",dict_dataframes_index['djia index data'][item][3].index[0])"
]
},
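(Reading the three printouts together: training starts advance one quarter at a time from 2008-07-01, validation begins two years after each training start, and testing three months after validation, with exact dates rolled forward to the next trading day. A sketch reproducing those offsets with pandas — the offsets are read off the output above, not taken from the repository's code:)

```python
import pandas as pd

# Quarter-start training dates matching the TRAIN printout above;
# the actual splits roll weekends/holidays to the next trading day.
train_starts = pd.date_range("2008-07-01", periods=24, freq="QS")
for t in train_starts:
    print("TRAIN:", t,
          "VAL:", t + pd.DateOffset(years=2),
          "TEST:", t + pd.DateOffset(years=2, months=3))
```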
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"ename": "NameError",
"evalue": "name 'preprocessing' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-17-0911614acaaf>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 33\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mddi_scaled\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mddi_denoised\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 35\u001b[1;33m \u001b[0mddi_scaled\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mddi_denoised\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mdenoise_periods\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdict_dataframes_index\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32m<ipython-input-17-0911614acaaf>\u001b[0m in \u001b[0;36mdenoise_periods\u001b[1;34m(dict_dataframes)\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[0mddi_scaled\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mindex_name\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdeepcopy\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdict_dataframes\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mindex_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindex_name\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mddi_denoised\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 9\u001b[1;33m \u001b[0mscaler\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpreprocessing\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mRobustScaler\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 10\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mindex\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mvalue\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mddi_denoised\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mindex_name\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mNameError\u001b[0m: name 'preprocessing' is not defined"
]
}
],
"source": [
"def denoise_periods(dict_dataframes):\n",
" \n",
" ddi_scaled = dict()\n",
" ddi_denoised= dict() \n",
" for key, index_name in enumerate(dict_dataframes):\n",
" ddi_denoised[index_name] = copy.deepcopy(dict_dataframes[index_name])\n",
" ddi_scaled[index_name] = copy.deepcopy(dict_dataframes[index_name])\n",
" for key, index_name in enumerate(ddi_denoised): \n",
" scaler = preprocessing.RobustScaler()\n",
"\n",
" for index,value in enumerate(ddi_denoised[index_name]):\n",
" \n",
" X_train = ddi_denoised[index_name][value][1]\n",
" X_train_scaled = scaler.fit_transform(X_train)\n",
" X_train_scaled = pd.DataFrame(X_train_scaled,columns=list(X_train.columns))\n",
" \n",
" X_val = ddi_denoised[index_name][value][2]\n",
" X_val_scaled = scaler.transform(X_val)\n",
" X_val_scaled = pd.DataFrame(X_val_scaled,columns=list(X_val.columns))\n",
" \n",
" X_test = ddi_denoised[index_name][value][3]\n",
" X_test_scaled = scaler.transform(X_test)\n",
" X_test_scaled = pd.DataFrame(X_test_scaled,columns=list(X_test.columns))\n",
" \n",
" ddi_scaled[index_name][value][1] = X_train_scaled\n",
" ddi_scaled[index_name][value][2] = X_val_scaled\n",
" ddi_scaled[index_name][value][3] = X_test_scaled\n",
" \n",
" ddi_denoised[index_name][value][1] = waveletSmooth(X_train_scaled)\n",
" ddi_denoised[index_name][value][2] = waveletSmooth(X_val_scaled)\n",
" ddi_denoised[index_name][value][3] = waveletSmooth(X_test_scaled)\n",
" \n",
" return ddi_scaled,ddi_denoised\n",
"\n",
"ddi_scaled,ddi_denoised = denoise_periods(dict_dataframes_index)"
]
},
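(The `NameError` above is just a missing import: the cell calls `preprocessing.RobustScaler()` without ever importing scikit-learn's `preprocessing` module. A sketch of the setup the cell needs — the import path for the repository's `waveletSmooth` helper is a guess and would need to match wherever it actually lives in this repo:)

```python
import copy

import pandas as pd
from sklearn import preprocessing  # supplies RobustScaler, the missing name

# waveletSmooth is this repository's own denoising helper; the path below
# is hypothetical -- adjust it to the module that actually defines it.
from wsae_lstm.utils import waveletSmooth
```

(Note the leakage-safe pattern already in the cell: `RobustScaler` is fit on each period's training split only, then applied unchanged to the validation and test splits.)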
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
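(`waveletSmooth` itself is not shown in this diff. For orientation, a hedged sketch of what a wavelet-threshold denoiser of this kind typically looks like, using PyWavelets — the wavelet, level, and thresholding rule here are generic assumptions, not this repository's exact implementation:)

```python
import numpy as np
import pandas as pd
import pywt

def wavelet_smooth_sketch(df, wavelet="haar", level=2):
    """Hypothetical stand-in for waveletSmooth: soft-threshold the detail
    coefficients of each column and reconstruct the series."""
    out = {}
    for col in df.columns:
        coeffs = pywt.wavedec(df[col].values, wavelet, mode="per", level=level)
        # Universal threshold, with the noise scale estimated from the
        # finest detail coefficients (median absolute deviation).
        sigma = np.median(np.abs(coeffs[-1])) / 0.6745
        thresh = sigma * np.sqrt(2 * np.log(len(df)))
        coeffs[1:] = [pywt.threshold(c, thresh, mode="soft") for c in coeffs[1:]]
        rec = pywt.waverec(coeffs, wavelet, mode="per")
        out[col] = rec[: len(df)]  # waverec can return one extra sample
    return pd.DataFrame(out, index=df.index)
```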
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
@@ -640,7 +902,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.6"
"version": "3.6.8"
}
},
"nbformat": 4,
