# Downloading and preparing data for deep learning
This post downloads all the raw data needed for the deep learning study. More information about the CWRU data can be found at https://engineering.case.edu/bearingdatacenter/download-data-file
## Data Description
We will take drive end data acquired at a sampling frequency of 48 kHz. The load on the shaft is 1 hp. For this load, there are 10 fault classes:
* C0 : Normal
* C1 : Ball defect (0.007 inch)
* C2 : Ball defect (0.014 inch)
* C3 : Ball defect (0.021 inch)
* C4 : Inner race fault (0.007 inch)
* C5 : Inner race fault (0.014 inch)
* C6 : Inner race fault (0.021 inch)
* C7 : Outer race fault (0.007 inch, data collected from 6 O'clock position)
* C8 : Outer race fault (0.014 inch, 6 O'clock)
* C9 : Outer race fault (0.021 inch, 6 O'clock)

In [5]:
import os
import pandas as pd
import scipy.io as sio
import urllib.request as urq
import numpy as np
# Directory that will hold every downloaded .mat file.
da_path = 'CWRU_Data'
# exist_ok=True creates the directory only when it is missing, so no separate
# existence check (which could race with another process) is needed.
os.makedirs(da_path, exist_ok=True)

## C0 : Normal

In [4]:
# C0 (normal): download file 98.mat into the data directory and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "normal.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/98.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 10:52:35 2000', '__version__': '1.0', '__globals__': [], 'X098_DE_time': array([[ 0.046104  ],
       [-0.03713354],
       [-0.089496  ],
       ...,
       [-0.09909231],
       [-0.10827138],
       [-0.07092923]]), 'X098_FE_time': array([[ 0.02321636],
       [ 0.08115455],
       [ 0.09533091],
       ...,
       [-0.00760182],
       [ 0.04026909],
       [ 0.06102   ]])}


In [6]:
# Keep only the 'X098_DE_time' variable (the drive-end signal, per the data
# description) as the C0 / normal-class array.
np_normal = mat_contents['X098_DE_time']

## C1 : Ball defect (0.007 inch)

In [7]:
# C1 (ball defect, 0.007 inch): download file 123.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c1.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/123.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 11:46:29 2000', '__version__': '1.0', '__globals__': [], 'X123_DE_time': array([[-0.04109723],
       [-0.046104  ],
       [-0.02837169],
       ...,
       [ 0.07343262],
       [ 0.04819015],
       [ 0.03838523]]), 'X123_FE_time': array([[-0.08115455],
       [-0.09759091],
       [-0.08320909],
       ...,
       [ 0.02136727],
       [ 0.00472545],
       [-0.01314909]]), 'X123RPM': array([[1772]], dtype=uint16)}


In [8]:
# Keep only the 'X123_DE_time' variable (the drive-end signal, per the data
# description) as the C1 array.
np_c1 = mat_contents['X123_DE_time']

## C2 : Ball defect (0.014 inch)

In [9]:
# C2 (ball defect, 0.014 inch): download file 190.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c2.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/190.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 13:47:10 2000', '__version__': '1.0', '__globals__': [], 'X190_DE_time': array([[-0.46354338],
       [-0.47585169],
       [-0.41806523],
       ...,
       [-0.06884308],
       [-0.04902462],
       [-0.01648062]]), 'X190_FE_time': array([[0.25620182],
       [0.21963091],
       [0.12635455],
       ...,
       [0.00123273],
       [0.01109455],
       [0.03266727]]), 'X190RPM': array([[1772]], dtype=uint16)}


In [10]:
# Keep only the 'X190_DE_time' variable (the drive-end signal, per the data
# description) as the C2 array.
np_c2 = mat_contents['X190_DE_time']

## C3 : Ball defect (0.021 inch)

In [11]:
# C3 (ball defect, 0.021 inch): download file 227.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c3.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/227.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 14:10:08 2000', '__version__': '1.0', '__globals__': [], 'X227_DE_time': array([[-0.06195877],
       [-0.03755077],
       [-0.01794092],
       ...,
       [ 0.21174462],
       [ 0.22926831],
       [ 0.21925477]]), 'X227_FE_time': array([[ 0.12471091],
       [ 0.11669818],
       [ 0.08115455],
       ...,
       [ 0.02629818],
       [-0.04704909],
       [-0.06060909]]), 'X227RPM': array([[1774]], dtype=uint16)}


In [12]:
# Keep only the 'X227_DE_time' variable (the drive-end signal, per the data
# description) as the C3 array.
np_c3 = mat_contents['X227_DE_time']

## C4 : Inner race fault (0.007 inch)

In [13]:
# C4 (inner race fault, 0.007 inch): download file 110.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c4.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/110.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# Keep only the '_DE_time' variable (drive-end signal, per the data description).
np_c4 = mat_contents['X110_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 11:27:55 2000', '__version__': '1.0', '__globals__': [], 'X110_DE_time': array([[ 0.03212677],
       [ 0.07823077],
       [ 0.16146831],
       ...,
       [-0.08845292],
       [-0.02899754],
       [ 0.00855323]]), 'X110_FE_time': array([[0.08629091],
       [0.11053455],
       [0.11012364],
       ...,
       [0.40145818],
       [0.46782   ],
       [0.52534727]]), 'X110RPM': array([[1772]], dtype=uint16)}


## C5 : Inner race fault (0.014 inch)

In [18]:
# C5 (inner race fault, 0.014 inch): download file 175.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c5.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/175.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# NOTE: the printed output shows this file also carries 'X217' and
# 'X217_DE_time' variables; select the 'X175_DE_time' variable explicitly.
np_c5 = mat_contents['X175_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 13:39:57 2000', '__version__': '1.0', '__globals__': [], 'X217': array([[-0.01968912,  0.0463752 ],
       [-0.04995504,  0.0398664 ],
       [-0.07407828,  0.02367576],
       ...,
       [-0.00687492,  0.08066844],
       [-0.03278808,  0.09022824],
       [-0.05723676,  0.08835696]]), 'X217_DE_time': array([[0.23782154],
       [0.20444308],
       [0.12141415],
       ...,
       [0.41368431],
       [0.46270892],
       [0.45311262]]), 'X175_DE_time': array([[-0.010016  ],
       [-0.00292133],
       [ 0.00542533],
       ...,
       [ 0.36141067],
       [ 0.36141067],
       [ 0.34513467]]), 'X175_FE_time': array([[-0.07026545],
       [-0.02958545],
       [ 0.04006364],
       ...,
       [-0.05485636],
       [-0.03020182],
       [-0.01088909]]), 'X175RPM': array([[1772]], dtype=uint16)}


## C6 : Inner race fault (0.021 inch)

In [19]:
# C6 (inner race fault, 0.021 inch): download file 214.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c6.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/214.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# Keep only the '_DE_time' variable (drive-end signal, per the data description).
np_c6 = mat_contents['X214_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 13:59:56 2000', '__version__': '1.0', '__globals__': [], 'X214_DE_time': array([[1.12109908],
       [0.78335077],
       [0.39157108],
       ...,
       [0.44163877],
       [0.22843385],
       [0.01168246]]), 'X214_FE_time': array([[0.29975818],
       [0.18942909],
       [0.06533455],
       ...,
       [0.17402   ],
       [0.14998182],
       [0.21141273]]), 'X214RPM': array([[1774]], dtype=uint16)}


## C7 : Outer race fault (0.007 inch, data collected from 6 O'clock position)

In [21]:
# C7 (outer race fault, 0.007 inch, 6 o'clock): download file 136.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c7.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/136.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# Keep only the '_DE_time' variable (drive-end signal, per the data description).
np_c7 = mat_contents['X136_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 11:54:30 2000', '__version__': '1.0', '__globals__': [], 'X136_DE_time': array([[ 0.08471867],
       [ 0.69486   ],
       [ 1.15976933],
       ...,
       [ 1.07964133],
       [ 0.24330533],
       [-0.57258133]]), 'X136_FE_time': array([[-0.21634364],
       [-0.29975818],
       [-0.27058364],
       ...,
       [ 0.01808   ],
       [-0.11217818],
       [-0.19538727]]), 'X136RPM': array([[1774]], dtype=uint16)}


## C8 : Outer race fault (0.014 inch, 6 O'clock)

In [22]:
# C8 (outer race fault, 0.014 inch, 6 o'clock): download file 202.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c8.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/202.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# Keep only the '_DE_time' variable (drive-end signal, per the data description).
np_c8 = mat_contents['X202_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Fri Jan 28 13:52:24 2000', '__version__': '1.0', '__globals__': [], 'X202_DE_time': array([[0.13330523],
       [0.15249785],
       [0.16647508],
       ...,
       [0.09554585],
       [0.08448923],
       [0.07510154]]), 'X202_FE_time': array([[ 0.02054545],
       [ 0.04458364],
       [ 0.09081091],
       ...,
       [ 0.05465091],
       [ 0.00883455],
       [-0.02198364]]), 'X202RPM': array([[1772]], dtype=uint16)}


## C9 : Outer race fault (0.021 inch, 6 O'clock)

In [23]:
# C9 (outer race fault, 0.021 inch, 6 o'clock): download file 239.mat and load it.
# os.path.join builds the path portably instead of hand-concatenating "//".
mat_fname = os.path.join(da_path, "c9.mat")
urq.urlretrieve("https://engineering.case.edu/sites/default/files/239.mat", mat_fname)
# loadmat returns a dict keyed by the MATLAB variable names in the file.
mat_contents = sio.loadmat(mat_fname)
# Print the whole dict once to see which variable names the file contains.
print(mat_contents)
# Keep only the '_DE_time' variable (drive-end signal, per the data description).
np_c9 = mat_contents['X239_DE_time']

{'__header__': b'MATLAB 5.0 MAT-file, Platform: PCWIN, Created on: Sun Jan 30 11:38:57 2000', '__version__': '1.0', '__globals__': [], 'X239_DE_time': array([[ 0.040064  ],
       [ 0.02170133],
       [-0.02086667],
       ...,
       [ 0.36224533],
       [ 0.32552   ],
       [ 0.27627467]]), 'X239_FE_time': array([[0.12864616],
       [0.09946444],
       [0.07110475],
       ...,
       [0.01520737],
       [0.13974343],
       [0.21824636]]), 'X239RPM': array([[1771]], dtype=uint16)}


## Save all the datasets combined

In [24]:
# Bundle the ten per-class arrays under the keys c0..c9 (in class order) and
# write them to a single .npz archive in the working directory.
class_arrays = (np_normal, np_c1, np_c2, np_c3, np_c4,
                np_c5, np_c6, np_c7, np_c8, np_c9)
labelled = {f"c{idx}": arr for idx, arr in enumerate(class_arrays)}
np.savez('CWRU_cleaned.npz', **labelled)