In [None]:
import pandas
import numpy
from matplotlib import pyplot
# Load the table: the first CSV row is the header and the first CSV column
# supplies the row labels; the frame is transposed before any further use.
filename = "dataframe.csv"
data = pandas.read_csv(filename, header=0, index_col=0).transpose()
# Row labels of the transposed frame are cast to float.
data.index = data.index.astype(float)

# Overview plot: one line per row of `data`, with the x axis running from the
# largest column label down to the smallest.
x_limits = [data.columns.max(), data.columns.min()]
data.transpose().plot(legend=None, xlim=x_limits)
pyplot.show()

In [None]:
# Horizontal range: keep only the columns whose label lies in [xmin, xmax]
flag = False  # set to True to replace `data` with the cropped frame
xmax, xmin = 1500, 1000  # range bounds (both inclusive)
in_range = (data.columns >= xmin) & (data.columns <= xmax)
buff = data.iloc[:, in_range]
# Preview the cropped frame with a descending x axis.
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Data skip: thin the frame by keeping every `step`-th row
flag = False  # set to True to replace `data` with the thinned frame
step = 2  # stride between the rows that are kept
buff = data.iloc[slice(None, None, step)]
# Preview the thinned frame with a descending x axis.
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Interpolation: resample every row of `data` onto a regular column grid
flag = False
#flag = True
from scipy.interpolate import interp1d
xdiv = 1  # grid spacing
# The grid starts one step above the minimum column label floored to a multiple
# of xdiv (so a minimum that already lies on the grid is skipped) and stops
# before the maximum label, because numpy.arange excludes its end point.
xmin = data.columns.min() // xdiv * xdiv + xdiv
xmax = data.columns.max()
ix = numpy.arange(xmin, xmax, xdiv)
# interp1d interpolates along the last axis by default, so all rows are handled
# in a single call. Building the frame directly from the numeric result keeps a
# float dtype; pre-allocating an empty frame and filling it row by row would
# leave every column as object dtype.
interpolate = interp1d(data.columns, data.to_numpy(dtype=float), kind="quadratic")
buff = pandas.DataFrame(interpolate(ix), index=data.index, columns=ix)
buff.T.plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Outlier removal by Isolation Forest
flag = False
#flag = True
frac = 0.05  # expected fraction of outliers
from sklearn.ensemble import IsolationForest
# Isolation Forest is randomized; a fixed seed makes the set of flagged rows
# reproducible from one run of this cell to the next.
model = IsolationForest(contamination=frac, random_state=0).fit(data.values)
predict = model.predict(data.values)  # -1 marks outliers, 1 marks inliers
# First figure: rows flagged as outliers; second figure: rows kept.
for label in (-1, 1):
    subset = data[predict == label]
    # DataFrame.plot raises on an empty frame, so skip a group with no rows.
    if not subset.empty:
        subset.T.plot(legend=None, xlim=[data.columns.max(), data.columns.min()])
pyplot.show()
if flag:
    data = data[predict == 1]

In [None]:
# Outlier removal by One Class SVM
flag = False
#flag = True
frac = 0.05  # expected fraction of outliers (passed as `nu`)
from sklearn.svm import OneClassSVM
model = OneClassSVM(nu=frac).fit(data.values)
predict = model.predict(data.values)  # -1 marks outliers, 1 marks inliers
# First figure: rows flagged as outliers; second figure: rows kept.
for label in (-1, 1):
    subset = data[predict == label]
    # DataFrame.plot raises on an empty frame, so skip a group with no rows
    # (for example when no row is flagged as an outlier).
    if not subset.empty:
        subset.T.plot(legend=None, xlim=[data.columns.max(), data.columns.min()])
pyplot.show()
if flag:
    data = data[predict == 1]

In [None]:
# Smoothing (Savitzky-Golay filter, polynomial order 2)
flag = False  # set to True to smooth every row of `data`
window = 3  # window length, in points
from scipy.signal import savgol_filter
x = data.columns
# Width of the window expressed in column-label units.
print("window size =", x[window - 1] - x[0])

# Preview on the last row only: raw points (blue) against the filtered curve (red).
y = data.values[-1]
s = savgol_filter(y, window_length=window, polyorder=2, deriv=0)
x_limits = [data.columns.max(), data.columns.min()]
ax = pyplot.axes()
ax.set_xlim(x_limits)
ax.scatter(x, y, s=3, c="blue")
ax.plot(x, s, c="red")
pyplot.show()

# Residuals (raw minus filtered) for the same row.
ax = pyplot.axes()
ax.set_xlim(x_limits)
ax.scatter(x, y - s, s=3, c="blue")
pyplot.show()

if flag:
    smoothed = savgol_filter(data.values, window_length=window, polyorder=2, deriv=0)
    data = pandas.DataFrame(smoothed, index=data.index, columns=data.columns)
    data.T.plot(legend=None, xlim=x_limits)
    pyplot.show()

In [None]:
# 2nd derivative (two Savitzky-Golay first-derivative passes)
flag = False  # set to True to replace `data` with the derivative frame
window = 3  # window length, in points
from scipy.signal import savgol_filter
x = data.columns
# Width of the window expressed in column-label units.
print("window size =", x[window - 1] - x[0])
buff = data
# Alternate a deriv=0 pass with a deriv=1 pass, twice over.
for deriv_order in (0, 1, 0, 1):
    buff = savgol_filter(buff, window, 2, deriv_order)
buff = pandas.DataFrame(buff, index=data.index, columns=data.columns)
buff.T.plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Centering: subtract each column's mean from that column
flag = False  # set to True to replace `data` with the centered frame
col_mean = data.mean()
buff = data.sub(col_mean, axis="columns")
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Scaling: divide each column by that column's standard deviation
flag = False  # set to True to replace `data` with the scaled frame
col_std = data.std()
buff = data.div(col_std, axis="columns")
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Autoscaling: center each column on its mean, then divide by its standard deviation
flag = False  # set to True to replace `data` with the autoscaled frame
col_mean = data.mean()
col_std = data.std()
buff = data.sub(col_mean, axis="columns").div(col_std, axis="columns")
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Normalizing: rescale each row so that, inside the reference range
# [xmin, xmax], its minimum maps to 0 and its maximum maps to 1
flag = False  # set to True to replace `data` with the normalized frame
xmax, xmin = 3000, 2700
in_range = (data.columns >= xmin) & (data.columns <= xmax)
buff = data.iloc[:, in_range]
# Per-row extrema taken over the reference range only.
row_min = buff.min(axis=1)
row_span = buff.max(axis=1) - row_min
# The offset and scale are applied to the full frame, not just the reference range.
buff = data.sub(row_min, axis=0).div(row_span, axis=0)
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Subtraction spectra: subtract one reference row from every row
flag = False  # set to True to replace `data` with the subtracted frame
reference = data.iloc[0]  # row to subtract
buff = data.sub(reference, axis="columns")
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff

In [None]:
# Difference spectra: each row becomes (next row - this row); the last row is dropped
flag = False  # set to True to replace `data` with the difference frame
# numpy.diff along axis 0 yields row[i + 1] - row[i]; each result keeps the
# label of row i, so the final index label is the one that disappears.
successive = numpy.diff(data.values, axis=0)
buff = pandas.DataFrame(successive, index=data.index[:-1], columns=data.columns)
buff.transpose().plot(legend=None, xlim=[buff.columns.max(), buff.columns.min()])
pyplot.show()
if flag:
    data = buff