In [None]:
import pandas
import numpy
from matplotlib import pyplot
# Load the spectra table and orient it so that every row is one spectrum and
# every column is one horizontal-axis position.
filename = "dataframe.csv"
table = pandas.read_csv(filename, header=0, index_col=0)
# Second pass over the same file without header parsing: its first row holds
# the header cells as they appear in the file.
# NOTE(review): presumably done so the row labels are not altered by the
# header parser (e.g. renaming of duplicated names) -- confirm.
raw_labels = pandas.read_csv(filename, header=None, index_col=0).iloc[0].values
data = table.T
data.index = raw_labels
# Overview plot of all spectra: reversed x axis, no legend.
ax = data.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()

In [None]:
# Change the horizontal range: keep only the columns whose position lies
# inside [xmin, xmax].
xmax, xmin = 1500, 1000  # range limits (both ends inclusive)
in_range = (data.columns >= xmin) & (data.columns <= xmax)
buff = data.loc[:, in_range]
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Mask: inside the search window [xmin, xmax], find where the reference
# spectrum reaches ymin or more, and set that whole span to 0 in every spectrum.
xmax, xmin = 3800, 3000  # search window (both ends inclusive)
ymin = 1  # smallest intensity that triggers the replacement by 0
spec = data.iloc[0]  # spectrum used to decide where to mask
spec = spec.iloc[(xmin <= data.columns) & (data.columns <= xmax)]
over = spec[spec.values >= ymin]
buff = data.copy()
if not over.empty:
    # Span between the outermost positions that reach the threshold.
    imin = over.index.min()
    imax = over.index.max()
    # Select by value rather than by label slice: a label slice imin:imax
    # selects nothing when the columns are stored in descending order.
    buff.loc[:, (imin <= buff.columns) & (buff.columns <= imax)] = 0
# When nothing reaches ymin inside the window, buff is an unmodified copy.
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Data skip (reduce the number of spectra): keep every step-th row.
step = 2  # stride between the rows that are kept
every_nth_row = slice(None, None, step)
buff = data.iloc[every_nth_row]
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Data skip (reduce the number of horizontal-axis points): keep every
# step-th column.
step = 2  # stride between the columns that are kept
every_nth_column = slice(None, None, step)
buff = data.iloc[:, every_nth_column]
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Interpolation: resample every spectrum onto an evenly spaced horizontal grid.
from scipy.interpolate import interp1d
xdiv = 1  # grid spacing
# First grid point: the first multiple of xdiv strictly above the smallest
# column position, so the grid stays inside the measured range.
xmin = data.columns.min() // xdiv * xdiv + xdiv
xmax = data.columns.max()
ix = numpy.arange(xmin, xmax, xdiv)  # xmax itself is excluded by arange
# interp1d interpolates along the last axis by default, so a single call
# handles all rows.  Building the frame from the resulting float array keeps a
# numeric dtype; filling an empty pre-allocated frame row by row leaves it
# object-typed.
resample = interp1d(data.columns, data.values, kind="quadratic")
buff = pandas.DataFrame(resample(ix), index=data.index, columns=ix)
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Outlier removal by Isolation Forest
from sklearn.ensemble import IsolationForest
frac = 0.05  # expected fraction of outliers
# Isolation Forest is randomised; a fixed random_state makes the outlier
# labels identical on every run of this cell.
model = IsolationForest(contamination=frac, random_state=0).fit(data)
predict = model.predict(data)  # -1 marks an outlier, 1 an inlier
for title, label in (("outlier", -1), ("not outlier", 1)):
    print(title)
    ax = data[predict == label].T.plot(legend=False)
    ax.invert_xaxis()
    pyplot.show()
# Uncomment the next line to keep only the spectra labelled as inliers.
#data = data[predict == 1]

In [None]:
# Outlier removal by One Class SVM
from sklearn.svm import OneClassSVM
frac = 0.05  # expected fraction of outliers (passed as nu)
model = OneClassSVM(nu=frac).fit(data)
predict = model.predict(data)  # -1 marks an outlier, 1 an inlier
# Plot the two groups one after the other, outliers first.
for title, label in (("outlier", -1), ("not outlier", 1)):
    print(title)
    ax = data[predict == label].T.plot(legend=False)
    ax.invert_xaxis()
    pyplot.show()
# Uncomment the next line to keep only the spectra labelled as inliers.
#data = data[predict == 1]

In [None]:
# Smoothing with a Savitzky-Golay filter
from scipy.signal import savgol_filter
w = 3  # window length in points
polyorder = 2  # degree of the polynomial fitted inside each window
x = data.columns
y = data.values[-1]  # last spectrum, used for the two preview plots
print("2m + 1 =", w)
print("window size =", x[w - 1] - x[0])
if w == polyorder + 1:
    # A polynomial of degree w - 1 passes exactly through the w points of its
    # window, so the filter output equals its input.
    print("note: w equals polyorder + 1, so the filter reproduces the input; increase w to smooth")
# Filter all spectra once (the filter works along the last axis, i.e. along
# each row) and reuse the result for both the preview and the output frame.
smoothed = savgol_filter(data.values, w, polyorder, 0)
s = smoothed[-1]
# Preview 1: raw points (blue) against the smoothed curve (red).
pyplot.scatter(x, y, s=3, c="blue")
pyplot.plot(x, s, c="red")
pyplot.gca().invert_xaxis()
pyplot.show()
# Preview 2: residual between the raw and the smoothed spectrum.
pyplot.scatter(x, y - s, s=3, c="red")
pyplot.gca().invert_xaxis()
pyplot.show()
buff = pandas.DataFrame(smoothed, index=data.index, columns=data.columns)
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# 2nd derivative via repeated Savitzky-Golay passes
from scipy.signal import savgol_filter
w = 3  # window length in points
x = data.columns
print("2m + 1 =", w)
print("window size =", x[w - 1] - x[0])
# Pass order: smooth, first derivative, smooth, first derivative.
# NOTE(review): no delta is passed, so the derivative is taken per sample step
# (SciPy's default spacing of 1.0), not per unit of x -- confirm this is the
# intended scaling.
filtered = data
for deriv in (0, 1, 0, 1):
    filtered = savgol_filter(filtered, w, 2, deriv)
buff = pandas.DataFrame(filtered, index=data.index, columns=data.columns)
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Centering: subtract the column-wise mean (the mean over all spectra at each
# horizontal position) from every spectrum.
column_mean = data.mean()
buff = data - column_mean
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Scaling: divide every column by its standard deviation across spectra.
column_std = data.std()
# A column that is constant across spectra has zero standard deviation (and a
# single spectrum gives an undefined one); dividing by it would fill the
# column with NaN/inf.  Use 1 there so such columns pass through unscaled.
column_std[~(column_std > 0)] = 1.0
buff = data / column_std
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Autoscaling: centre every column on its mean, then divide by its standard
# deviation across spectra.
column_mean = data.mean()
column_std = data.std()
# A column that is constant across spectra has zero standard deviation (and a
# single spectrum gives an undefined one); dividing by it would fill the
# column with NaN.  Use 1 there so such columns come out as centred values.
column_std[~(column_std > 0)] = 1.0
buff = (data - column_mean) / column_std
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Normalizing: rescale each spectrum so that, inside the reference window
# [xmin, xmax], its minimum maps to 0 and its maximum maps to 1.
xmax, xmin = 3000, 2700  # reference window (both ends inclusive)
in_window = (data.columns >= xmin) & (data.columns <= xmax)
window = data.loc[:, in_window]
# Per-spectrum extremes taken inside the window only.
lowest = window.min(axis=1)
highest = window.max(axis=1)
# The whole spectrum (not only the window) is rescaled with those extremes.
buff = ((data.T - lowest) / (highest - lowest)).T
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Subtraction spectra: subtract one reference spectrum from every spectrum.
reference = data.iloc[0]  # spectrum to subtract
buff = data - reference
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff

In [None]:
# Successive-difference spectra: row i becomes (spectrum i+1) - (spectrum i).
# The result has one row fewer than data and keeps the labels of the first
# len(data) - 1 rows.
values = data.values
buff = pandas.DataFrame(
    values[1:] - values[:-1],
    index=data.index[:-1],
    columns=data.columns,
)
ax = buff.T.plot(legend=False)
ax.invert_xaxis()
pyplot.show()
# Uncomment the next line to adopt the result as the working data.
#data = buff