In [1]:
import pandas as pd
import numpy as np
from datetime import date, timedelta, datetime
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC
from sklearn.metrics import log_loss, accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
%matplotlib inline

plt.rcParams["figure.figsize"] = (20,3)

In [2]:
df = pd.read_csv('./all_merged_info_data.tsv',sep='\t',index_col='Unnamed: 0')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34992 entries, 0 to 34991
Data columns (total 90 columns):
hour                     34992 non-null int64
climate                  34992 non-null object
weather_deterioration    34992 non-null int64
swell                    34392 non-null float64
22453_일시                 33979 non-null float64
22453_수온(°C)             33082 non-null float64
22453_최대파고(m)            33632 non-null float64
22453_유의파고(m)            33632 non-null float64
22453_평균파고(m)            33632 non-null float64
22453_파주기(sec)           33634 non-null float64
22490_일시                 19639 non-null float64
22490_수온(°C)             19464 non-null float64
22490_최대파고(m)            19224 non-null float64
22490_유의파고(m)            19224 non-null float64
22490_평균파고(m)            19224 non-null float64
22490_파주기(sec)           19562 non-null float64
21229_GUST풍속 1(m/s)      8637 non-null float64
21229_GUST풍속(m/s)        26091 non-null float64
21229_기온(°C)             33915 no

In [3]:
df['swell'].value_counts()

0.0    31800
1.0     2592
Name: swell, dtype: int64

In [4]:
df['swell'] = df['swell'] + df['weather_deterioration']

In [5]:
df['swell'].value_counts()

0.0    26863
1.0     4937
2.0     2592
Name: swell, dtype: int64

In [6]:
s = (df[df['swell'].isnull()]['hour']//100).value_counts()
s

20170315    17
20171211    17
20161130    17
20170331    17
20141023    17
20161020    17
20140518    17
20170212    17
20150718    17
20140706    17
20150627    17
20160830    17
20171023    17
20171012    17
20141221    17
20170916    17
20160122    17
20150404    17
20150113    17
20140925    17
20170603    17
20161226    17
20151213    17
20160304    17
20151113    17
20160831     7
20170604     7
20171024     7
20140926     7
20171013     7
20150114     7
20141222     7
20151214     7
20150405     7
20150628     7
20140707     7
20150719     7
20170401     7
20171212     7
20170917     7
20151114     7
20170213     7
20140519     7
20160305     7
20161021     7
20170316     7
20161227     7
20161201     7
20160123     7
20141024     7
Name: hour, dtype: int64

In [7]:
s[s > 10].index.tolist()[0]

20170315

In [8]:
def get_prev_date(today):
    today = date(int(today[:4]),int(today[4:6]),int(today[6:]))
    return (today - timedelta(1)).strftime('%Y%m%d%H')

In [9]:
def get_today_from_07(today):
    today = date(int(today[:4]),int(today[4:6]),int(today[6:]))
    return today.strftime('%Y%m%d%H')

In [10]:
get_prev_date('20170101')

'2016123100'

In [11]:
get_today_from_07('20170101')

'2017010100'

In [12]:
s[s>10].index.astype(str).sort_values()

Index(['20140518', '20140706', '20140925', '20141023', '20141221', '20150113',
       '20150404', '20150627', '20150718', '20151113', '20151213', '20160122',
       '20160304', '20160830', '20161020', '20161130', '20161226', '20170212',
       '20170315', '20170331', '20170603', '20170916', '20171012', '20171023',
       '20171211'],
      dtype='object')

In [13]:
test_date_hour = []
for d in s[s>10].index.astype(str):
    #day = get_prev_date(d)
    day = get_today_from_07(d)
    tomorrow_day = int(day) +  100 
    
    if tomorrow_day == 2016113100:
        tomorrow_day = 2016120100

    if tomorrow_day == 2017033200:
        tomorrow_day = 2017040100
        
    print(day, tomorrow_day)
    for hour in range(7,24):
        test_date_hour.append((int(day) + hour))
            
    for hour in range(0,7):
        test_date_hour.append((tomorrow_day + hour))

test_date_hour = list(map(str,test_date_hour))
len(test_date_hour)

2017031500 2017031600
2017121100 2017121200
2016113000 2016120100
2017033100 2017040100
2014102300 2014102400
2016102000 2016102100
2014051800 2014051900
2017021200 2017021300
2015071800 2015071900
2014070600 2014070700
2015062700 2015062800
2016083000 2016083100
2017102300 2017102400
2017101200 2017101300
2014122100 2014122200
2017091600 2017091700
2016012200 2016012300
2015040400 2015040500
2015011300 2015011400
2014092500 2014092600
2017060300 2017060400
2016122600 2016122700
2015121300 2015121400
2016030400 2016030500
2015111300 2015111400


600

In [14]:
df['swell(t+1)'] = df['swell'].shift(-1)
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 34992 entries, 0 to 34991
Data columns (total 91 columns):
hour                     34992 non-null int64
climate                  34992 non-null object
weather_deterioration    34992 non-null int64
swell                    34392 non-null float64
22453_일시                 33979 non-null float64
22453_수온(°C)             33082 non-null float64
22453_최대파고(m)            33632 non-null float64
22453_유의파고(m)            33632 non-null float64
22453_평균파고(m)            33632 non-null float64
22453_파주기(sec)           33634 non-null float64
22490_일시                 19639 non-null float64
22490_수온(°C)             19464 non-null float64
22490_최대파고(m)            19224 non-null float64
22490_유의파고(m)            19224 non-null float64
22490_평균파고(m)            19224 non-null float64
22490_파주기(sec)           19562 non-null float64
21229_GUST풍속 1(m/s)      8637 non-null float64
21229_GUST풍속(m/s)        26091 non-null float64
21229_기온(°C)             33915 no

In [15]:
df.columns

Index(['hour', 'climate', 'weather_deterioration', 'swell', '22453_일시',
       '22453_수온(°C)', '22453_최대파고(m)', '22453_유의파고(m)', '22453_평균파고(m)',
       '22453_파주기(sec)', '22490_일시', '22490_수온(°C)', '22490_최대파고(m)',
       '22490_유의파고(m)', '22490_평균파고(m)', '22490_파주기(sec)',
       '21229_GUST풍속 1(m/s)', '21229_GUST풍속(m/s)', '21229_기온(°C)',
       '21229_수온(°C)', '21229_습도(%)', '21229_유의파고(m)', '21229_일시',
       '21229_최대파고(m)', '21229_파주기(sec)', '21229_파향(deg)', '21229_평균파고(m)',
       '21229_풍속(m/s)', '21229_풍속1(m/s)', '21229_풍향(deg)', '21229_풍향1(deg)',
       '21229_현지기압(hPa)', '22105_GUST풍속 1(m/s)', '22105_GUST풍속(m/s)',
       '22105_기온(°C)', '22105_수온(°C)', '22105_습도(%)', '22105_유의파고(m)',
       '22105_일시', '22105_최대파고(m)', '22105_파주기(sec)', '22105_파향(deg)',
       '22105_평균파고(m)', '22105_풍속(m/s)', '22105_풍속1(m/s)', '22105_풍향(deg)',
       '22105_풍향1(deg)', '22105_현지기압(hPa)', '22106_GUST풍속 1(m/s)',
       '22106_GUST풍속(m/s)', '22106_기온(°C)', '22106_수온(°C)', '22106_습도(%)',
       '

In [16]:
def output_change(x):
    if x == 2:
        return 1
    else:
        return 0

In [17]:
df['hour'] = df['hour'].astype(str)
df['index'] = df.index
df.set_index('hour',inplace=True,)

In [18]:
scale_features = ['22453_파주기(sec)','21229_파주기(sec)','22105_파주기(sec)','22106_파주기(sec)']
#['swell']#,'22453_파주기(sec)','22106_파주기(sec)','22106_현지기압(hPa)']
nor_features = []#['month_coord']#,'hour_coord']
index_feature = ['index']
target = ['swell(t+1)']

In [19]:
minmax_scaler = MinMaxScaler(copy=True, feature_range=(0,1))
temp_df = df[index_feature+scale_features+nor_features+target]

minmax_scaler.fit(temp_df[scale_features].dropna())
    
train = temp_df[index_feature+scale_features+nor_features + target].drop(test_date_hour).dropna()
test = temp_df[index_feature+scale_features+nor_features + target].loc[test_date_hour]
train.info()
test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 31494 entries, 2014010407 to 2017123123
Data columns (total 6 columns):
index             31494 non-null int64
22453_파주기(sec)    31494 non-null float64
21229_파주기(sec)    31494 non-null float64
22105_파주기(sec)    31494 non-null float64
22106_파주기(sec)    31494 non-null float64
swell(t+1)        31494 non-null float64
dtypes: float64(5), int64(1)
memory usage: 1.7+ MB
<class 'pandas.core.frame.DataFrame'>
Index: 600 entries, 2017031507 to 2015111406
Data columns (total 6 columns):
index             600 non-null int64
22453_파주기(sec)    558 non-null float64
21229_파주기(sec)    597 non-null float64
22105_파주기(sec)    572 non-null float64
22106_파주기(sec)    595 non-null float64
swell(t+1)        25 non-null float64
dtypes: float64(5), int64(1)
memory usage: 32.8+ KB


In [20]:
test = test.fillna(method='ffill').fillna(method='bfill')
test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 600 entries, 2017031507 to 2015111406
Data columns (total 6 columns):
index             600 non-null int64
22453_파주기(sec)    600 non-null float64
21229_파주기(sec)    600 non-null float64
22105_파주기(sec)    600 non-null float64
22106_파주기(sec)    600 non-null float64
swell(t+1)        600 non-null float64
dtypes: float64(5), int64(1)
memory usage: 32.8+ KB


In [21]:
test['swell(t+1)']=np.NaN

In [22]:
test.info()

<class 'pandas.core.frame.DataFrame'>
Index: 600 entries, 2017031507 to 2015111406
Data columns (total 6 columns):
index             600 non-null int64
22453_파주기(sec)    600 non-null float64
21229_파주기(sec)    600 non-null float64
22105_파주기(sec)    600 non-null float64
22106_파주기(sec)    600 non-null float64
swell(t+1)        0 non-null float64
dtypes: float64(5), int64(1)
memory usage: 32.8+ KB


### 테스트 기준 각각 이전의 데이터만 사용하도록 매번 새롭게 학습

In [23]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import minmax_scale

In [24]:
pred_result = []
for index in test.index:
    order_index = test.loc[index]['index']
    print(index, order_index)
    train_before_test = train[train['index']<order_index]
    print(len(train_before_test))
    test_row = pd.DataFrame(test.loc[index]).T
    cluster_df = pd.concat([train_before_test,test_row])
#     cluster_df.iloc[-1,1] = cluster_df.iloc[-2,1]
#     test.loc[index,'swell'] = cluster_df.ix[-2,1]
    
    train_X = minmax_scale(cluster_df[scale_features].values)

    x_list = []
    for col in nor_features:
        X_arr = np.array(cluster_df[col].apply(lambda x:np.array(x)).tolist())
        x_list.append(X_arr)

    if len(nor_features) > 0:
        nor_X = np.concatenate(x_list,axis=1)
        train_X = np.concatenate([train_X,nor_X], axis=1)

    print(train_X.shape)
    K = 4

    kmeans = KMeans(n_clusters=K,max_iter=10000)
    pred = kmeans.fit_predict(train_X)
    print(pred)
    cluster_df['pred'] = pred
    print(cluster_df.groupby(['pred','swell(t+1)']).count()[['index']])
    
    pred_num = cluster_df['pred'].value_counts()
    
    swell_num = []
    for i in range(K):
        swell_num.append(len(cluster_df[(cluster_df['pred'] == i) & (cluster_df['swell(t+1)']==2)]))

    test_pred = int(cluster_df.loc[index]['pred'])
    print(test_pred , swell_num[test_pred] / pred_num.loc[test_pred])
    pred_result.append((test_pred , swell_num[test_pred] / pred_num.loc[test_pred]))

2017031507 27984.0
24976
(24977, 4)
[2 2 2 ..., 0 1 1]
                 index
pred swell(t+1)       
0    0.0          5251
     1.0          1113
     2.0           567
1    0.0          1287
     1.0           376
     2.0          1403
2    0.0          6699
     1.0          1465
     2.0            42
3    0.0          5844
     1.0           929
1 0.457450277144
2017031508 27985.0
24976
(24977, 4)
[1 1 1 ..., 0 2 2]
                 index
pred swell(t+1)       
0    0.0          5255
     1.0          1113
     2.0           568
1    0.0          6709
     1.0          1467
     2.0            43
2    0.0          1279
     1.0           376
     2.0          1401
3    0.0          5838
     1.0           927
2 0.458292443572
2017031509 27986.0
24976
(24977, 4)
[0 0 0 ..., 1 1 3]
                 index
pred swell(t+1)       
0    0.0          6797
     1.0          1499
     2.0            46
1    0.0          5118
     1.0          1080
     2.0           617
2    0.0          5

[0 0 0 ..., 3 1 1]
                 index
pred swell(t+1)       
0    0.0          6707
     1.0          1467
     2.0            43
1    0.0          1279
     1.0           376
     2.0          1401
2    0.0          5842
     1.0           929
3    0.0          5253
     1.0          1111
     2.0           568
1 0.458292443572
2017031606 28007.0
24976
(24977, 4)
[1 1 1 ..., 0 3 3]
                 index
pred swell(t+1)       
0    0.0          5239
     1.0          1108
     2.0           570
1    0.0          6704
     1.0          1469
     2.0            43
2    0.0          5867
     1.0           933
3    0.0          1271
     1.0           373
     2.0          1399
3 0.459592641261
2017121107 34488.0
31039
(31040, 4)
[2 2 2 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          7674
     1.0          1168
1    0.0          6472
     1.0          1247
     2.0           642
2    0.0          8652
     1.0          1660
     2.0            63
3    0.0 

[2 2 2 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          7870
     1.0          1214
1    0.0          6333
     1.0          1220
     2.0           695
2    0.0          8696
     1.0          1666
     2.0            66
3    0.0          1437
     1.0           362
     2.0          1480
1 0.0842526366832
2017121205 34510.0
31039
(31040, 4)
[0 0 0 ..., 0 0 1]
                 index
pred swell(t+1)       
0    0.0          8629
     1.0          1655
     2.0            64
1    0.0          6434
     1.0          1239
     2.0           635
2    0.0          7721
     1.0          1174
3    0.0          1552
     1.0           394
     2.0          1542
1 0.0764231556144
2017121206 34511.0
31039
(31040, 4)
[0 0 0 ..., 0 0 3]
                 index
pred swell(t+1)       
0    0.0          8701
     1.0          1670
     2.0            66
1    0.0          7879
     1.0          1214
2    0.0          1437
     1.0           362
     2.0          1480
3    0.

[3 3 3 ..., 2 1 3]
                 index
pred swell(t+1)       
0    0.0          5304
     1.0           820
1    0.0          4811
     1.0          1100
     2.0           423
2    0.0          1317
     1.0           412
     2.0          1124
3    0.0          6181
     1.0          1361
     2.0            32
3 0.00422442244224
2016120104 25485.0
22885
(22886, 4)
[3 3 3 ..., 2 1 3]
                 index
pred swell(t+1)       
0    0.0          5408
     1.0           833
1    0.0          4749
     1.0          1091
     2.0           437
2    0.0          1193
     1.0           383
     2.0          1105
3    0.0          6263
     1.0          1386
     2.0            37
3 0.00481332119162
2016120105 25486.0
22885
(22886, 4)
[0 0 0 ..., 2 1 0]
                 index
pred swell(t+1)       
0    0.0          6038
     1.0          1323
     2.0            30
1    0.0          4865
     1.0          1119
     2.0           390
2    0.0          1431
     1.0           436
     

[2 2 2 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          5994
     1.0           964
1    0.0          5283
     1.0          1114
     2.0           596
2    0.0          6865
     1.0          1481
     2.0            44
3    0.0          1239
     1.0           365
     2.0          1387
2 0.00524371350256
2017040103 28388.0
25332
(25333, 4)
[0 0 0 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0          6906
     1.0          1496
     2.0            50
1    0.0          6215
     1.0          1014
2    0.0          1143
     1.0           335
     2.0          1333
3    0.0          5117
     1.0          1079
     2.0           644
0 0.0059150597421
2017040104 28389.0
25332
(25333, 4)
[0 0 0 ..., 0 0 3]
                 index
pred swell(t+1)       
0    0.0          6933
     1.0          1499
     2.0            50
1    0.0          1167
     1.0           345
     2.0          1346
2    0.0          6108
     1.0           988
3    0

[2 2 2 ..., 1 1 2]
                 index
pred swell(t+1)       
0    0.0          1310
     1.0           290
     2.0           109
1    0.0          1151
     1.0           229
2    0.0          1602
     1.0           561
     2.0             8
3    0.0           453
     1.0           142
     2.0           299
2 0.0036832412523
2014102403 7028.0
6154
(6155, 4)
[1 1 1 ..., 2 2 3]
                 index
pred swell(t+1)       
0    0.0           460
     1.0           147
     2.0           302
1    0.0          1594
     1.0           560
     2.0             8
2    0.0          1152
     1.0           228
3    0.0          1310
     1.0           287
     2.0           106
3 0.06220657277
2014102404 7029.0
6154
(6155, 4)
[2 2 2 ..., 1 1 2]
                 index
pred swell(t+1)       
0    0.0          1304
     1.0           289
     2.0           109
1    0.0          1154
     1.0           231
2    0.0          1605
     1.0           560
     2.0             8
3    0.0       

[1 1 1 ..., 3 3 0]
                 index
pred swell(t+1)       
0    0.0          4525
     1.0          1069
     2.0           382
1    0.0          6007
     1.0          1351
     2.0            33
2    0.0          1180
     1.0           387
     2.0          1027
3    0.0          5236
     1.0           816
0 0.0639116613686
2016102102 24499.0
22013
(22014, 4)
[0 0 0 ..., 3 3 1]
                 index
pred swell(t+1)       
0    0.0          6097
     1.0          1379
     2.0            37
1    0.0          1042
     1.0           357
     2.0           988
2    0.0          4472
     1.0          1055
     2.0           417
3    0.0          5337
     1.0           832
1 0.413735343384
2016102103 24500.0
22013
(22014, 4)
[1 1 1 ..., 2 2 0]
                 index
pred swell(t+1)       
0    0.0          1020
     1.0           350
     2.0           978
1    0.0          6121
     1.0          1383
     2.0            40
2    0.0          5420
     1.0           853
3    0.0

[1 1 1 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0           361
     1.0           115
     2.0           173
1    0.0           702
     1.0           127
     2.0             9
2    0.0           616
     1.0           164
     2.0           100
3    0.0           612
     1.0            83
0 0.266153846154
2014051901 3234.0
3062
(3063, 4)
[2 2 2 ..., 3 3 3]
                 index
pred swell(t+1)       
0    0.0           612
     1.0            83
1    0.0           617
     1.0           164
     2.0           100
2    0.0           701
     1.0           127
     2.0             9
3    0.0           361
     1.0           115
     2.0           173
3 0.266153846154
2014051902 3235.0
3062
(3063, 4)
[2 2 2 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0           361
     1.0           115
     2.0           173
1    0.0           614
     1.0            83
2    0.0           704
     1.0           128
     2.0            10
3    0.0       

[0 0 0 ..., 2 1 1]
                 index
pred swell(t+1)       
0    0.0          6613
     1.0          1432
     2.0            42
1    0.0          1276
     1.0           376
     2.0          1377
2    0.0          5146
     1.0          1082
     2.0           538
3    0.0          5714
     1.0           921
1 0.454455445545
2017021300 27257.0
24517
(24518, 4)
[3 3 3 ..., 2 1 2]
                 index
pred swell(t+1)       
0    0.0          5724
     1.0           923
1    0.0          1276
     1.0           376
     2.0          1377
2    0.0          5143
     1.0          1081
     2.0           538
3    0.0          6606
     1.0          1431
     2.0            42
2 0.0795504953423
2017021301 27258.0
24517
(24518, 4)
[2 2 2 ..., 1 3 1]
                 index
pred swell(t+1)       
0    0.0          5913
     1.0           951
1    0.0          5016
     1.0          1054
     2.0           597
2    0.0          6663
     1.0          1463
     2.0            48
3    0.0

[3 3 3 ..., 1 2 2]
                 index
pred swell(t+1)       
0    0.0          2655
     1.0           491
1    0.0           714
     1.0           177
     2.0           566
2    0.0          2548
     1.0           520
     2.0           272
3    0.0          2858
     1.0           840
     2.0            18
2 0.0814127506735
2015071823 13456.0
11659
(11660, 4)
[0 0 0 ..., 1 3 3]
                 index
pred swell(t+1)       
0    0.0          2872
     1.0           843
     2.0            18
1    0.0           712
     1.0           175
     2.0           565
2    0.0          2649
     1.0           490
3    0.0          2542
     1.0           520
     2.0           273
3 0.0818345323741
2015071900 13457.0
11659
(11660, 4)
[0 0 0 ..., 3 1 0]
                 index
pred swell(t+1)       
0    0.0          2860
     1.0           842
     2.0            18
1    0.0          2541
     1.0           520
     2.0           273
2    0.0          2662
     1.0           491
3    0.

[2 2 2 ..., 3 3 3]
                 index
pred swell(t+1)       
0    0.0           840
     1.0           129
1    0.0           409
     1.0           111
     2.0           225
2    0.0          1077
     1.0           255
     2.0             9
3    0.0           810
     1.0           202
     2.0           101
3 0.0906642728905
2014070623 4408.0
4168
(4169, 4)
[3 3 3 ..., 0 0 3]
                 index
pred swell(t+1)       
0    0.0           801
     1.0           199
     2.0           101
1    0.0           844
     1.0           129
2    0.0           408
     1.0           111
     2.0           225
3    0.0          1083
     1.0           258
     2.0             9
3 0.00666173205033
2014070700 4409.0
4168
(4169, 4)
[3 3 3 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0           834
     1.0           127
1    0.0           412
     1.0           115
     2.0           225
2    0.0           819
     1.0           201
     2.0           101
3    0.0    

[1 1 1 ..., 2 2 0]
                 index
pred swell(t+1)       
0    0.0           694
     1.0           170
     2.0           536
1    0.0          2784
     1.0           808
     2.0            18
2    0.0          2487
     1.0           476
     2.0           268
3    0.0          2543
     1.0           433
0 0.382583868665
2015062722 12951.0
11217
(11218, 4)
[0 0 0 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          2787
     1.0           810
     2.0            18
1    0.0           690
     1.0           169
     2.0           536
2    0.0          2484
     1.0           475
     2.0           268
3    0.0          2547
     1.0           433
1 0.383954154728
2015062723 12952.0
11217
(11218, 4)
[0 0 0 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          2776
     1.0           807
     2.0            18
1    0.0           676
     1.0           165
     2.0           528
2    0.0          2495
     1.0           477
     2.0 

[0 0 0 ..., 3 3 3]
                 index
pred swell(t+1)       
0    0.0          5631
     1.0          1225
     2.0            34
1    0.0          5128
     1.0           763
2    0.0          4282
     1.0           963
     2.0           372
3    0.0          1082
     1.0           353
     2.0           999
3 0.410266940452
2016083021 23270.0
20832
(20833, 4)
[0 0 0 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          5581
     1.0          1216
     2.0            29
1    0.0          1166
     1.0           372
     2.0          1008
2    0.0          4387
     1.0           983
     2.0           368
3    0.0          4989
     1.0           733
1 0.395759717314
2016083022 23271.0
20832
(20833, 4)
[1 1 1 ..., 3 3 3]
                 index
pred swell(t+1)       
0    0.0          4271
     1.0           966
     2.0           405
1    0.0          5745
     1.0          1254
     2.0            36
2    0.0          5126
     1.0           755
3    0.0 

[3 3 3 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          7200
     1.0          1114
1    0.0          6197
     1.0          1289
     2.0           543
2    0.0          1763
     1.0           450
     2.0          1533
3    0.0          8172
     1.0          1601
     2.0            49
2 0.409127301841
2017102320 33325.0
29911
(29912, 4)
[3 3 3 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          7167
     1.0          1107
1    0.0          1755
     1.0           450
     2.0          1532
2    0.0          6215
     1.0          1292
     2.0           544
3    0.0          8195
     1.0          1605
     2.0            49
1 0.409844836811
2017102321 33326.0
29911
(29912, 4)
[0 0 0 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          8169
     1.0          1598
     2.0            49
1    0.0          7183
     1.0          1113
2    0.0          1778
     1.0           454
     2.0          1538
3    0.0 

[1 1 1 ..., 2 2 0]
                 index
pred swell(t+1)       
0    0.0          1728
     1.0           434
     2.0          1522
1    0.0          8109
     1.0          1592
     2.0            43
2    0.0          6170
     1.0          1280
     2.0           536
3    0.0          7155
     1.0          1107
0 0.41302578019
2017101219 33060.0
29676
(29677, 4)
[2 2 2 ..., 3 3 3]
                 index
pred swell(t+1)       
0    0.0          1755
     1.0           436
     2.0          1532
1    0.0          7147
     1.0          1107
2    0.0          8096
     1.0          1587
     2.0            43
3    0.0          6164
     1.0          1283
     2.0           526
3 0.0659643842488
2017101220 33061.0
29676
(29677, 4)
[1 1 1 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          7225
     1.0          1119
1    0.0          8099
     1.0          1593
     2.0            45
2    0.0          6113
     1.0          1268
     2.0           533
3    0.0 

[3 3 3 ..., 2 1 2]
                 index
pred swell(t+1)       
0    0.0          1721
     1.0           394
1    0.0          1606
     1.0           314
     2.0           191
2    0.0           336
     1.0            99
     2.0           288
3    0.0          1824
     1.0           578
     2.0            19
2 0.397790055249
2014122118 8435.0
7370
(7371, 4)
[2 2 2 ..., 3 0 3]
                 index
pred swell(t+1)       
0    0.0          1600
     1.0           313
     2.0           191
1    0.0          1718
     1.0           390
2    0.0          1837
     1.0           584
     2.0            19
3    0.0           332
     1.0            98
     2.0           288
3 0.400556328234
2014122119 8436.0
7370
(7371, 4)
[0 0 0 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          1816
     1.0           578
     2.0            18
1    0.0           375
     1.0           103
     2.0           301
2    0.0          1619
     1.0           322
     2.0       

[1 1 1 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0          5993
     1.0          1249
     2.0           532
1    0.0          7935
     1.0          1557
     2.0            45
2    0.0          7106
     1.0          1097
3    0.0          1631
     1.0           426
     2.0          1512
0 0.068424437299
2017091617 32434.0
29083
(29084, 4)
[3 3 3 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0          6010
     1.0          1253
     2.0           536
1    0.0          7078
     1.0          1092
2    0.0          1626
     1.0           425
     2.0          1510
3    0.0          7951
     1.0          1559
     2.0            43
0 0.0687179487179
2017091618 32435.0
29083
(29084, 4)
[1 1 1 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          7072
     1.0          1091
1    0.0          7946
     1.0          1557
     2.0            43
2    0.0          6012
     1.0          1255
     2.0           533
3    0.0

[0 0 0 ..., 2 1 1]
                 index
pred swell(t+1)       
0    0.0          4218
     1.0          1063
     2.0            36
1    0.0           671
     1.0           254
     2.0           747
2    0.0          3318
     1.0           724
     2.0           429
3    0.0          3741
     1.0           649
1 0.446503287507
2016012216 17961.0
15850
(15851, 4)
[0 0 0 ..., 1 3 1]
                 index
pred swell(t+1)       
0    0.0          4192
     1.0          1055
     2.0            36
1    0.0          3321
     1.0           723
     2.0           420
2    0.0          3750
     1.0           653
3    0.0           685
     1.0           259
     2.0           756
1 0.0940649496081
2016012217 17962.0
15850
(15851, 4)
[1 1 1 ..., 0 3 0]
                 index
pred swell(t+1)       
0    0.0          3310
     1.0           718
     2.0           436
1    0.0          4203
     1.0          1065
     2.0            36
2    0.0          3782
     1.0           657
3    0.0

[2 2 2 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          2206
     1.0           457
1    0.0          2136
     1.0           355
     2.0           287
2    0.0          2169
     1.0           658
     2.0            25
3    0.0           564
     1.0           130
     2.0           500
1 0.103274559194
2015040415 10928.0
9487
(9488, 4)
[0 0 0 ..., 0 0 2]
                 index
pred swell(t+1)       
0    0.0          2180
     1.0           663
     2.0            26
1    0.0           552
     1.0           129
     2.0           491
2    0.0          2120
     1.0           347
     2.0           295
3    0.0          2223
     1.0           461
2 0.106768005791
2015040416 10929.0
9487
(9488, 4)
[2 2 2 ..., 2 2 1]
                 index
pred swell(t+1)       
0    0.0          2204
     1.0           458
1    0.0          2130
     1.0           349
     2.0           296
2    0.0          2189
     1.0           664
     2.0            25
3    0.0     

[3 3 3 ..., 0 0 0]
                 index
pred swell(t+1)       
0    0.0           398
     1.0           106
     2.0           371
1    0.0          1731
     1.0           312
     2.0           201
2    0.0          1770
     1.0           407
3    0.0          1820
     1.0           572
     2.0            19
0 0.423515981735
2015011314 8983.0
7707
(7708, 4)
[0 0 0 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          1797
     1.0           569
     2.0            19
1    0.0           401
     1.0           106
     2.0           371
2    0.0          1740
     1.0           313
     2.0           201
3    0.0          1781
     1.0           409
1 0.422070534699
2015011315 8984.0
7707
(7708, 4)
[0 0 0 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          1817
     1.0           572
     2.0            19
1    0.0           398
     1.0           106
     2.0           371
2    0.0          1773
     1.0           407
3    0.0       

[0 0 0 ..., 1 1 2]
                 index
pred swell(t+1)       
0    0.0          1555
     1.0           555
     2.0             8
1    0.0          1251
     1.0           288
     2.0           100
2    0.0           449
     1.0           150
     2.0           290
3    0.0          1119
     1.0           222
2 0.325842696629
2014092514 6343.0
5987
(5988, 4)
[0 0 0 ..., 3 3 1]
                 index
pred swell(t+1)       
0    0.0          1573
     1.0           556
     2.0             8
1    0.0           438
     1.0           142
     2.0           283
2    0.0          1139
     1.0           228
3    0.0          1224
     1.0           289
     2.0           107
1 0.327546296296
2014092515 6344.0
5987
(5988, 4)
[1 1 1 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0           449
     1.0           150
     2.0           290
1    0.0          1552
     1.0           555
     2.0             8
2    0.0          1257
     1.0           288
     2.0       

[3 3 3 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          5502
     1.0          1169
     2.0           574
1    0.0          6515
     1.0          1006
2    0.0          1417
     1.0           385
     2.0          1433
3    0.0          7289
     1.0          1494
     2.0            47
2 0.44283065513
2017060313 29910.0
26831
(26832, 4)
[3 3 3 ..., 2 2 2]
                 index
pred swell(t+1)       
0    0.0          6758
     1.0          1055
1    0.0          5355
     1.0          1140
     2.0           644
2    0.0          1269
     1.0           347
     2.0          1359
3    0.0          7341
     1.0          1512
     2.0            51
2 0.456653225806
2017060314 29911.0
26831
(26832, 4)
[0 0 0 ..., 1 1 1]
                 index
pred swell(t+1)       
0    0.0          7291
     1.0          1494
     2.0            47
1    0.0          1417
     1.0           385
     2.0          1433
2    0.0          5500
     1.0          1169
     2.0  

[0 0 0 ..., 3 3 0]
                 index
pred swell(t+1)       
0    0.0          6405
     1.0          1409
     2.0            41
1    0.0          5693
     1.0           909
2    0.0          1121
     1.0           359
     2.0          1133
3    0.0          4814
     1.0          1070
     2.0           519
0 0.00521894093686
2016122612 26093.0
23473
(23474, 4)
[0 0 0 ..., 1 1 0]
                 index
pred swell(t+1)       
0    0.0          6470
     1.0          1417
     2.0            41
1    0.0          4828
     1.0          1076
     2.0           519
2    0.0          5615
     1.0           895
3    0.0          1120
     1.0           359
     2.0          1133
0 0.00517089166351
2016122613 26094.0
23473
(23474, 4)
[3 3 3 ..., 1 1 3]
                 index
pred swell(t+1)       
0    0.0          5689
     1.0           909
1    0.0          4815
     1.0          1070
     2.0           519
2    0.0          1120
     1.0           359
     2.0          1133
3    

[0 0 0 ..., 0 0 3]
                 index
pred swell(t+1)       
0    0.0          3775
     1.0          1028
     2.0            31
1    0.0           745
     1.0           267
     2.0           752
2    0.0          3501
     1.0           606
3    0.0          3137
     1.0           726
     2.0           381
3 0.0897526501767
2015121311 16996.0
14949
(14950, 4)
[0 0 0 ..., 0 0 3]
                 index
pred swell(t+1)       
0    0.0          3795
     1.0          1029
     2.0            30
1    0.0           768
     1.0           277
     2.0           763
2    0.0          3436
     1.0           592
3    0.0          3159
     1.0           729
     2.0           371
3 0.0870892018779
2015121312 16997.0
14949
(14950, 4)
[0 0 0 ..., 0 0 2]
                 index
pred swell(t+1)       
0    0.0          3795
     1.0          1029
     2.0            30
1    0.0           768
     1.0           277
     2.0           763
2    0.0          3159
     1.0           729
     2.

[1 1 1 ..., 1 3 1]
                 index
pred swell(t+1)       
0    0.0           723
     1.0           260
     2.0           837
1    0.0          4512
     1.0          1102
     2.0            45
2    0.0          3537
     1.0           776
     2.0           440
3    0.0          3918
     1.0           665
1 0.00795053003534
2016030410 18963.0
16815
(16816, 4)
[0 0 0 ..., 0 1 1]
                 index
pred swell(t+1)       
0    0.0          4476
     1.0          1097
     2.0            47
1    0.0          3972
     1.0           675
2    0.0           721
     1.0           259
     2.0           833
3    0.0          3521
     1.0           772
     2.0           442
1 0.0
2016030411 18964.0
16815
(16816, 4)
[2 2 2 ..., 2 1 2]
                 index
pred swell(t+1)       
0    0.0          3536
     1.0           775
     2.0           440
1    0.0          3919
     1.0           665
2    0.0          4512
     1.0          1103
     2.0            45
3    0.0          

[0 0 0 ..., 0 1 0]
                 index
pred swell(t+1)       
0    0.0          3678
     1.0           982
     2.0            33
1    0.0          2993
     1.0           663
     2.0           322
2    0.0           785
     1.0           262
     2.0           708
3    0.0          3391
     1.0           580
0 0.00703025138475
2015111309 16274.0
14397
(14398, 4)
[0 0 0 ..., 0 2 0]
                 index
pred swell(t+1)       
0    0.0          3647
     1.0           974
     2.0            33
1    0.0          3433
     1.0           590
2    0.0          2981
     1.0           661
     2.0           322
3    0.0           786
     1.0           262
     2.0           708
0 0.00708915145005
2015111310 16275.0
14397
(14398, 4)
[1 1 1 ..., 1 2 1]
                 index
pred swell(t+1)       
0    0.0           787
     1.0           263
     2.0           708
1    0.0          3664
     1.0           977
     2.0            33
2    0.0          2987
     1.0           662
     

In [25]:
pred_df = pd.DataFrame(pred_result)

In [26]:
def result(x):
    if x >0.20:
        return 1
    
    return 0

In [27]:
pred_df['swell'] = pred_df[1].apply(lambda x: result(x))
pred_df

Unnamed: 0,0,1,swell
0,1,0.457450,1
1,2,0.458292,1
2,3,0.470035,1
3,1,0.458990,1
4,1,0.470035,1
5,0,0.466394,1
6,0,0.470035,1
7,1,0.458839,1
8,2,0.466394,1
9,2,0.469260,1


In [28]:
pred_df['swell'].value_counts()

0    363
1    237
Name: swell, dtype: int64

In [29]:
pred_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 3 columns):
0        600 non-null int64
1        600 non-null float64
swell    600 non-null int64
dtypes: float64(1), int64(2)
memory usage: 14.1 KB


In [30]:
test['pred'] = pred_df['swell'].tolist()

In [31]:
def smooth_func(x):
    if  x[0]==1 and x[-1]==1:
        return 1
    
    if x[0]==0 and x[-1]==0:
        return 0
    
    return x[1]

test['smooth_pred'] = test.pred.rolling(window=3).apply(lambda x:smooth_func(x)).shift(-1)
test['smooth_pred'] = test['smooth_pred'].fillna(method='ffill').fillna(method='bfill')

In [32]:
test['smooth_pred'].value_counts()

0.0    365
1.0    235
Name: smooth_pred, dtype: int64

In [33]:
index=576
test.sort_index().iloc[index:index+24].T.loc[['smooth_pred']]

hour,2017121107,2017121108,2017121109,2017121110,2017121111,2017121112,2017121113,2017121114,2017121115,2017121116,...,2017121121,2017121122,2017121123,2017121200,2017121201,2017121202,2017121203,2017121204,2017121205,2017121206
smooth_pred,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0


In [34]:
mat =[]
for i in range(int(600/24)):
    index=24*i
    print(test.sort_index().iloc[index:index+24].T.loc[['smooth_pred']])
    mat.append(test.sort_index().iloc[index:index+24].T.loc['smooth_pred'].tolist())

hour         2014051807  2014051808  2014051809  2014051810  2014051811  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour         2014051812  2014051813  2014051814  2014051815  2014051816  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour            ...      2014051821  2014051822  2014051823  2014051900  \
smooth_pred     ...             1.0         1.0         1.0         1.0   

hour         2014051901  2014051902  2014051903  2014051904  2014051905  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour         2014051906  
smooth_pred         1.0  

[1 rows x 24 columns]
hour         2014070607  2014070608  2014070609  2014070610  2014070611  \
smooth_pred         0.0         0.0         0.0         0.0         0.0   

hour         2014070612  2014070613  2014070614  2014070615  2014070616  \
smooth_pred         0.0         0.0         0.0         0.0         0.0   

hour            ...

[1 rows x 24 columns]
hour         2017031507  2017031508  2017031509  2017031510  2017031511  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour         2017031512  2017031513  2017031514  2017031515  2017031516  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour            ...      2017031521  2017031522  2017031523  2017031600  \
smooth_pred     ...             1.0         1.0         1.0         1.0   

hour         2017031601  2017031602  2017031603  2017031604  2017031605  \
smooth_pred         1.0         1.0         1.0         1.0         1.0   

hour         2017031606  
smooth_pred         1.0  

[1 rows x 24 columns]
hour         2017033107  2017033108  2017033109  2017033110  2017033111  \
smooth_pred         0.0         0.0         0.0         0.0         0.0   

hour         2017033112  2017033113  2017033114  2017033115  2017033116  \
smooth_pred         0.0         0.0         0.0         0.0         0.0  

In [36]:
pd.DataFrame(np.array(mat)).to_csv('./sub_swell1.csv')