# Load data and imports

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from joblib import dump, load
import warnings
warnings.filterwarnings("ignore")
import sys
sys.path.append(r'D:\Hacks\re_new')

In [2]:
from src.utils import load_config, load_data, comp_score
from src.engine import get_model

config = load_config()
random_seed = config['RAND']

In [3]:
train_data = pd.read_csv('../data/raw/train.csv')
test_data = pd.read_csv('../data/raw/test.csv')
train_data.shape, test_data.shape

((909604, 16), (303202, 15))

In [4]:
traint1, testt1 = load_data()
traint1.shape, testt1.shape

((909604, 21), (303202, 20))

In [5]:
test_data.timestamp = pd.to_datetime(test_data.timestamp)
train_data.timestamp = pd.to_datetime(train_data.timestamp)

# Simple Interpolation

In [37]:
# lineraly interpolate the current target values
test_data['is_test'] = 1
train_data['is_test'] = 0
df = pd.concat([train_data, test_data], axis=0)
df.reset_index(inplace=True)
df.sort_values(by='timestamp', inplace=True)
# df = df[['index', 'timestamp', 'is_test', 'Target', 'turbine_id']]  # doesnot share same memory
fav_cols = ['index', 'timestamp', 'is_test', 'Target', 'turbine_id']
df[fav_cols].head()

Unnamed: 0,index,timestamp,is_test,Target,turbine_id
45421,45421,2021-01-01 00:01:00,0,43.43087,Turbine_108
658721,658721,2021-01-01 00:02:00,0,40.715544,Turbine_120
545675,545675,2021-01-01 00:03:00,0,40.706413,Turbine_120
772206,772206,2021-01-01 00:04:00,0,40.769313,Turbine_139
1155106,245502,2021-01-01 00:06:00,1,,Turbine_158


In [38]:
df['simple_interp'] = df.Target.interpolate(method='linear')
df

Unnamed: 0,index,timestamp,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp
45421,45421,2021-01-01 00:01:00,828.392253,847.524780,23.749945,1164.353088,56.914901,833.398712,23.851615,23.613581,138.066790,166.923848,228.054690,8.184143,0.535525,Turbine_108,43.430870,0,43.430870
658721,658721,2021-01-01 00:02:00,734.902262,751.122874,19.137422,1119.673584,55.196616,809.185425,27.618101,25.004128,124.705297,153.301956,43.115978,6.780914,0.600904,Turbine_120,40.715544,0,40.715544
545675,545675,2021-01-01 00:03:00,748.204336,767.003316,19.113608,1128.340881,55.682657,804.593099,27.610003,25.033796,122.845019,157.638268,43.992285,6.605300,0.533855,Turbine_120,40.706413,0,40.706413
772206,772206,2021-01-01 00:04:00,1115.585986,1143.363770,18.536383,1200.487305,62.660133,1161.573462,25.932306,24.076670,192.730591,224.663141,72.256642,8.286794,0.523392,Turbine_139,40.769313,0,40.769313
1155106,245502,2021-01-01 00:06:00,975.188029,1005.913961,25.460711,1200.243095,57.926095,1018.267802,26.737586,23.928331,158.431951,196.259565,50.205800,7.899686,0.339865,Turbine_158,,1,41.650291
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
554025,554025,2021-12-31 23:58:00,635.630473,647.702566,9.400681,973.038859,49.969453,625.765503,30.920345,26.883738,106.336980,139.864756,123.650436,6.972114,0.578641,Turbine_103,42.614979,0,42.614979
83942,83942,2021-12-31 23:58:00,951.135590,977.182892,8.780563,1197.069234,58.330886,902.190328,20.349027,21.095316,157.545405,194.551659,78.606181,7.534583,0.606685,Turbine_105,43.700722,0,43.700722
668082,668082,2021-12-31 23:58:00,858.007161,885.301178,31.106428,1178.025533,59.719817,807.887004,30.799557,27.352941,146.320142,172.715530,212.963961,7.155286,0.445924,Turbine_20,43.792723,0,43.792723
214355,214355,2021-12-31 23:58:00,908.267832,933.286235,18.620179,1198.289958,57.573762,779.126373,30.178709,25.337340,137.384988,180.152397,69.111694,7.281371,0.530622,Turbine_13,43.219498,0,43.219498


In [39]:
df.simple_interp.isna().sum()

0

In [40]:
# interpolated values for test data
simple_interp_df = df[df.is_test == 1].sort_values(by='index').reset_index(drop=True)
simple_interp_df.set_index('index', inplace=True)
simple_interp_df

Unnamed: 0_level_0,timestamp,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
0,2021-05-06 15:02:00,15.507537,19.956882,40.233264,410.538834,57.357366,84.008399,48.826597,45.363477,2.419202,1.468876,156.399396,2.438629,0.790435,Turbine_14,,1,49.696873
1,2021-12-04 06:11:00,472.577255,478.373881,12.306687,965.077563,51.796727,492.531362,27.457511,24.045565,96.152070,119.378235,56.443802,5.900371,0.544198,Turbine_19,,1,43.345530
2,2021-08-31 19:03:00,447.829615,451.158264,29.106771,948.035899,60.493098,393.151576,39.410553,35.715697,90.251529,117.192635,224.540263,5.924923,0.513481,Turbine_14,,1,47.575203
3,2021-09-19 19:01:00,254.014336,249.438965,27.074900,804.963776,55.741899,178.715101,29.476841,31.151895,89.273838,105.086257,182.151426,4.409707,0.448881,Turbine_120,,1,47.205669
4,2021-04-04 02:21:00,1174.930566,1202.654077,32.148434,1199.754858,67.272313,1115.850317,31.920549,30.864277,189.731989,236.053320,261.672180,7.822326,0.561465,Turbine_158,,1,51.052348
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
303197,2021-06-15 20:14:00,1754.986613,1797.301392,39.138250,1201.708130,85.610317,1631.455973,38.890467,38.542006,341.591410,404.388580,322.968750,10.402939,1.133143,Turbine_10,,1,49.211527
303198,2021-04-17 20:23:00,961.416738,970.462962,33.718848,1194.447693,72.564436,979.040192,33.418560,33.190632,158.035688,186.806035,309.976791,7.312443,0.829828,Turbine_97,,1,48.308387
303199,2021-10-16 09:14:00,5.583539,18.309381,29.148418,771.026876,62.844919,114.049692,29.836640,31.198618,56.898274,62.813501,330.593165,3.404537,0.622060,Turbine_120,,1,47.199278
303200,2021-12-11 18:55:00,34.711766,26.472111,31.942072,769.952612,63.219437,28.972680,30.332543,31.662434,15.277295,21.483844,133.001864,3.597401,0.252044,Turbine_01,,1,44.427677


In [41]:
# check 
(simple_interp_df.timestamp == test_data.timestamp).sum()

303202

In [42]:
simple_interp_preds = simple_interp_df.simple_interp.values
simple_interp_preds.shape

(303202,)

In [43]:
submission = pd.read_csv(r'../data/raw/submission.csv')
display(submission.head())
submission.Target = simple_interp_preds
submission.head()

Unnamed: 0,Target
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


Unnamed: 0,Target
0,49.696873
1,43.34553
2,47.575203
3,47.205669
4,51.052348


In [44]:
submission.to_csv('../submissions/simple_interp.csv', index=False)

# Interpolate by Turbine

In [45]:
df.head() # sorted by timestamp

Unnamed: 0,index,timestamp,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp
45421,45421,2021-01-01 00:01:00,828.392253,847.52478,23.749945,1164.353088,56.914901,833.398712,23.851615,23.613581,138.06679,166.923848,228.05469,8.184143,0.535525,Turbine_108,43.43087,0,43.43087
658721,658721,2021-01-01 00:02:00,734.902262,751.122874,19.137422,1119.673584,55.196616,809.185425,27.618101,25.004128,124.705297,153.301956,43.115978,6.780914,0.600904,Turbine_120,40.715544,0,40.715544
545675,545675,2021-01-01 00:03:00,748.204336,767.003316,19.113608,1128.340881,55.682657,804.593099,27.610003,25.033796,122.845019,157.638268,43.992285,6.6053,0.533855,Turbine_120,40.706413,0,40.706413
772206,772206,2021-01-01 00:04:00,1115.585986,1143.36377,18.536383,1200.487305,62.660133,1161.573462,25.932306,24.07667,192.730591,224.663141,72.256642,8.286794,0.523392,Turbine_139,40.769313,0,40.769313
1155106,245502,2021-01-01 00:06:00,975.188029,1005.913961,25.460711,1200.243095,57.926095,1018.267802,26.737586,23.928331,158.431951,196.259565,50.2058,7.899686,0.339865,Turbine_158,,1,41.650291


In [46]:
df['turbine_interp'] = df.groupby('turbine_id').Target.transform(lambda x: x.interpolate(method='linear'))
df.head()

Unnamed: 0,index,timestamp,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp
45421,45421,2021-01-01 00:01:00,828.392253,847.52478,23.749945,1164.353088,56.914901,833.398712,23.851615,23.613581,138.06679,166.923848,228.05469,8.184143,0.535525,Turbine_108,43.43087,0,43.43087,43.43087
658721,658721,2021-01-01 00:02:00,734.902262,751.122874,19.137422,1119.673584,55.196616,809.185425,27.618101,25.004128,124.705297,153.301956,43.115978,6.780914,0.600904,Turbine_120,40.715544,0,40.715544,40.715544
545675,545675,2021-01-01 00:03:00,748.204336,767.003316,19.113608,1128.340881,55.682657,804.593099,27.610003,25.033796,122.845019,157.638268,43.992285,6.6053,0.533855,Turbine_120,40.706413,0,40.706413,40.706413
772206,772206,2021-01-01 00:04:00,1115.585986,1143.36377,18.536383,1200.487305,62.660133,1161.573462,25.932306,24.07667,192.730591,224.663141,72.256642,8.286794,0.523392,Turbine_139,40.769313,0,40.769313,40.769313
1155106,245502,2021-01-01 00:06:00,975.188029,1005.913961,25.460711,1200.243095,57.926095,1018.267802,26.737586,23.928331,158.431951,196.259565,50.2058,7.899686,0.339865,Turbine_158,,1,41.650291,


In [69]:
df.turbine_interp.isna().sum()

6

In [136]:
df[df.turbine_interp.isna()]

Unnamed: 0_level_0,index,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp,turbine_time_interp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1


In [134]:
df.turbine_time_interp.isna().sum()

0

In [99]:
def fill_nan(time_filt, turb_filt, fill_val):
    print(len(df.loc[time_filt & turb_filt, 'turbine_interp']))
    df.loc[time_filt & turb_filt, 'turbine_interp'] = fill_val

In [115]:
time_filt = df.timestamp == '2021-01-02 02:25:00'
turb_filt = df.turbine_id == 'Turbine_20'
fill_val  = 47.852784

fill_nan(time_filt, turb_filt, fill_val)

1


In [117]:
df.turbine_interp.isna().sum()

0

In [118]:
df.groupby('turbine_id').turbine_interp.mean()

turbine_id
Turbine_01     49.280993
Turbine_10     47.182159
Turbine_103    45.612789
Turbine_105    46.583535
Turbine_108    46.548586
Turbine_120    45.514540
Turbine_123    44.641471
Turbine_13     46.859653
Turbine_139    45.642383
Turbine_14     47.729593
Turbine_15     46.393593
Turbine_158    45.562730
Turbine_18     45.186908
Turbine_19     44.469251
Turbine_20     48.427286
Turbine_97     45.833282
Name: turbine_interp, dtype: float64

In [119]:
df.groupby('turbine_id').Target.mean()

turbine_id
Turbine_01     49.288458
Turbine_10     47.186210
Turbine_103    45.604759
Turbine_105    46.585878
Turbine_108    46.549006
Turbine_120    45.517760
Turbine_123    44.642011
Turbine_13     46.855068
Turbine_139    45.643603
Turbine_14     47.722849
Turbine_15     46.388378
Turbine_158    45.563365
Turbine_18     45.178387
Turbine_19     44.463866
Turbine_20     48.420630
Turbine_97     45.834314
Name: Target, dtype: float64

In [114]:
turbine_grps = df.groupby('turbine_id')
turbine_grps.get_group('Turbine_20').head()

Unnamed: 0,index,timestamp,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp
987897,78293,2021-01-02 02:25:00,202.260071,198.010391,27.106218,770.538574,53.749584,237.285889,26.956478,23.938574,92.307285,100.151318,99.667236,4.888677,0.322256,Turbine_20,,1,44.409175,
238012,238012,2021-01-02 02:27:00,190.664602,187.240105,27.06981,770.172348,53.649899,230.102872,26.945115,23.936914,77.515058,100.441306,96.662727,4.993029,0.327042,Turbine_20,47.852784,0,47.852784,47.852784
584533,584533,2021-01-02 02:31:00,204.342817,199.319427,27.059847,769.806122,53.830396,216.297534,26.928567,23.933576,86.258389,100.384851,97.811091,4.966969,0.291123,Turbine_20,47.857822,0,47.857822,47.857822
725340,725340,2021-01-02 02:35:00,207.485819,203.88827,27.054995,770.172348,52.962278,210.900416,26.940365,23.925137,80.264147,100.696611,102.918546,4.699339,0.33228,Turbine_20,47.857327,0,47.857327,47.857327
1035661,126057,2021-01-02 02:36:00,193.378916,183.169317,27.050652,770.172348,53.148089,209.176145,26.920683,23.919107,89.113515,99.804035,98.526863,4.893309,0.361944,Turbine_20,,1,46.328551,47.859572


In [138]:
df.reset_index(inplace=True)

In [152]:
df[df.is_test == 1].sort_values(by ='index').sort_index().index

Int64Index([      4,       7,      16,      17,      18,      20,      29,
                 33,      39,      43,
            ...
            1212754, 1212756, 1212769, 1212772, 1212778, 1212782, 1212785,
            1212790, 1212793, 1212797],
           dtype='int64', length=303202)

In [140]:
turbine_interp_df = df[df.is_test == 1].sort_values(by='index')
turbine_interp = turbine_interp_df.turbine_interp.values
turbine_interp

array([49.97506141662595, 41.068369971381294, 48.0619128545125, ...,
       47.02201223373415, 47.21695709228521, 44.23345413208008],
      dtype=object)

In [150]:
test_data.index

RangeIndex(start=0, stop=303202, step=1)

In [141]:
turbine_interp_df.timestamp == test_data.timestamp

ValueError: Can only compare identically-labeled Series objects

In [131]:
turbine_interp_df.timestamp.equals(test_data.timestamp)

False

In [121]:
submission.Target = turbine_interp
submission.to_csv('../submissions/turbine_interp.csv', index=False)

In [157]:
interpolated_test_df = df[df.is_test == 1]
interpolated_test_df.sort_values(by="index", inplace=True)
interpolated_test_df

Unnamed: 0,timestamp,index,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,...,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp,turbine_time_interp
400232,2021-05-06 15:02:00,0,15.507537,19.956882,40.233264,410.538834,57.357366,84.008399,48.826597,45.363477,...,1.468876,156.399396,2.438629,0.790435,Turbine_14,,1,49.696873,49.975061,49.987435
1120790,2021-12-04 06:11:00,1,472.577255,478.373881,12.306687,965.077563,51.796727,492.531362,27.457511,24.045565,...,119.378235,56.443802,5.900371,0.544198,Turbine_19,,1,43.345530,41.06837,41.052191
813024,2021-08-31 19:03:00,2,447.829615,451.158264,29.106771,948.035899,60.493098,393.151576,39.410553,35.715697,...,117.192635,224.540263,5.924923,0.513481,Turbine_14,,1,47.575203,48.061913,48.082066
873943,2021-09-19 19:01:00,3,254.014336,249.438965,27.074900,804.963776,55.741899,178.715101,29.476841,31.151895,...,105.086257,182.151426,4.409707,0.448881,Turbine_120,,1,47.205669,46.36187,46.362888
291474,2021-04-04 02:21:00,4,1174.930566,1202.654077,32.148434,1199.754858,67.272313,1115.850317,31.920549,30.864277,...,236.053320,261.672180,7.822326,0.561465,Turbine_158,,1,51.052348,45.220352,45.270599
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
534516,2021-06-15 20:14:00,303197,1754.986613,1797.301392,39.138250,1201.708130,85.610317,1631.455973,38.890467,38.542006,...,404.388580,322.968750,10.402939,1.133143,Turbine_10,,1,49.211527,51.116238,51.118635
337595,2021-04-17 20:23:00,303198,961.416738,970.462962,33.718848,1194.447693,72.564436,979.040192,33.418560,33.190632,...,186.806035,309.976791,7.312443,0.829828,Turbine_97,,1,48.308387,47.857227,47.878211
954881,2021-10-16 09:14:00,303199,5.583539,18.309381,29.148418,771.026876,62.844919,114.049692,29.836640,31.198618,...,62.813501,330.593165,3.404537,0.622060,Turbine_120,,1,47.199278,47.022012,47.038485
1148857,2021-12-11 18:55:00,303200,34.711766,26.472111,31.942072,769.952612,63.219437,28.972680,30.332543,31.662434,...,21.483844,133.001864,3.597401,0.252044,Turbine_01,,1,44.427677,47.216957,47.227842


In [158]:
interpolated_test_df.isna().sum()

timestamp                                      0
index                                          0
active_power_calculated_by_converter           0
active_power_raw                               0
ambient_temperature                            0
generator_speed                                0
generator_winding_temp_max                     0
grid_power10min_average                        0
nc1_inside_temp                                0
nacelle_temp                                   0
reactice_power_calculated_by_converter         0
reactive_power                                 0
wind_direction_raw                             0
wind_speed_raw                                 0
wind_speed_turbulence                          0
turbine_id                                     0
Target                                    303202
is_test                                        0
simple_interp                                  0
turbine_interp                                 0
turbine_time_interp 

In [161]:
interpolated_test_df.reset_index(inplace=True, drop=True)
interpolated_test_df.index

RangeIndex(start=0, stop=303202, step=1)

In [163]:
(test_data.timestamp == interpolated_test_df.timestamp).sum()

303202

In [164]:
interpolated_test_df.to_csv('../data/test_interpolations.csv', index=False)

In [169]:
train_data_interpolations = df[df.is_test == 0]
train_data_interpolations.sort_values(by='index', inplace=True)
train_data_interpolations.reset_index(drop=True, inplace=True)
train_data_interpolations

Unnamed: 0,timestamp,index,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,...,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp,turbine_time_interp
0,2021-02-19 20:18:00,0,816.636759,834.917206,31.694380,1159.616602,65.954214,917.897085,31.881972,31.504713,...,165.501518,280.864782,7.057000,0.544082,Turbine_108,47.582787,0,47.582787,47.582787,47.582787
1,2021-04-27 04:55:00,1,419.107829,421.050873,12.894948,928.747996,59.571319,445.554250,32.423705,32.755770,...,113.835236,299.552460,5.474937,0.469031,Turbine_18,46.070328,0,46.070328,46.070328,46.070328
2,2021-01-25 06:26:00,2,1303.530558,1337.566142,16.648388,1201.219775,61.270498,1364.716003,11.446849,18.332985,...,281.452253,84.960106,8.092457,0.622318,Turbine_105,39.989236,0,39.989236,39.989236,39.989236
3,2021-10-30 03:47:00,3,61.494872,53.481008,28.388141,769.806122,40.674348,14.324897,34.253204,32.662889,...,75.017531,87.261119,4.071032,0.760719,Turbine_15,46.056587,0,46.056587,46.056587,46.056587
4,2021-03-15 00:39:00,4,593.514364,611.659108,31.519527,1046.916768,64.341763,599.020172,32.405586,31.466387,...,160.202421,313.724818,6.357943,0.346068,Turbine_01,54.346095,0,54.346095,54.346095,54.346095
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
909599,2021-04-25 19:12:00,909599,929.101908,948.441905,32.736076,1187.303192,83.187140,1343.320577,32.291391,35.152280,...,191.088800,189.901812,7.155343,1.028960,Turbine_13,50.768675,0,50.768675,50.768675,50.768675
909600,2021-02-20 17:37:00,909600,100.730526,88.694599,30.540500,770.245593,56.235497,177.620552,30.434302,29.932141,...,96.621675,93.417590,4.121607,0.595874,Turbine_158,44.234821,0,44.234821,44.234821,44.234821
909601,2021-10-22 14:18:00,909601,1120.915965,1165.016907,30.901129,1170.456860,62.942943,873.414622,30.491818,33.085164,...,74.111173,230.988398,8.551669,0.951241,Turbine_15,46.942486,0,46.942486,46.942486,46.942486
909602,2021-02-08 22:03:00,909602,123.444564,116.066919,32.697933,770.190796,57.245207,95.865704,32.338824,31.525384,...,97.470984,65.408801,4.324064,0.247335,Turbine_97,46.392221,0,46.392221,46.392221,46.392221


In [171]:
train_data_interpolations.to_csv('../data/train_interp.csv', index=False)

In [170]:
df.isna().sum()

timestamp                                      0
index                                          0
active_power_calculated_by_converter           0
active_power_raw                               0
ambient_temperature                            0
generator_speed                                0
generator_winding_temp_max                     0
grid_power10min_average                        0
nc1_inside_temp                                0
nacelle_temp                                   0
reactice_power_calculated_by_converter         0
reactive_power                                 0
wind_direction_raw                             0
wind_speed_raw                                 0
wind_speed_turbulence                          0
turbine_id                                     0
Target                                    303202
is_test                                        0
simple_interp                                  0
turbine_interp                                 0
turbine_time_interp 

0

## timebased interpolation

In [122]:
df.set_index('timestamp', inplace=True)
df.head()

Unnamed: 0_level_0,index,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-01-01 00:01:00,45421,828.392253,847.52478,23.749945,1164.353088,56.914901,833.398712,23.851615,23.613581,138.06679,166.923848,228.05469,8.184143,0.535525,Turbine_108,43.43087,0,43.43087,43.43087
2021-01-01 00:02:00,658721,734.902262,751.122874,19.137422,1119.673584,55.196616,809.185425,27.618101,25.004128,124.705297,153.301956,43.115978,6.780914,0.600904,Turbine_120,40.715544,0,40.715544,40.715544
2021-01-01 00:03:00,545675,748.204336,767.003316,19.113608,1128.340881,55.682657,804.593099,27.610003,25.033796,122.845019,157.638268,43.992285,6.6053,0.533855,Turbine_120,40.706413,0,40.706413,40.706413
2021-01-01 00:04:00,772206,1115.585986,1143.36377,18.536383,1200.487305,62.660133,1161.573462,25.932306,24.07667,192.730591,224.663141,72.256642,8.286794,0.523392,Turbine_139,40.769313,0,40.769313,40.769313
2021-01-01 00:06:00,245502,975.188029,1005.913961,25.460711,1200.243095,57.926095,1018.267802,26.737586,23.928331,158.431951,196.259565,50.2058,7.899686,0.339865,Turbine_158,,1,41.650291,41.221699


In [123]:
df.index = pd.to_datetime(df.index)
df.sort_index(inplace=True)
df.index

DatetimeIndex(['2021-01-01 00:01:00', '2021-01-01 00:02:00',
               '2021-01-01 00:03:00', '2021-01-01 00:04:00',
               '2021-01-01 00:06:00', '2021-01-01 00:07:00',
               '2021-01-01 00:08:00', '2021-01-01 00:08:00',
               '2021-01-01 00:09:00', '2021-01-01 00:09:00',
               ...
               '2021-12-31 23:56:00', '2021-12-31 23:56:00',
               '2021-12-31 23:57:00', '2021-12-31 23:57:00',
               '2021-12-31 23:57:00', '2021-12-31 23:58:00',
               '2021-12-31 23:58:00', '2021-12-31 23:58:00',
               '2021-12-31 23:58:00', '2021-12-31 23:59:00'],
              dtype='datetime64[ns]', name='timestamp', length=1212806, freq=None)

In [124]:
df['turbine_time_interp'] = df.groupby('turbine_id').Target.transform(lambda x: x.interpolate(method='time', limit_direction='both', limit=5))
df.head()

Unnamed: 0_level_0,index,active_power_calculated_by_converter,active_power_raw,ambient_temperature,generator_speed,generator_winding_temp_max,grid_power10min_average,nc1_inside_temp,nacelle_temp,reactice_power_calculated_by_converter,reactive_power,wind_direction_raw,wind_speed_raw,wind_speed_turbulence,turbine_id,Target,is_test,simple_interp,turbine_interp,turbine_time_interp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2021-01-01 00:01:00,45421,828.392253,847.52478,23.749945,1164.353088,56.914901,833.398712,23.851615,23.613581,138.06679,166.923848,228.05469,8.184143,0.535525,Turbine_108,43.43087,0,43.43087,43.43087,43.43087
2021-01-01 00:02:00,658721,734.902262,751.122874,19.137422,1119.673584,55.196616,809.185425,27.618101,25.004128,124.705297,153.301956,43.115978,6.780914,0.600904,Turbine_120,40.715544,0,40.715544,40.715544,40.715544
2021-01-01 00:03:00,545675,748.204336,767.003316,19.113608,1128.340881,55.682657,804.593099,27.610003,25.033796,122.845019,157.638268,43.992285,6.6053,0.533855,Turbine_120,40.706413,0,40.706413,40.706413,40.706413
2021-01-01 00:04:00,772206,1115.585986,1143.36377,18.536383,1200.487305,62.660133,1161.573462,25.932306,24.07667,192.730591,224.663141,72.256642,8.286794,0.523392,Turbine_139,40.769313,0,40.769313,40.769313,40.769313
2021-01-01 00:06:00,245502,975.188029,1005.913961,25.460711,1200.243095,57.926095,1018.267802,26.737586,23.928331,158.431951,196.259565,50.2058,7.899686,0.339865,Turbine_158,,1,41.650291,41.221699,41.221699


In [127]:
turbine_time_interp = df[df.is_test == 1].turbine_time_interp
turbine_time_interp.shape

(303202,)

In [125]:
df.turbine_time_interp.isna().sum()

0

In [175]:
interpolated_test_df.timestamp.equals(test_data.timestamp)

True

In [176]:
submission.Target = interpolated_test_df.turbine_time_interp
submission.to_csv('../submissions/turbine_time_interp.csv', index=False)

6

In [46]:
df.sample(10)

Unnamed: 0_level_0,is_test,Target,turbine_id,i1,turbine_interp,turbine_time_interp
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-04-15 13:57:00,,46.709452,Turbine_105,46.709452,46.709452,46.709452
2021-11-21 15:10:00,,46.151436,Turbine_20,46.151436,46.151436,46.151436
2021-06-23 05:58:00,,47.200356,Turbine_01,47.200356,47.200356,47.200356
2021-12-16 11:14:00,,43.899961,Turbine_108,43.899961,43.899961,43.899961
2021-10-31 21:36:00,1.0,,Turbine_20,48.90218,43.485014,47.069935
2021-06-24 08:43:00,,46.659775,Turbine_20,46.659775,46.659775,46.659775
2021-04-08 23:18:00,,46.844733,Turbine_18,46.844733,46.844733,46.844733
2021-04-01 09:19:00,,52.640194,Turbine_01,52.640194,52.640194,52.640194
2021-06-22 16:09:00,,48.318395,Turbine_20,48.318395,48.318395,48.318395
2021-09-25 17:24:00,,49.210909,Turbine_01,49.210909,49.210909,49.210909


# 