In [1]:
#general libraries
import pandas as pd

In [78]:
#Load train clean dataset
train=pd.read_csv('train_clean.csv')

In [79]:
train.drop(columns='Unnamed: 0',inplace=True)
train['WellFailure'].unique()

array(['NoEvent', 'yes', 'Manual off'], dtype=object)

In [80]:
train['WellFailure'].value_counts()

NoEvent       8044056
yes                83
Manual off         20
Name: WellFailure, dtype: int64

In [81]:
events=train.loc[train['WellFailure']!='NoEvent']
events=events[['WELL_ID','DATE','WellFailure']]

In [83]:
telemetry=train.drop(columns=['CODE','lat','lon','WellFailure','API'])
telemetry['DATE']=pd.to_datetime(telemetry['DATE'],format="%Y-%m-%d %H:%M:%S")
fields=['CURRENT', 'PRESS_DESC', 'FREQUENCY', 'PRESS_INT',
       'TEMP_INT', 'TEMP_MOT', 'OUT_VOLT', 'VIBRATION', 'BFPD',
       'BOPD', 'BWPD', 'MSCF', 'BSW', 'GOR(MSFC/BPPD)', 'GLR(SCF/BFPD)',
       'PROF_INTAKE', 'PSI_CAB']

# Calculate mean values for telemetry features
temp = []
for col in fields:
    temp.append(pd.pivot_table(telemetry,
                               index='DATE',
                               columns='WELL_ID',
                               values=col).resample('3H', closed='left', label='right').mean().unstack())
telemetry_mean_3h = pd.concat(temp, axis=1)
telemetry_mean_3h.columns = [i + 'mean_3h' for i in fields]
telemetry_mean_3h.reset_index(inplace=True)

# repeat for standard deviation
temp = []
for col in fields:
    temp.append(pd.pivot_table(telemetry,
                               index='DATE',
                               columns='WELL_ID',
                               values=col).resample('3H', closed='left', label='right').std().unstack())
telemetry_sd_3h = pd.concat(temp, axis=1)
telemetry_sd_3h.columns = [i + 'sd_3h' for i in fields]
telemetry_sd_3h.reset_index(inplace=True)

telemetry_mean_3h.head()

Unnamed: 0,WELL_ID,DATE,CURRENTmean_3h,PRESS_DESCmean_3h,FREQUENCYmean_3h,PRESS_INTmean_3h,TEMP_INTmean_3h,TEMP_MOTmean_3h,OUT_VOLTmean_3h,VIBRATIONmean_3h,BFPDmean_3h,BOPDmean_3h,BWPDmean_3h,MSCFmean_3h,BSWmean_3h,GOR(MSFC/BPPD)mean_3h,GLR(SCF/BFPD)mean_3h,PROF_INTAKEmean_3h,PSI_CABmean_3h
0,DEEPWATER-000XAR,2019-02-25 03:00:00,72.42,4252.87,57.44,1443.97,1443.97,281.56,0.0,0.17,1826.0,91.3,1734.7,11.28,95.0,123.55,6.18,8915.0,330.0
1,DEEPWATER-000XAR,2019-02-25 06:00:00,,,,,,,,,,,,,,,,,
2,DEEPWATER-000XAR,2019-02-25 09:00:00,,,,,,,,,,,,,,,,,
3,DEEPWATER-000XAR,2019-02-25 12:00:00,,,,,,,,,,,,,,,,,
4,DEEPWATER-000XAR,2019-02-25 15:00:00,,,,,,,,,,,,,,,,,


In [86]:
telemetry_mean_3h.isna().sum()

WELL_ID                       0
DATE                          0
CURRENTmean_3h           387313
PRESS_DESCmean_3h        387313
FREQUENCYmean_3h         387313
PRESS_INTmean_3h         387313
TEMP_INTmean_3h          387313
TEMP_MOTmean_3h          387313
OUT_VOLTmean_3h          387313
VIBRATIONmean_3h         387313
BFPDmean_3h              387313
BOPDmean_3h              387313
BWPDmean_3h              387313
MSCFmean_3h              387313
BSWmean_3h               387313
GOR(MSFC/BPPD)mean_3h    387313
GLR(SCF/BFPD)mean_3h     387313
PROF_INTAKEmean_3h       387313
PSI_CABmean_3h           387313
dtype: int64

In [88]:
telemetry_mean_3h.dropna(inplace=True)

In [89]:
telemetry_mean_3h

Unnamed: 0,WELL_ID,DATE,CURRENTmean_3h,PRESS_DESCmean_3h,FREQUENCYmean_3h,PRESS_INTmean_3h,TEMP_INTmean_3h,TEMP_MOTmean_3h,OUT_VOLTmean_3h,VIBRATIONmean_3h,BFPDmean_3h,BOPDmean_3h,BWPDmean_3h,MSCFmean_3h,BSWmean_3h,GOR(MSFC/BPPD)mean_3h,GLR(SCF/BFPD)mean_3h,PROF_INTAKEmean_3h,PSI_CABmean_3h
0,DEEPWATER-000XAR,2019-02-25 03:00:00,72.420000,4252.870000,57.44,1443.970000,1443.970000,281.560000,0.000000,0.17,1826.0,91.30,1734.70,11.28,95.0,123.55,6.18,8915.0,330.0
8,DEEPWATER-000XAR,2019-02-26 03:00:00,72.770000,4203.620000,57.44,1414.840000,1414.840000,281.090000,0.000000,0.15,1826.0,91.30,1734.70,11.28,95.0,123.55,6.18,8915.0,330.0
16,DEEPWATER-000XAR,2019-02-27 03:00:00,72.290000,4271.550000,57.45,1449.170000,1449.170000,281.550000,0.000000,0.15,1826.0,91.30,1734.70,11.28,95.0,123.55,6.18,8915.0,330.0
48,DEEPWATER-000XAR,2019-03-03 03:00:00,72.250000,4235.130000,57.44,1432.860000,1432.860000,280.890000,0.000000,0.17,1802.0,90.10,1711.90,11.16,95.0,123.86,6.19,8915.0,330.0
56,DEEPWATER-000XAR,2019-03-04 03:00:00,71.970000,4216.120000,57.44,1425.790000,1425.790000,280.570000,0.000000,0.16,1802.0,90.10,1711.90,11.16,95.0,123.86,6.19,8915.0,330.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630139,DEEPWATER-ZVH2KY,2021-05-25 00:00:00,35.958824,4140.511765,55.00,351.235294,351.235294,290.529412,339.984574,0.00,364.0,131.04,232.96,19.26,64.0,146.98,52.91,9462.0,140.0
630140,DEEPWATER-ZVH2KY,2021-05-25 03:00:00,35.983240,4137.424581,55.00,350.547486,350.547486,290.776536,339.984574,0.00,340.0,122.40,217.60,17.99,64.0,146.98,52.91,9462.0,140.0
630141,DEEPWATER-ZVH2KY,2021-05-25 06:00:00,35.949153,4141.412429,55.00,350.621469,350.621469,290.887006,339.984574,0.00,340.0,122.40,217.60,17.99,64.0,146.98,52.91,9462.0,140.0
630142,DEEPWATER-ZVH2KY,2021-05-25 09:00:00,35.525140,4143.843575,55.00,352.055866,352.055866,290.927374,339.984574,0.00,340.0,122.40,217.60,17.99,64.0,146.98,52.91,9462.0,140.0


In [91]:
events

Unnamed: 0,WELL_ID,DATE,WellFailure
34361,DEEPWATER-TPS1RK,2020-01-22 00:00:00,yes
56229,DEEPWATER-SF2VLY,2019-04-20 00:00:00,yes
204790,DEEPWATER-BP2VWC,2019-08-16 00:00:00,yes
204818,DEEPWATER-JCS1GP,2020-06-10 00:19:00,yes
368067,DEEPWATER-U3TTRI,2019-09-12 00:00:00,yes
...,...,...,...
7907968,DEEPWATER-MU3BOW,2019-10-15 00:00:00,Manual off
7910909,DEEPWATER-WJXDET,2019-06-23 00:00:00,yes
7911213,DEEPWATER-4TYG1P,2019-10-20 00:00:00,yes
8028466,DEEPWATER-BOYHWX,2021-02-17 00:19:53,yes


In [74]:
#AQUI COMIENZA OTRO ANALISIS RESULTADO -2.2
#imbalanced dataset

x=train.drop(columns='WellFailure')
y=train['WellFailure']

from imblearn.over_sampling import RandomOverSampler
from imblearn.under_sampling import RandomUnderSampler

sampling_strategy_under={'NoEvent':1000000,'yes':83,'Manual off':20}
sampling_strategy_over={'NoEvent':1000000,'yes':1000000,'Manual off':1000000}
over=RandomOverSampler(sampling_strategy=sampling_strategy_over)
under=RandomUnderSampler(sampling_strategy=sampling_strategy_under)

x,y=under.fit_resample(x,y)
x,y=over.fit_resample(x,y)




In [26]:
x_resample=pd.DataFrame(x)
y_resample=pd.DataFrame({"WellFailure":y})
train_resample=pd.concat([x_resample,y_resample],axis=1)
train_resample

Unnamed: 0,WELL_ID,DATE,CURRENT,PRESS_DESC,FREQUENCY,PRESS_INT,TEMP_INT,TEMP_MOT,OUT_VOLT,VIBRATION,...,BSW,GOR(MSFC/BPPD),GLR(SCF/BFPD),API,PROF_INTAKE,PSI_CAB,CODE,lat,lon,WellFailure
0,DEEPWATER-TU2XFX,2019-09-09 00:00:00,50.30,4226.6900,48.47,1235.68000,1235.68000,273.240000,380.290000,0.190000,...,92.00,123.47,9.88,17.8,9860.0,35.00,23,-30.601283,114.184325,Manual off
1,DEEPWATER-ISTQBO,2020-09-25 00:19:02,30.00,3863.0000,66.00,633.00000,633.00000,337.000000,339.984574,0.000000,...,5.65,54.50,0.00,18.2,9374.0,314.31,11,-30.653357,114.123028,Manual off
2,DEEPWATER-5DEUPB,2020-06-08 00:03:59,10.00,3340.0000,177.00,1209.00000,1209.00000,269.000000,339.984574,0.061469,...,1.00,84.01,83.17,17.7,9270.0,20.00,15,-30.573709,114.110298,Manual off
3,DEEPWATER-55KSI1,2020-02-02 00:00:00,25.00,4351.1997,64.00,468.19998,468.19998,273.899990,339.984574,0.800543,...,44.00,17.99,10.07,32.9,10589.0,225.00,71,-30.925794,114.073437,Manual off
4,DEEPWATER-MIVF4C,2019-06-08 00:00:00,44.67,0.0000,36.00,0.00000,0.00000,0.000000,319.720000,0.000000,...,99.00,18.15,0.18,30.0,10209.0,80.00,8,-30.979703,114.065784,Manual off
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2999995,DEEPWATER-CEO3A4,2019-08-02 00:00:00,80.51,0.0000,56.00,0.00000,0.00000,0.000000,448.760000,0.000000,...,66.00,84.03,28.57,16.7,9593.0,90.00,48,-30.789403,114.119879,yes
2999996,DEEPWATER-AHJLPP,2020-09-13 00:00:02,41.00,3617.0000,60.00,227.00000,227.00000,306.000000,339.984574,0.000000,...,14.00,167.88,144.38,26.7,9622.0,60.00,2,-30.518340,114.159216,yes
2999997,DEEPWATER-LBWAYA,2019-10-17 00:00:00,23.44,0.0000,54.00,868.60000,868.60000,242.800000,471.750000,0.200000,...,2.00,55.99,54.88,17.3,10440.0,220.00,69,-30.918752,114.079632,yes
2999998,DEEPWATER-05YWY5,2020-03-25 00:00:00,24.00,0.0000,82.50,94906.89800,94906.89800,30.200001,339.984574,0.061469,...,32.00,147.31,100.17,19.2,9589.0,60.00,64,-30.485102,114.108150,yes


In [32]:
train_resample['DATE']=pd.to_datetime(train_resample['DATE'])
train_resample=train_resample.sort_values(by=['DATE','WELL_ID'])
train_resample

Unnamed: 0,WELL_ID,DATE,CURRENT,PRESS_DESC,FREQUENCY,PRESS_INT,TEMP_INT,TEMP_MOT,OUT_VOLT,VIBRATION,...,BSW,GOR(MSFC/BPPD),GLR(SCF/BFPD),API,PROF_INTAKE,PSI_CAB,CODE,lat,lon,WellFailure
898302,DEEPWATER-453GJ0,2019-02-25 00:00:00,33.300000,4182.6500,63.00,952.18,952.18,271.86000,462.970000,0.17,...,78.0,123.44,27.16,17.3,9507.0,80.0,50,-30.456114,114.210049,NoEvent
503444,DEEPWATER-4TYG1P,2019-02-25 00:00:00,28.840000,4167.8900,49.00,2138.42,2138.42,254.45000,443.680000,0.18,...,80.0,9.01,1.80,32.2,9464.0,240.0,32,-30.690934,114.124038,NoEvent
4781,DEEPWATER-5MZAFB,2019-02-25 00:00:00,26.080000,3909.4600,51.06,181.10,181.10,289.54000,405.210000,0.31,...,1.0,84.03,83.19,18.7,9606.0,70.0,69,-30.640580,114.127680,NoEvent
215196,DEEPWATER-A5W4QG,2019-02-25 00:00:00,23.130000,3909.7400,54.00,308.29,308.29,262.47000,432.410000,0.24,...,30.1,229.02,160.09,26.5,9944.0,45.0,42,-30.595742,114.098613,NoEvent
850604,DEEPWATER-AHJLPP,2019-02-25 00:00:00,28.940000,3510.0100,59.95,263.68,263.68,281.15000,0.000000,0.32,...,18.0,167.95,137.72,26.7,9622.0,50.0,2,-30.518340,114.159216,NoEvent
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631877,DEEPWATER-EKA0CA,2021-05-25 09:25:58,21.299999,4097.3999,56.50,240.30,240.30,258.10001,339.984574,0.30,...,80.0,164.09,32.82,18.5,9482.0,160.0,42,-30.468663,114.096579,NoEvent
115163,DEEPWATER-1AD32W,2021-05-25 09:26:21,32.000000,3962.0000,61.00,225.00,225.00,327.00000,339.984574,0.00,...,69.0,162.99,50.53,18.6,9840.0,140.0,2,-30.470930,114.088172,NoEvent
309233,DEEPWATER-BP2VWC,2021-05-25 09:26:51,23.000000,4203.0000,60.00,330.00,330.00,267.00000,339.984574,0.00,...,34.0,163.02,107.59,19.4,9565.0,280.0,42,-30.485944,114.090897,NoEvent
816559,DEEPWATER-UY0V1O,2021-05-25 09:26:55,37.000000,4179.0000,62.00,343.00,343.00,337.00000,339.984574,0.00,...,84.0,84.00,13.44,17.3,9614.0,50.0,8,-30.786970,114.125435,NoEvent


In [33]:
#create date features
train_resample['YearEvent']=train_resample['DATE'].dt.year
train_resample['MonthEvent']=train_resample['DATE'].dt.month
train_resample['DayEvent']=train_resample['DATE'].dt.day
train_resample['HourEvent']=train_resample['DATE'].dt.hour
train_resample['MinuteEvent']=train_resample['DATE'].dt.minute


In [36]:
train_resample.reset_index(drop=True,inplace=True)
train_resample

Unnamed: 0,WELL_ID,DATE,CURRENT,PRESS_DESC,FREQUENCY,PRESS_INT,TEMP_INT,TEMP_MOT,OUT_VOLT,VIBRATION,...,PSI_CAB,CODE,lat,lon,WellFailure,YearEvent,MonthEvent,DayEvent,HourEvent,MinuteEvent
0,DEEPWATER-453GJ0,2019-02-25 00:00:00,33.300000,4182.6500,63.00,952.18,952.18,271.86000,462.970000,0.17,...,80.0,50,-30.456114,114.210049,NoEvent,2019,2,25,0,0
1,DEEPWATER-4TYG1P,2019-02-25 00:00:00,28.840000,4167.8900,49.00,2138.42,2138.42,254.45000,443.680000,0.18,...,240.0,32,-30.690934,114.124038,NoEvent,2019,2,25,0,0
2,DEEPWATER-5MZAFB,2019-02-25 00:00:00,26.080000,3909.4600,51.06,181.10,181.10,289.54000,405.210000,0.31,...,70.0,69,-30.640580,114.127680,NoEvent,2019,2,25,0,0
3,DEEPWATER-A5W4QG,2019-02-25 00:00:00,23.130000,3909.7400,54.00,308.29,308.29,262.47000,432.410000,0.24,...,45.0,42,-30.595742,114.098613,NoEvent,2019,2,25,0,0
4,DEEPWATER-AHJLPP,2019-02-25 00:00:00,28.940000,3510.0100,59.95,263.68,263.68,281.15000,0.000000,0.32,...,50.0,2,-30.518340,114.159216,NoEvent,2019,2,25,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2999995,DEEPWATER-EKA0CA,2021-05-25 09:25:58,21.299999,4097.3999,56.50,240.30,240.30,258.10001,339.984574,0.30,...,160.0,42,-30.468663,114.096579,NoEvent,2021,5,25,9,25
2999996,DEEPWATER-1AD32W,2021-05-25 09:26:21,32.000000,3962.0000,61.00,225.00,225.00,327.00000,339.984574,0.00,...,140.0,2,-30.470930,114.088172,NoEvent,2021,5,25,9,26
2999997,DEEPWATER-BP2VWC,2021-05-25 09:26:51,23.000000,4203.0000,60.00,330.00,330.00,267.00000,339.984574,0.00,...,280.0,42,-30.485944,114.090897,NoEvent,2021,5,25,9,26
2999998,DEEPWATER-UY0V1O,2021-05-25 09:26:55,37.000000,4179.0000,62.00,343.00,343.00,337.00000,339.984574,0.00,...,50.0,8,-30.786970,114.125435,NoEvent,2021,5,25,9,26


In [37]:
from sklearn.ensemble import RandomForestClassifier

x=train_resample.drop(columns=['WELL_ID','WellFailure','DATE'])
y=train_resample['WellFailure']
rf=RandomForestClassifier(max_depth=5,n_estimators=100,random_state=42)

rf.fit(x,y)

RandomForestClassifier(max_depth=5, random_state=42)

In [39]:
#load test dataset
test=pd.read_csv('test_clean.csv')
test.drop(columns='Unnamed: 0',inplace=True)


Unnamed: 0,WELL_ID,DATE,CURRENT,PRESS_DESC,FREQUENCY,PRESS_INT,TEMP_INT,TEMP_MOT,OUT_VOLT,VIBRATION,...,MSCF,BSW,GOR(MSFC/BPPD),GLR(SCF/BFPD),API,PROF_INTAKE,PSI_CAB,CODE,lat,lon
0,DEEPWATER-0RCI25,2020-02-18 00:00:00,56.0,4032.8999,52.900002,307.0,225.700010,265.79999,338.945726,0.442897,...,60.51,78.0,127.0,27.86,19.7,9143.0,140.0,11,-30.510954,114.155737
1,DEEPWATER-0RCI25,2020-02-18 00:10:00,56.0,4032.7000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,...,60.51,78.0,127.0,27.86,19.7,9143.0,140.0,11,-30.510954,114.155737
2,DEEPWATER-0RCI25,2020-02-18 00:20:00,56.0,4033.2000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,...,60.51,78.0,127.0,27.86,19.7,9143.0,140.0,11,-30.510954,114.155737
3,DEEPWATER-0RCI25,2020-02-18 00:30:00,56.0,4032.7000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,...,60.51,78.0,127.0,27.86,19.7,9143.0,140.0,11,-30.510954,114.155737
4,DEEPWATER-0RCI25,2020-02-18 00:40:00,56.0,4033.4998,52.900002,307.0,225.700010,265.79999,338.945726,0.442897,...,60.51,78.0,127.0,27.86,19.7,9143.0,140.0,11,-30.510954,114.155737
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050458,DEEPWATER-CY3WQW,2021-05-25 07:59:00,62.0,4180.0000,60.000000,754.0,226.899994,294.00000,338.945726,0.000000,...,48.72,60.0,84.0,33.60,19.8,9457.0,320.0,59,-30.668295,114.122952
1050459,DEEPWATER-CY3WQW,2021-05-25 08:19:00,62.0,4173.0000,60.000000,753.0,226.899994,294.00000,338.945726,0.000000,...,48.72,60.0,84.0,33.60,19.8,9457.0,320.0,59,-30.668295,114.122952
1050460,DEEPWATER-CY3WQW,2021-05-25 08:39:00,62.0,4180.0000,60.000000,752.0,226.899994,293.00000,338.945726,0.000000,...,48.72,60.0,84.0,33.60,19.8,9457.0,320.0,59,-30.668295,114.122952
1050461,DEEPWATER-CY3WQW,2021-05-25 08:59:00,62.0,4191.0000,60.000000,754.0,226.899994,293.00000,338.945726,0.000000,...,48.72,60.0,84.0,33.60,19.8,9457.0,320.0,59,-30.668295,114.122952


In [40]:
test['DATE']=pd.to_datetime(test['DATE'])
#create date features
test['YearEvent']=test['DATE'].dt.year
test['MonthEvent']=test['DATE'].dt.month
test['DayEvent']=test['DATE'].dt.day
test['HourEvent']=test['DATE'].dt.hour
test['MinuteEvent']=test['DATE'].dt.minute


In [44]:
x_test=test.drop(columns=['WELL_ID','WellFailure','DATE'])
x_test

Unnamed: 0,CURRENT,PRESS_DESC,FREQUENCY,PRESS_INT,TEMP_INT,TEMP_MOT,OUT_VOLT,VIBRATION,BFPD,BOPD,...,PROF_INTAKE,PSI_CAB,CODE,lat,lon,YearEvent,MonthEvent,DayEvent,HourEvent,MinuteEvent
0,56.0,4032.8999,52.900002,307.0,225.700010,265.79999,338.945726,0.442897,2172.0,477.84,...,9143.0,140.0,11,-30.510954,114.155737,2020,2,18,0,0
1,56.0,4032.7000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,2172.0,477.84,...,9143.0,140.0,11,-30.510954,114.155737,2020,2,18,0,10
2,56.0,4033.2000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,2172.0,477.84,...,9143.0,140.0,11,-30.510954,114.155737,2020,2,18,0,20
3,56.0,4032.7000,53.000000,307.0,225.700010,266.00000,338.945726,0.442897,2172.0,477.84,...,9143.0,140.0,11,-30.510954,114.155737,2020,2,18,0,30
4,56.0,4033.4998,52.900002,307.0,225.700010,265.79999,338.945726,0.442897,2172.0,477.84,...,9143.0,140.0,11,-30.510954,114.155737,2020,2,18,0,40
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1050458,62.0,4180.0000,60.000000,754.0,226.899994,294.00000,338.945726,0.000000,1450.0,580.00,...,9457.0,320.0,59,-30.668295,114.122952,2021,5,25,7,59
1050459,62.0,4173.0000,60.000000,753.0,226.899994,294.00000,338.945726,0.000000,1450.0,580.00,...,9457.0,320.0,59,-30.668295,114.122952,2021,5,25,8,19
1050460,62.0,4180.0000,60.000000,752.0,226.899994,293.00000,338.945726,0.000000,1450.0,580.00,...,9457.0,320.0,59,-30.668295,114.122952,2021,5,25,8,39
1050461,62.0,4191.0000,60.000000,754.0,226.899994,293.00000,338.945726,0.000000,1450.0,580.00,...,9457.0,320.0,59,-30.668295,114.122952,2021,5,25,8,59


In [53]:
target_prediction=rf.predict(x_test)

In [58]:
test_results=test[["WELL_ID","DATE"]]
test_results['WellFailure']=target_prediction.tolist()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_results['WellFailure']=target_prediction.tolist()


In [61]:
test_results['WellFailure'].value_counts()

NoEvent       1028971
yes             19754
Manual off       1738
Name: WellFailure, dtype: int64

In [63]:
results=test_results.loc[test_results['WellFailure']!='NoEvent']
results.reset_index(drop=True)

Unnamed: 0,WELL_ID,DATE,WellFailure
0,DEEPWATER-0RCI25,2020-02-18 00:00:00,yes
1,DEEPWATER-0RCI25,2020-02-18 00:10:00,yes
2,DEEPWATER-0RCI25,2020-02-19 00:00:00,yes
3,DEEPWATER-0RCI25,2020-02-19 00:10:00,yes
4,DEEPWATER-0RCI25,2020-02-20 00:00:00,yes
...,...,...,...
21487,DEEPWATER-CY3WQW,2021-05-21 00:19:00,yes
21488,DEEPWATER-CY3WQW,2021-05-22 00:19:00,yes
21489,DEEPWATER-CY3WQW,2021-05-23 00:19:00,yes
21490,DEEPWATER-CY3WQW,2021-05-24 00:19:00,yes


In [68]:
import numpy as np
test_wells=test['WELL_ID'].unique()
result = []
for i in test_wells:
    df = results[results['WELL_ID']==i][['WELL_ID','DATE','WellFailure']].reset_index().drop('index',axis=1)
    result.append(df.sample(1))

result = np.vstack(result)
result = pd.DataFrame(result,columns=['WELL_ID','DATE','WellFailure'])
result

Unnamed: 0,WELL_ID,DATE,WellFailure
0,DEEPWATER-0RCI25,2020-09-28 00:16:01,yes
1,DEEPWATER-F022ZE,2019-09-22 00:00:00,yes
2,DEEPWATER-RAPG5L,2020-10-04 00:04:04,yes
3,DEEPWATER-BJE55K,2020-12-15 00:04:16,yes
4,DEEPWATER-4ISLB0,2020-06-16 00:27:53,yes
5,DEEPWATER-S5VD4I,2021-01-03 00:18:46,yes
6,DEEPWATER-WJW1K1,2019-03-28 00:00:00,yes
7,DEEPWATER-YDSE0D,2019-06-19 00:00:00,Manual off
8,DEEPWATER-3OKFRE,2021-05-19 00:19:18,yes
9,DEEPWATER-GBQGBP,2021-04-28 00:00:06,yes


In [70]:
index_test=[0,9,6,10,2,3,8,5,13,12,1,7,14,4,11]
result=result.reindex(index_test)
result.reset_index(drop=True,inplace=True)

In [71]:
result

Unnamed: 0,WELL_ID,DATE,WellFailure
0,DEEPWATER-0RCI25,2020-09-28 00:16:01,yes
1,DEEPWATER-GBQGBP,2021-04-28 00:00:06,yes
2,DEEPWATER-WJW1K1,2019-03-28 00:00:00,yes
3,DEEPWATER-4OBKXS,2020-10-26 00:08:04,yes
4,DEEPWATER-RAPG5L,2020-10-04 00:04:04,yes
5,DEEPWATER-BJE55K,2020-12-15 00:04:16,yes
6,DEEPWATER-3OKFRE,2021-05-19 00:19:18,yes
7,DEEPWATER-S5VD4I,2021-01-03 00:18:46,yes
8,DEEPWATER-HFLRME,2019-10-06 00:00:00,yes
9,DEEPWATER-K0RPBL,2019-10-15 00:00:00,yes


In [73]:
# Write the prediction to a zip file
import zipfile
result.to_csv('predictions.csv',index=False)
zipfile.ZipFile('predictions.zip', mode='w').write("predictions.csv")