In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LeakyReLU, PReLU
from keras.layers import Dropout

In [52]:
def main():
    #importing data
    train_df = pd.read_csv('train.csv')
    test_df = pd.read_csv('test.csv')
    
    order_id = test_df['order_id']
    
    #preprocessing data
    train_df = fix_columns(train_df)
    test_df = fix_columns_2(test_df)
   
     
    y = train_df['cancelled']
    X = train_df.drop(['cancelled'], axis = 1)
    
    test_df = fix_na(test_df)
    X = fix_na(X)    
    
    X = scale_numeric(X)
    test_df = scale_numeric(test_df)
    
    classifier = Sequential()
    #first hidden layer
    classifier.add(Dense(units=9,kernel_initializer='he_uniform',activation='relu',input_dim=9))
    #second hidden layer
    classifier.add(Dense(units=9,kernel_initializer='he_uniform',activation='relu'))
    # last layer or output layer
    classifier.add(Dense(units=1,kernel_initializer='glorot_uniform',activation='sigmoid'))
    #taking summary of layers
    classifier.summary()
    #compiling the ANN
    classifier.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])
    model = train(classifier,X,y)
    y_pred = model.predict(test_df)   
    print(y_pred)
    y_pred = (y_pred>0.5).astype(int)
    y_pred = y_pred.reshape((144844,))
    ans = pd.DataFrame({'order_id': order_id, 'cancelled': y_pred})
    ans.to_csv('ans.csv', index = False)
    

In [51]:
#Preprocess data
def fix_columns(df):
    df = df.drop(['order_id','first_mile_distance', 'reassigned_order', 'reassignment_method', 'reassignment_reason','rider_id',], axis = 1)
    df['order_time'] = pd.to_datetime(df['order_time'])
    df['allot_time'] = pd.to_datetime(df['allot_time'])
    df['accept_time'] = pd.to_datetime(df['accept_time'])
    t1 = df['allot_time'] - df['order_time']
    t2 = df['accept_time'] - df['allot_time']
    df['allot_duration'] = t1
    df['accept_duration'] = t2
    df = df.drop(['allot_time','accept_time','pickup_time','cancelled_time','delivered_time','order_time'], axis = 1)
    df['accept_duration'] = pd.to_timedelta(df['accept_duration']).dt.total_seconds()
    df['allot_duration'] = pd.to_timedelta(df['allot_duration']).dt.total_seconds()
    return df
    
def fix_columns_2(df):
    df = df.drop(['order_id','first_mile_distance', 'reassigned_order', 'reassignment_method', 'reassignment_reason','rider_id',], axis = 1)
    df['order_time'] = pd.to_datetime(df['order_time'])
    df['allot_time'] = pd.to_datetime(df['allot_time'])
    df['accept_time'] = pd.to_datetime(df['accept_time'])
    t1 = df['allot_time'] - df['order_time']
    t2 = df['accept_time'] - df['allot_time']
    df['allot_duration'] = t1
    df['accept_duration'] = t2
    df = df.drop(['allot_time','accept_time','order_time'], axis = 1)
    df['accept_duration'] = pd.to_timedelta(df['accept_duration']).dt.total_seconds()
    df['allot_duration'] = pd.to_timedelta(df['allot_duration']).dt.total_seconds()
    return df

def fix_na(df):
    lr = LinearRegression()
    imp = IterativeImputer(estimator = lr, tol = 1e-10, max_iter = 50, verbose = 2, imputation_order = 'roman')
    imp.fit_transform(df)
    df = imp.transform(df)
    df = pd.DataFrame(df)
    return df

def scale_numeric(df):
    x = df.values 
    scaler = preprocessing.StandardScaler()
    x_scaled = scaler.fit_transform(x)
    df = pd.DataFrame(x_scaled)
    return df

In [53]:
# Train model
def train(model,X_train, y_train):
    model.fit(X_train,y_train,batch_size=100,epochs=50)
    return model

In [54]:
# call the main function
if __name__ == '__main__':
    main()

ValueError: could not convert string to float: '2021-02-06 00:00:00'