In [1]:
from datetime import datetime
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PowerTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.callbacks import EarlyStopping

ImportError: Traceback (most recent call last):
  File "C:\Users\Lenovo\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\LocalCache\local-packages\Python312\site-packages\tensorflow\python\pywrap_tensorflow.py", line 73, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed while importing _pywrap_tensorflow_internal: A dynamic link library (DLL) initialization routine failed.


Failed to load the native TensorFlow runtime.
See https://www.tensorflow.org/install/errors for some common causes and solutions.
If you need help, create an issue at https://github.com/tensorflow/tensorflow/issues and include the entire stack trace above this error message.

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Location of the input CSV, relative to the notebook's working directory.
data_path = "../Datasets/fin_ds.csv"
# Read the whole file into a DataFrame using pandas' default parsing options.
df = pd.read_csv(filepath_or_buffer=data_path)

In [None]:
# Convert both trip timestamp columns to pandas datetimes so they can be
# subtracted from each other.
for timestamp_col in ('tpep_pickup_datetime', 'tpep_dropoff_datetime'):
    df[timestamp_col] = pd.to_datetime(df[timestamp_col])

In [None]:
# Trip length in minutes: seconds elapsed between pickup and drop-off, divided by 60.
elapsed = df['tpep_dropoff_datetime'].sub(df['tpep_pickup_datetime'])
df['trip_duration'] = elapsed.dt.total_seconds().div(60)

In [None]:
# Keep only trips with a strictly positive duration; rows where drop-off is at
# or before pickup, and rows with a NaN duration, are discarded.
# .copy() makes the result an independent frame, so the column assignment done
# later (df['ordinal_datetime'] = ...) writes to this frame instead of to a
# slice of the unfiltered one, which is what triggers SettingWithCopyWarning.
df = df[df['trip_duration'] > 0].copy()


In [189]:
# Inspect the available columns before choosing the model features.
# NOTE(review): the recorded output lists both 'RateCodeID' and 'RatecodeID' —
# looks like a duplicated column from an upstream merge; confirm which is valid.
df.columns

Index(['VendorID', 'tpep_pickup_datetime', 'tpep_dropoff_datetime',
       'passenger_count', 'trip_distance', 'pickup_longitude',
       'pickup_latitude', 'RateCodeID', 'store_and_fwd_flag',
       'dropoff_longitude', 'dropoff_latitude', 'payment_type', 'fare_amount',
       'extra', 'mta_tax', 'tip_amount', 'tolls_amount',
       'improvement_surcharge', 'total_amount', 'RatecodeID', 'date_ordinal',
       'SNOW_pred', 'TAVG_pred', 'PRCP_pred', 'trip_duration'],
      dtype='object')

In [190]:
# Encode the pickup date as its proleptic Gregorian ordinal (a day count);
# the time-of-day part of the timestamp is not carried into this value.
pickup_times = df['tpep_pickup_datetime']
df['ordinal_datetime'] = pickup_times.apply(lambda stamp: stamp.toordinal())

In [191]:
# Model inputs: trip distance, pickup/drop-off coordinates, the pickup date
# ordinal, and the SNOW/TAVG/PRCP "*_pred" columns.
features = [
    'trip_distance',
    'pickup_longitude',
    'pickup_latitude',
    'dropoff_longitude',
    'dropoff_latitude',
    'ordinal_datetime',
    'SNOW_pred',
    'TAVG_pred',
    'PRCP_pred',
]
# Regression target: trip length in minutes.
target = 'trip_duration'

X = df.loc[:, features]
y = df.loc[:, target]

In [192]:
# Sample skewness of each feature column (one value per column of X).
skew = X.skew(axis=0)

In [193]:
# Collect the features whose skewness is NOT strictly inside (-1, 1).
# Values are paired with column names by position, in X's column order.
cols = [
    column_name
    for column_name, skew_value in zip(X.columns, skew)
    if not (-1 < skew_value < 1)
]

In [194]:
# Display the features whose skewness fell outside (-1, 1).
cols

['trip_distance',
 'pickup_longitude',
 'pickup_latitude',
 'dropoff_longitude',
 'dropoff_latitude',
 'ordinal_datetime']

In [195]:
# Yeo-Johnson is applied to the skewed features only; all remaining columns
# are passed through unchanged and appended after the transformed ones.
#
# 'ordinal_datetime' is excluded by name. The previous cols[:-1] slice only
# excluded it because it happened to be the last entry of `cols`; with a
# different skew result that slice would silently drop some other feature.
# NOTE(review): that the date ordinal is the column meant to be skipped is
# inferred from the recorded contents of `cols` — confirm this was the intent.
power_transform_cols = [c for c in cols if c != 'ordinal_datetime']

preprocessor = ColumnTransformer(
    transformers=[
        ('pt', PowerTransformer(method='yeo-johnson'), power_transform_cols)
    ],
    remainder='passthrough'
)

In [196]:
# Two-stage preprocessing: the column-wise power transform runs first, then
# every resulting column is standardised by StandardScaler.
preprocessing_steps = [
    ('preprocessor', preprocessor),
    ('scaler', StandardScaler()),
]
pipe = Pipeline(preprocessing_steps)

In [197]:
# Fit the preprocessing pipeline on the feature matrix and transform it in one call.
# NOTE(review): the pipeline is fitted on all rows, including the 20% that
# model.fit later holds out via validation_split, so the held-out rows
# influence the transform/scaling statistics. Consider fitting on the
# training rows only.
X_processed = pipe.fit_transform(X)

In [198]:
# Seed Python, NumPy and TensorFlow so that weight initialisation (and the
# batch shuffling done during training) is repeatable across kernel restarts.
tf.keras.utils.set_random_seed(42)

# Fully connected regressor: three ReLU hidden layers (128 -> 64 -> 32)
# feeding one linear output unit. The input width follows the number of
# columns produced by the preprocessing pipeline.
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_processed.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [199]:
# Adam optimiser with a 0.001 learning rate, minimising mean squared error.
adam = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(loss='mse', optimizer=adam)

In [200]:
# Stop once val_loss has not improved for 20 epochs and roll the model back to
# the weights of its best epoch. Without the callback all 250 epochs always
# run and the weights that are kept (and later saved) are those of the final
# epoch rather than the best one.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)

# validation_split=0.2 holds out the LAST 20% of the rows (taken before any
# shuffling) for validation; the remaining rows are used for training.
history = model.fit(X_processed, y,
                    validation_split=0.2,
                    epochs=250,
                    batch_size=32,
                    callbacks=[early_stopping],
                    verbose=1)

Epoch 1/250


[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 1081.8394 - val_loss: 134.1349
Epoch 2/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1884.9956 - val_loss: 50.0827
Epoch 3/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 586.6311 - val_loss: 69.3591
Epoch 4/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1701.0925 - val_loss: 44.8429
Epoch 5/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1050.5221 - val_loss: 73.0820
Epoch 6/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1502.4534 - val_loss: 62.3733
Epoch 7/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 1869.7634 - val_loss: 55.4725
Epoch 8/250
[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 889.8628 - val_loss: 70.8047
Epoch 9/250
[1m50/50[0m [3

In [None]:
# Write the trained network to disk and report where it went.
model_filename = '../models/trip_duration_model.h5'
model.save(filepath=model_filename)
print(f"Trained model saved as {model_filename}")



Trained model saved as trip_duration_model.h5


In [None]:
# Persist the fitted preprocessing pipeline (power transform + scaler) so the
# same transformation can be re-applied wherever the saved model is used.
pipeline_path = Path('../preprocessors/preprocessing_pipeline.pkl')
# joblib.dump opens the target file directly and raises if its folder is
# missing, so make sure the folder exists first.
pipeline_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(pipe, pipeline_path)

print("Saved pt as preprocessing_pipeline.pkl")

Saved pt as preprocessing_pipeline.pkl
