In [2]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
tf.random.set_seed(100)

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [3]:
df = sns.load_dataset('mpg')
print(df.shape)
df.describe(include='all')

df['horsepower'].fillna(df['horsepower'].median(), inplace=True)
df = df.drop(['name'], axis=1)
df = pd.get_dummies(df, columns=['cylinders'], drop_first=True, prefix='Cylinder')
df = pd.get_dummies(df, columns=['model_year'], drop_first=True, prefix='Year')
df = pd.get_dummies(df, columns=['origin'], drop_first=True, prefix='Origin')
df.head()

(398, 9)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['horsepower'].fillna(df['horsepower'].median(), inplace=True)


Unnamed: 0,mpg,displacement,horsepower,weight,acceleration,Cylinder_4,Cylinder_5,Cylinder_6,Cylinder_8,Year_71,...,Year_75,Year_76,Year_77,Year_78,Year_79,Year_80,Year_81,Year_82,Origin_japan,Origin_usa
0,18.0,307.0,130.0,3504,12.0,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True
1,15.0,350.0,165.0,3693,11.5,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True
2,18.0,318.0,150.0,3436,11.0,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True
3,16.0,304.0,150.0,3433,12.0,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True
4,17.0,302.0,140.0,3449,10.5,False,False,False,True,False,...,False,False,False,False,False,False,False,False,False,True


In [8]:
target_variable = ['mpg']
predictors = list(set(list(df.columns))-set(target_variable))
X = df[predictors].values
X = X.astype(float)
y = df[target_variable].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=100)

print(X_train.shape); print(X_test.shape)

model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(32, input_shape = (X_train.shape[1],), activation = 'relu'))
model.add(tf.keras.layers.Dense(16, activation= "relu"))
model.add(tf.keras.layers.Dense(8, activation= "relu"))
model.add(tf.keras.layers.Dense(4, activation= "relu"))
model.add(tf.keras.layers.Dense(1))

optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(loss = 'mae', metrics = ['mae'], optimizer = optimizer)

model.fit(X_train, y_train, epochs=50)

print(model.evaluate(X_train, y_train))

print(model.evaluate(X_test, y_test))

(278, 22)
(120, 22)
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 108.3466 - mae: 108.3466
Epoch 2/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 13.5544 - mae: 13.5544  
Epoch 3/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.9070 - mae: 11.9070 
Epoch 4/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 11.7164 - mae: 11.7164 
Epoch 5/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 11.2032 - mae: 11.2032 
Epoch 6/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10.6609 - mae: 10.6609
Epoch 7/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10.6225 - mae: 10.6225
Epoch 8/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 10.3112 - mae: 10.3112
Epoch 9/50
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 10.1145 - 