In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import copy
import scipy as sp
# Apply seaborn's default theme to the matplotlib figures drawn below.
sns.set_theme()

# Data loading, cleaning and feature preparation

In [None]:
# ## connecting to the google drive
# from google.colab import drive
# drive.mount('/content/drive')
# gpath = r'/content/drive/MyDrive/'
# df = pd.read_csv(gpath)
#!pip uninstall tensorflow
!pip install tensorflow


In [None]:
# Read the car-price dataset from the working directory. If the CSV is not
# there yet, the commented gdown command below presumably downloads it.
#!gdown --id '1rJ6b2nPGtvrkTz_0KCcAnmL8uECAr6Cy' --output CarPrice_Assignment.csv
df = pd.read_csv(filepath_or_buffer='CarPrice_Assignment.csv')

In [None]:
# Display the raw frame exactly as loaded from the CSV.
df

In [None]:
# Count the missing values in each column.
df.isna().sum()

In [None]:
# Work on an independent copy so the raw frame `df` stays untouched.
df1 = df.copy(deep=True)
# The manufacturer is taken to be the first space-separated token of CarName.
df1['CompanyName'] = df1['CarName'].str.split(" ").str[0]
# Discard the raw name plus the car_ID and symboling columns.
df1 = df1.drop(columns=['CarName', 'car_ID', 'symboling'])
df1

In [None]:
# List the distinct manufacturer labels extracted from CarName.
df1['CompanyName'].unique()

In [None]:
df2 = df1.copy(deep=True)

# (label found in the data, label it is rewritten to), applied in this order.
for wrong_name, right_name in (
    ('maxda', 'mazda'),
    ('Nissan', 'nissan'),
    ('porcshce', 'porsche'),
    ('toyouta', 'toyota'),
    ('vokswagen', 'volkswagen'),
    ('vw', 'volkswagen'),
):
    needs_fix = df2['CompanyName'] == wrong_name
    df2.loc[needs_fix, 'CompanyName'] = right_name

df2['CompanyName'].unique()

In [None]:
# Show the dtype of each column of the cleaned frame.
df2.dtypes

In [None]:
df3 = copy.deepcopy(df2)

# Columns that are replaced by one-hot indicator columns.
categorical_cols = ['fueltype', 'aspiration', 'doornumber', 'carbody', 'drivewheel', 'enginelocation', 'enginetype', 'cylindernumber', 'fuelsystem', 'CompanyName']
# Columns that are cast to float after encoding.
float_cols = ['curbweight', 'enginesize', 'horsepower', 'peakrpm', 'citympg', 'highwaympg']

# get_dummies(data=frame, columns=...) returns the WHOLE frame: the listed
# columns are replaced by their indicator columns and every other column is
# passed through unchanged. Concatenating that result with df3 again (as this
# cell used to do) duplicated every non-categorical column, so the result is
# used directly instead.
dummies = pd.get_dummies(data=df3, columns=categorical_cols)

# astype with a mapping returns a new frame, leaving `dummies` unmodified.
df4 = dummies.astype({col: float for col in float_cols})
df4

In [None]:
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

class MultiColumnLabelEncoder:
    """Label-encode several DataFrame columns in one call.

    Every selected column is encoded independently with a fresh
    ``LabelEncoder``. The encoders are created and fitted inside
    ``transform`` and are not stored, so ``fit`` does nothing and a mapping
    learned on one frame is not re-applied to another.

    Parameters
    ----------
    columns : list of str, optional
        Names of the columns to encode. ``None`` (the default) encodes every
        column of the frame passed to ``transform``.
    """

    def __init__(self,columns = None):
        self.columns = columns # array of column names to encode

    def fit(self,X,y=None):
        """Return ``self`` unchanged; nothing is learned at fit time."""
        return self # not relevant here

    def transform(self,X):
        '''
        Return a copy of X in which the columns named in self.columns are
        replaced by their LabelEncoder() codes. If no columns were
        specified, every column of X is encoded. X itself is not modified.
        '''
        output = X.copy()
        # DataFrame.iteritems() was removed in pandas 2.0, so the "all
        # columns" case resolves the column names up front and shares the
        # same loop as the explicit-columns case.
        if self.columns is None:
            target_columns = list(output.columns)
        else:
            target_columns = self.columns
        for col in target_columns:
            output[col] = LabelEncoder().fit_transform(output[col])
        return output

    def fit_transform(self,X,y=None):
        """Equivalent to ``fit(X, y)`` followed by ``transform(X)``."""
        return self.fit(X,y).transform(X)



# Integer-encode each categorical column of df3; the remaining columns are
# carried over unchanged.
df5 = MultiColumnLabelEncoder(
    columns=[
        'fueltype',
        'aspiration',
        'doornumber',
        'carbody',
        'drivewheel',
        'enginelocation',
        'enginetype',
        'cylindernumber',
        'fuelsystem',
        'CompanyName',
    ]
).fit_transform(df3)
df5


In [None]:
# Pairwise correlation matrix of the label-encoded frame.
corr = df5.corr()
# Show it as a table whose cells are shaded with the 'coolwarm' colour map.
corr.style.background_gradient(cmap='coolwarm')

In [None]:
# Histogram of the target variable; seaborn returns the Axes it drew on.
ax = sns.histplot(data=df5, x='price')
ax.set_title('Distribution of Prices')
plt.show()

In [None]:
# Price against engine size; seaborn returns the Axes it drew on.
ax = sns.scatterplot(data=df5, x='enginesize', y='price')
ax.set_title('Scatter plot of price based on engine size')
plt.show()

In [None]:
from sklearn.model_selection import train_test_split

# Separate the regression target from the features, then hold out 15% of the
# rows as a test set (fixed random_state so the split is repeatable).
y = df5['price']
X = df5.drop(columns='price')

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=2023,
)


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training split only, then rescale both splits with
# those training statistics.
scaler = MinMaxScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# X_train = X_train.toarray()
# X_test = X_test.toarray()

# Neural-network regression model

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras import backend as K
from sklearn.metrics import r2_score



In [None]:
def rmse(y_true, y_pred):
    """Root-mean-squared error between predictions and targets.

    Squares the element-wise residuals, averages them over all elements with
    the Keras backend and returns the square root of that mean.
    """
    residual = y_pred - y_true
    mean_squared = K.mean(K.square(residual))
    return K.sqrt(mean_squared)

def mse(y_true, y_pred):
    """Mean squared error, averaged over the last axis only.

    Because the reduction uses ``axis=-1``, the result keeps every leading
    dimension of the inputs.
    """
    return tf.reduce_mean(tf.square(y_true - y_pred), axis=-1)


# Alternative architectures kept for reference:
#model = Sequential([ Dense(256, activation='relu'), Dense(256, activation='relu'), Dense(128, activation='relu'), Dense(1)])
#model = Sequential([ Dense(256, activation='relu'), Dense(1) ])

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling are repeatable from one run of the
# notebook to the next. The value matches the random_state used for the
# train/test split.
tf.keras.utils.set_random_seed(2023)

# Two hidden ReLU layers followed by a single linear output unit.
model = Sequential([
    Dense(256, activation='relu'),
    Dense(256, activation='relu'),
    Dense(1),
])

# Optimise the custom `mse` loss with Adam and report `rmse` as a metric.
model.compile(
    loss=mse,
    optimizer=Adam(),
    metrics=[rmse],
)

# validation_split=0.25: Keras holds out the last 25% of the training samples
# (taken before shuffling) and reports val_* metrics on them each epoch.
history = model.fit(X_train, y_train, validation_split=0.25, epochs=5000)

In [None]:
# Training vs. validation loss per epoch. The model is compiled with the
# `mse` loss, so the 'loss' / 'val_loss' histories are mean squared errors
# (not RMSE and not an accuracy); the y-axis label reflects that.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
# The title previously began with a stray combining mark (U+064E); removed.
plt.title('Adam Optimizer - Relu Activation Function')
plt.ylabel('MSE')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='best')
plt.show()

In [None]:
# Predict on the held-out split and report the coefficient of determination.
predictions = model.predict(X_test)
r2_score(y_true=y_test, y_pred=predictions)

In [None]:
# First five test-set predictions. `predictions` was computed in the previous
# cell; running model.predict(X_test) again here would only repeat the same
# inference.
predictions[:5]

In [None]:
# First five true prices of the test split (positional slice), for
# side-by-side comparison with the predictions shown above.
y_test.iloc[:5]

In [None]:
from sklearn.metrics import mean_squared_error

# Mean squared error over just the five previewed test rows.
mean_squared_error(y_true=y_test.iloc[:5], y_pred=predictions[:5])