<a href="https://colab.research.google.com/github/omid-sakaki-ghazvini/Practices/blob/main/used_Car_prices.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Install Dependencies and Setup

<div style="direction:rtl">
<font color='green' size="5px">
 کتابخانه های مورد نیاز را نصب میکنیم
    </font>
</div>

In [None]:
#Data Analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

#preprocessing
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

#models
import tensorflow as tf
from tensorflow import keras

<div style="direction:rtl">
<font color='green' size="5px">
 از لینک زیر دیتاست را دانلود کرده و در پوشه هم مسیر همین ژوپیتر نوت بوک قرار دهید
    </font>
</div>

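## Data: Kaggle Playground Series S4E9 — https://www.kaggle.com/competitions/playground-series-s4e9 (the files loaded below), which is derived from https://www.kaggle.com/datasets/taeefnajib/used-car-price-prediction-dataset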

# 2. Load Data

<div style="direction:rtl">
<font color='green' size="5px">
توسط خط فرمان زیر، دیتا را فراخوانی میکنیم
    </font>
</div>

In [None]:
# Read the training split into a DataFrame and preview its first ten rows.
train_csv_path = '/kaggle/input/playground-series-s4e9/train.csv'
df = pd.read_csv(train_csv_path)
df.head(n=10)

In [None]:
# Print a structural overview of the training frame (columns, dtypes, non-null counts).
df.info()

In [None]:
# Display descriptive statistics for the training frame.
df.describe()

# 3. Data Preparation

In [None]:
# Count the missing entries in every column of the training frame.
missing_values_count = df.isna().sum()
missing_values_count

In [None]:
# Share of missing cells across the whole frame, expressed as a percentage.
# `np.prod` is used because the `np.product` alias was deprecated and then
# removed in NumPy 2.0, where calling it raises AttributeError.
total_cells = np.prod(df.shape)
total_missing = missing_values_count.sum()

percent_missing = (total_missing / total_cells) * 100
print(percent_missing)

In [None]:
# Discard every row that lacks a value in any of these categorical columns.
required_columns = ['fuel_type', 'accident', 'clean_title']
df.dropna(subset=required_columns, inplace=True)

In [None]:
# Recount the missing entries per column to confirm the effect of the row drop.
missing_values_count = df.isna().sum()
missing_values_count

In [None]:
def remove_outliers(df, column):
    """Return the rows of ``df`` whose ``column`` value lies inside the IQR fences.

    The fences are ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` (both inclusive),
    where Q1 and Q3 are the 25th and 75th percentiles of ``df[column]``.
    Rows whose value is missing fail the range check and are dropped as well.
    The surviving rows keep their original index labels; ``df`` is not modified.
    """
    values = df[column]
    first_quartile = values.quantile(0.25)
    third_quartile = values.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)
    in_range = values.between(first_quartile - whisker, third_quartile + whisker)
    return df[in_range]

# Trim outliers one column at a time. Order matters: the price fences are
# computed on the rows that survived the milage filter. Afterwards the rows
# are renumbered from 0.
for outlier_column in ('milage', 'price'):
    df = remove_outliers(df, outlier_column)
df = df.reset_index(drop=True)

In [None]:
# Replace each categorical column with integer codes. One LabelEncoder is
# re-fitted on every column in turn, so after the loop it only remembers the
# classes of the last column processed ('clean_title').
encoder = LabelEncoder()
categorical_columns = [
    'brand', 'model', 'fuel_type', 'engine', 'transmission',
    'ext_col', 'int_col', 'accident', 'clean_title',
]
for categorical_column in categorical_columns:
    df[categorical_column] = encoder.fit_transform(df[categorical_column])

# 4. Split & Scale Data

<div style="direction:rtl">
<font color='green' size="5px">
 نرمالسازی دیتا
    </font>
</div>

In [None]:
# Randomly reorder the rows; the fixed random_state makes the permutation repeatable.
shuffled_data = shuffle(df, random_state = 100)

In [None]:
# Separate the target from the predictors: 'price' is what the model predicts,
# and both it and the 'id' column are excluded from the feature matrix.
y = shuffled_data['price']
X = shuffled_data.drop(columns=['id', 'price'])

In [None]:
# Hold out 20% of the rows for testing. The fixed random_state keeps the
# train/test membership identical from run to run, so the reported test
# metrics are always computed on the same rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=100)

print("The size of the input train data is: {}".format(X_train.shape))
print("The size of the output train data is: {}".format(y_train.shape))
print("The size of the input test data is: {}".format(X_test.shape))
print("The size of the output test data is: {}".format(y_test.shape))

In [None]:
# Fit the min/max scaling on the training split only, then apply that same
# fitted scaler to the test split.
scaler = MinMaxScaler()
X_train_new = scaler.fit_transform(X_train)
X_test_new = scaler.transform(X_test)

# 5. Build Models

In [None]:
# Quick look at the scaled training matrix: overall shape, then the shape and
# values of its first row.
first_row = X_train_new[0]
print(X_train_new.shape)
print(first_row.shape)
print(first_row)

In [None]:
# Add a trailing axis of length 1 so each sample becomes (features, 1): every
# feature is treated as one step carrying a single value.
sample_size, time_steps = X_train_new.shape  # rows / feature count of the scaled train matrix
input_dimension = 1                          # one number per feature

train_data_reshaped = X_train_new.reshape((sample_size, time_steps, input_dimension))
first_train_sample = train_data_reshaped[0]
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n", first_train_sample.shape)
print("An example sample:\n", first_train_sample)

In [None]:
def build_conv1D_model(input_shape=None):
    """Build and compile the 1-D convolutional regression network.

    Args:
        input_shape: Optional ``(n_timesteps, n_features)`` tuple describing a
            single sample. When omitted, the last two dimensions of the
            module-level ``train_data_reshaped`` array are used.

    Returns:
        A compiled ``keras.Sequential`` model named ``"model_conv1D"`` with a
        single-unit output layer, mean-squared-error loss, an RMSprop
        optimizer (learning rate 0.001) and a root-mean-squared-error metric.
    """
    if input_shape is None:
        input_shape = train_data_reshaped.shape[1:]
    n_timesteps, n_features = input_shape

    model = keras.Sequential(name="model_conv1D")
    model.add(keras.layers.Input(shape=(n_timesteps, n_features)))
    model.add(keras.layers.Conv1D(filters=128, kernel_size=7, activation='relu', name="Conv1D_1"))
    model.add(keras.layers.Dropout(0.5))
    model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', name="Conv1D_2"))
    model.add(keras.layers.Conv1D(filters=32, kernel_size=2, activation='relu', name="Conv1D_3"))
    model.add(keras.layers.MaxPooling1D(pool_size=2, name="MaxPooling1D"))
    model.add(keras.layers.Flatten())
    model.add(keras.layers.Dense(32, activation='relu', name="Dense_1"))
    # Regression head: exactly one output per sample, regardless of how many
    # channels the input carries.
    model.add(keras.layers.Dense(1, name="Dense_2"))

    # Hand the optimizer to compile() explicitly so the learning rate
    # configured here is the one that training actually uses.
    optimizer = tf.keras.optimizers.RMSprop(learning_rate=0.001)

    model.compile(
        optimizer=optimizer,
        loss="mean_squared_error",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )
    return model

# Instantiate the network and print its layer-by-layer summary.
model_conv1D = build_conv1D_model()
model_conv1D.summary()

In [None]:
# NOTE(review): this looks like the first line of printed `summary()` output that
# was pasted into a code cell. Python parses it as a bare variable annotation, so
# running it has no effect; the cell can probably be deleted.
Model: "model_conv1D"

In [None]:
# Train on the reshaped training set, holding back 20% of it for validation.
# The per-epoch metrics are kept in `history` for the learning-curve plot.
n_epochs = 500
history = model_conv1D.fit(
    train_data_reshaped,
    y_train,
    epochs=n_epochs,
    validation_split=0.2,
    verbose=1,
)

In [None]:
# Learning curves: RMSE per epoch for the training and validation portions.
plt.figure()
plt.xlabel('Epoch')
plt.ylabel('Root Mean Squared Error')
for metric_key, curve_label in (('root_mean_squared_error', 'Train'),
                                ('val_root_mean_squared_error', 'Val')):
    plt.plot(history.epoch, np.array(history.history[metric_key]), label=curve_label)
plt.legend()

# 6. Results

In [None]:
# Give the scaled test matrix the same 3-D layout as the training data by
# appending an axis of length 1 (one value per feature).
sample_size, time_steps = X_test_new.shape  # rows / feature count of the scaled test matrix
input_dimension = 1                         # one number per feature

test_data_reshaped = X_test_new.reshape((sample_size, time_steps, input_dimension))
first_test_sample = test_data_reshaped[0]
print("After reshape test data set shape:\n", test_data_reshaped.shape)
print("1 Sample shape:\n", first_test_sample.shape)
print("An example sample:\n", first_test_sample)

In [None]:
# Predict on the same 3-D test array that evaluate() receives below, so both
# calls feed the model input of the shape it was built for.
y_predict = model_conv1D.predict(test_data_reshaped)
y_predict = pd.DataFrame(y_predict, columns = ['Predicted price'])

# y_test still carries the shuffled row labels; drop them so the actual prices
# line up positionally with the freshly numbered predictions.
results = pd.concat([y_predict, y_test.to_frame().reset_index(drop = True)], axis = 1, ignore_index = False)
print(results.head(),'\n')

# evaluate() returns the loss followed by the compiled metric (RMSE).
[loss, root_mean_squared_error] = model_conv1D.evaluate(test_data_reshaped, y_test, verbose=0)
print("Testing set RMSE: ", root_mean_squared_error)

In [None]:
# Scatter of predicted against actual price with a fitted regression line
# (ci=99 draws a 99% confidence band around it).
scatter_style = {"color": "black", "alpha": 0.5}
line_style = {"color": "red"}

plt.figure(figsize=(10, 10))
sns.regplot(x='price', y='Predicted price', data=results,
            scatter_kws=scatter_style, line_kws=line_style, ci=99)
plt.title("Comparision of predicted values and the actual values", fontsize=20)
plt.show()

In [None]:
# Read the competition test split (the rows to predict) and preview its first ten rows.
test_csv_path = '/kaggle/input/playground-series-s4e9/test.csv'
df_test = pd.read_csv(test_csv_path)
df_test.head(n=10)

In [None]:
# Integer-encode the categorical columns of the test file.
# NOTE(review): the encoder is fitted afresh on the test file here, so a given
# category can receive a different integer code than it did in the training
# frame. Confirm whether the training-time mapping should be reused instead.
encoder = LabelEncoder()
test_categorical_columns = (
    'brand', 'model', 'fuel_type', 'engine', 'transmission',
    'ext_col', 'int_col', 'accident', 'clean_title',
)
for test_column in test_categorical_columns:
    df_test[test_column] = encoder.fit_transform(df_test[test_column])

In [None]:
# Prepare the submission features the same way as the held-out split.
# The scaler fitted on X_train is reused: re-fitting MinMaxScaler on the test
# file would map the same raw value to a different scaled value than the model
# saw during training. Separate names are used so the training `X` and the
# fitted `scaler` are not overwritten.
submission_features = df_test.drop(columns=['id'])
submission_scaled = scaler.transform(submission_features)

# Append the length-1 axis so the input matches the (features, 1) sample shape
# the model was built with.
submission_reshaped = submission_scaled.reshape(
    submission_scaled.shape[0], submission_scaled.shape[1], 1
)

y_pred = model_conv1D.predict(submission_reshaped)

In [None]:
# Load the sample submission file to inspect the expected output layout.
sample_submission_path = '/kaggle/input/playground-series-s4e9/sample_submission.csv'
df_target = pd.read_csv(sample_submission_path)
df_target.head()

In [None]:
# Assemble the submission table: one row per test id, with the predictions in
# a column named after the last column of the sample submission, so the
# written file has the same header as that sample.
target_column = df_target.columns[-1]

# predict() returns a 2-D (n_rows, 1) array; flatten it to one value per row.
test_preds_final = np.asarray(y_pred).copy().ravel()

# Copy the id column so adding the prediction column does not write into a
# view of df_test.
submission_file = df_test[['id']].copy()
submission_file[target_column] = test_preds_final
submission_file = submission_file.set_index("id")
submission_file

In [None]:
# Write the submission table to a CSV file in the Kaggle working directory.
submission_file.to_csv("/kaggle/working/submission.csv")