In [None]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn import preprocessing
# Cross Validation
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [None]:
import warnings
# Silence every warning category for the rest of the notebook.
warnings.simplefilter("ignore")
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8*' and later removed the
# old names, so use whichever spelling this Matplotlib installation provides.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

In [None]:
# Read the car listing CSV; `data` is the parsed file and `df` is the frame used from here on
CSV_PATH = '../input/car-dataset/cardata.csv'
data = pd.read_csv(CSV_PATH)
df = pd.DataFrame(data)
df

In [None]:
# Concise summary of the frame (columns, non-null counts, dtypes)
df.info()

In [None]:
# (rows, columns) of the frame
df.shape

In [None]:
# Summary statistics for every column, numeric and non-numeric alike
df.describe(include = 'all')

In [None]:
# Summary statistics with pandas' default column selection
df.describe()

In [None]:
# Summary statistics restricted to object-dtype columns
df.describe(include = 'object')

In [None]:
# Number of missing (null) values per column
df.isnull().sum()

In [None]:
# Column labels as a pandas Index
df.columns

In [None]:
# Same labels as a plain Python list, via Index.tolist()
df.columns.tolist()

In [None]:
# Print the distinct values of every column so that placeholder markers for missing
# data (such as '-' or '?') can be spotted by eye
for column_name in df.columns:
    distinct_values = df[column_name].unique()
    print(distinct_values)

In [None]:
# Report how many rows the frame has and how many of them repeat an earlier row.
# The labels promise counts, so print counts rather than (rows, columns) shape tuples.
print("number of rows: ", len(df))
print("number of duplicate rows: ", int(df.duplicated().sum()))

In [None]:
# Rows that are exact repeats of an earlier row
df[df.duplicated()]

In [None]:
# All 2016 'ertiga' listings
df[(df['Car_Name'] == 'ertiga' ) & (df['Year'] == 2016)]

In [None]:
# All 'fortuner' listings with a selling price of exactly 23.00
df[(df['Car_Name'] == 'fortuner' ) & (df['Selling_Price'] == 23.00)]

In [None]:
df = df.drop_duplicates()
df.shape

In [None]:
df.reset_index(drop=True, inplace=True)

In [None]:
df

In [None]:
# Columns treated as categorical variables in the count and bar plots
cat_cols = ['Year', 'Fuel_Type','Seller_Type','Transmission', 'Owner']

In [None]:
plt.figure(figsize = [15,10])
plt.suptitle('Categorical variables countplot\n', fontsize=30)
# One panel per categorical column on a 3x2 grid (subplot positions are 1-based)
for i, col in enumerate(cat_cols, start=1):
    plt.subplot(3,2,i)
    ax = sns.countplot(x = df[col], data = df)
    plt.title(f'{col} countplot\n', fontdict={'size':25})
    plt.xlabel(col, size=15)
    plt.xticks(size=12, c='r', rotation = 30)

    # Write each bar's height just above the bar, centred horizontally
    for rect in ax.patches:
        bar_height = rect.get_height()
        bar_centre = rect.get_x() + rect.get_width() / 2
        ax.text(bar_centre, bar_height + 0.75, bar_height, horizontalalignment='center', fontsize = 15)

plt.tight_layout()
plt.show()

In [None]:
plt.figure(figsize = [15,10])
plt.suptitle('Categorical variable analysis(Barplot)\n', fontsize=30)
# One panel per categorical column: Selling_Price plotted against the category
for i, col in enumerate(cat_cols, start=1):
    plt.subplot(3,2,i)
    sns.barplot(x=df[col], y=df['Selling_Price'])
    plt.title(f'{col} vs selling_price\n', fontdict={'size':25})
    plt.xlabel(col, size=15)
    plt.xticks(size=12, c='r', rotation = 45)

plt.tight_layout()

In [None]:
# Bars are ordered from the most to the least frequent car name
Loc_order = df['Car_Name'].value_counts().index
plt.figure(figsize=(20, 8))
ax = sns.countplot(data=df, x='Car_Name', order=Loc_order)
ax.set_title('Number of cars', color = 'red', fontsize = 20)
ax.set_xlabel('Car_Name')
ax.set_ylabel('counts')
plt.xticks(rotation = 90)

# Write each bar's height just above the bar, centred horizontally
for rect in ax.patches:
    bar_height = rect.get_height()
    bar_centre = rect.get_x() + rect.get_width() / 2
    ax.text(bar_centre, bar_height + 0.75, bar_height, horizontalalignment='center', fontsize = 15)

plt.show()

In [None]:
# Share of listings (in percent) for every car name, most frequent first
name_counts = df['Car_Name'].value_counts()
percent = name_counts * 100 / name_counts.sum()

# The ten most frequent car names
popular = percent.index[:10]

# Horizontal bars: one per name, bar length = percentage of listings
plt.barh(popular, width=percent[:10])
plt.title('Top 10 Car brands')
plt.show()

In [None]:
plt.figure(figsize = [10,6])

# sns.distplot is deprecated and scheduled for removal; histplot with a KDE overlay on a
# density scale is the replacement seaborn documents for it.
sns.histplot(df['Selling_Price'], kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()

In [None]:
# Largest value in the Year column
maxYear = df['Year'].max()
maxYear

In [None]:
# Age of every car measured against a fixed reference year.
REFERENCE_YEAR = 2019
# Vectorised subtraction keeps df's own index, so the result lines up row-for-row with df
# when it is later inserted as a column; to_frame() keeps it a one-column DataFrame.
Car_Age = (REFERENCE_YEAR - df['Year']).to_frame(name='Car_Age')
Car_Age

In [None]:
df

In [None]:
# Using DataFrame.insert() to add a column
df.insert(1, "Car_Age", Car_Age)
df.drop(['Year'], axis=1 , inplace=True)

In [None]:
df

In [None]:
# Rearrange the columns so that the features come first and Selling_Price comes last
column_order = [
    'Car_Name', 'Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission',
    'Owner', 'Car_Age', 'Present_Price', 'Selling_Price',
]
df = df.reindex(columns=column_order)

df

In [None]:
# Distinct fuel types present in the data
df['Fuel_Type'].unique()

In [None]:
# Distinct seller types present in the data
df['Seller_Type'].unique()

In [None]:
# Distinct transmission types present in the data
df['Transmission'].unique()

In [None]:
# Replace the text labels of the three categorical columns with integer codes.
# Series.map turns any label that is missing from its mapping into NaN.
category_codes = {
    "Fuel_Type": {"Petrol": 2, "Diesel": 3, "CNG": 4},
    "Seller_Type": {"Dealer": 2, "Individual": 3},
    "Transmission": {"Manual": 2, "Automatic": 3},
}
for column_name, codes in category_codes.items():
    df[column_name] = df[column_name].map(codes)
df.head()

In [None]:
# Number of rows per Fuel_Type value
from collections import Counter
Counter(df['Fuel_Type'])

In [None]:
# Number of rows per Seller_Type value, most frequent first
df['Seller_Type'].value_counts()

In [None]:
# The same Seller_Type counts, as a Counter
Counter(df['Seller_Type'])

In [None]:
# Number of rows per Transmission value, most frequent first
df['Transmission'].value_counts()

In [None]:
# The same Transmission counts, as a Counter
Counter(df['Transmission'])

In [None]:
plt.figure(figsize=(6, 8))
# Correlate numeric columns only: df still contains the text column Car_Name here, and
# pandas >= 2.0 raises instead of silently skipping non-numeric columns in corr().
target_corr = (
    df.select_dtypes(include='number')
    .corr()[['Selling_Price']]
    .sort_values(by='Selling_Price', ascending=False)
)
heatmap = sns.heatmap(target_corr, vmin=-1, vmax=1, annot=True, cmap='RdYlGn')
heatmap.set_title('Features Correlating with Selling_Price', fontdict={'fontsize':18}, pad=16);

In [None]:
plt.figure(figsize=(10,8))
# Pairwise correlations of the numeric columns; the text column Car_Name is excluded
# explicitly because pandas >= 2.0 raises on non-numeric columns in corr().
sns.heatmap(df.select_dtypes(include='number').corr(), annot=True, cmap='BrBG')

In [None]:
# Correlation table of the numeric columns (pandas >= 2.0 raises on text columns in corr())
df.select_dtypes(include='number').corr()

In [None]:
# Grid of pairwise plots for the frame's variables
sns.pairplot(df)

In [None]:
df

In [None]:
# Summary statistics after the categorical columns were mapped to integer codes
df.describe()

In [None]:
# Numeric columns examined in the boxplot analysis
num_cols = ['Kms_Driven', 'Car_Age','Present_Price','Selling_Price']

In [None]:
plt.figure(figsize = [20,10])
plt.suptitle('Boxplot Analysis\n', fontsize=30)
# One boxplot per numeric column on a 2x2 grid (subplot positions are 1-based)
for i, col in enumerate(num_cols, start=1):
    plt.subplot(2,2,i)
    sns.boxplot(x=df[col])
    plt.title(f'{col} Boxplot\n', fontdict={'size':25})
    plt.xlabel(f'{col}\n\n', size=15)
    plt.xticks(size=15, c='r', rotation = 0)

plt.tight_layout()

In [None]:
# Rows whose Present_Price exceeds 75
df[df['Present_Price'] > 75]

In [None]:
# Rows whose Kms_Driven exceeds 400000
df[df['Kms_Driven'] > 400000]

In [None]:
# To drop Car_Name column
#del df["Car_Name"]
df.drop('Car_Name', axis=1, inplace=True)

In [None]:
df

<font color='red'> **Price prediction:** </font>

> **Car with these features:**
  *   Present_Price = 11.23
  *   Kms_Driven = 42000
  *   Fuel_Type = Petrol
  *   Seller_Type = Dealer
  *   Transmission = Manual
  *   Owner = 1
  *   Age = 10
  *   Price = ??

In [None]:
# Separate the feature matrix from the target; y is an (n_rows, 1) column vector
feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                   'Car_Age', 'Present_Price']
x = pd.DataFrame(df, columns=feature_columns)
y = df['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:

    # Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
# Rescale every column of df (features and target alike) to the range [0, 1].
# NOTE(review): the scaler is fitted on all rows before any train/test split, so the test
# rows influence the scaling — confirm this is acceptable for the comparison made here.
Scaler = preprocessing.MinMaxScaler(feature_range=(0,1))
Norm1 = Scaler.fit_transform(df)
# Take labels and index from df itself: fit_transform returns a bare array whose columns are
# in df's order, so a hand-typed label list could silently mislabel them.
Norm1_df = pd.DataFrame(Norm1, columns=df.columns, index=df.index)

In [None]:
# The min-max scaled frame
Norm1_df

In [None]:
# Summary statistics of the scaled columns
Norm1_df.describe()

In [None]:
# Separate the scaled feature matrix from the scaled target; y is an (n_rows, 1) column vector
feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                   'Car_Age', 'Present_Price']
x = pd.DataFrame(Norm1_df, columns=feature_columns)
y = Norm1_df['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:

    # Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:

    # Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
# Go back to the unscaled data: feature matrix plus an (n_rows, 1) target column vector
feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                   'Car_Age', 'Present_Price']
x = pd.DataFrame(df, columns=feature_columns)
y = df['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:

    # Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
print('<============ Cross validation ================>')
# Score a fresh linear model on KF folds; KFold is used with its default settings
KF = 10
model_new = LinearRegression()
kfold_validation = KFold(n_splits=KF)
results = cross_val_score(model_new, x, y, cv=kfold_validation)
# Per-fold scores followed by their mean
print(results)
print(results.mean())

In [None]:
# Remove the rows that belong to selected cross-validation folds.
# Number of folds (same as the KFold used for cross-validation)
KF = 10
# F = fold/folds we want to delete, counted from 1
F = 5, 6
# Approximate fold size, kept for reference only
A = round(len(df)/KF)

# Ask KFold itself which row positions make up each fold instead of hard-coding an index
# range: the previous `index > 120 & index <= 180` filter was shifted by one row relative to
# the folds KFold(10) builds (rows 120..179 for folds 5 and 6 of a 299-row frame).
fold_positions = [test_pos for _, test_pos in KFold(KF).split(df)]
drop_positions = np.concatenate([fold_positions[fold - 1] for fold in F])

# we drop fold 5,6
DF_ = df.drop(index=df.index[drop_positions]).reset_index(drop=True)
DF_

In [None]:
# Feature matrix and (n_rows, 1) target column vector from the frame with folds removed
feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                   'Car_Age', 'Present_Price']
x = pd.DataFrame(DF_, columns=feature_columns)
y = DF_['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:

    # Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
# Feature matrix X and (n_rows, 1) target column vector Y taken from the full frame
feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                   'Car_Age', 'Present_Price']
X = pd.DataFrame(df, columns=feature_columns)
Y = df['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:
# Search for a better score by adding power features one column at a time
def check (Dimension, testsize, baseline_r2 = 0.8098343974158059):
    """Greedily add ``column ** Dimension`` features to the global frame ``X``.

    For every column present in ``X`` when the call starts, a new column named
    ``"<column>^<Dimension>"`` is inserted at position 0, a linear regression is
    fitted on a train/test split of the global ``X`` and ``Y``, and the new column
    is kept only if the test R2 is at least the best score seen so far.

    Parameters
    ----------
    Dimension : exponent applied to each column.
    testsize : ``test_size`` passed to ``train_test_split`` (``random_state=0``).
    baseline_r2 : score a candidate must reach to be kept; the default is the value
        that was previously hard-coded in this function.

    Side effects: mutates the global ``X`` in place, prints the best R2 and draws a
    predicted-vs-actual plot for the model fitted on the columns that were kept.
    """
    def _fit_and_score(features):
        # Fit on the training part of the split; return the test R2 with the test targets/predictions.
        X_train, X_test, Y_train, Y_test = train_test_split(features, Y, test_size = testsize, random_state = 0)
        New_Model = LinearRegression()
        New_Model.fit(X_train, Y_train)
        Y_pred = New_Model.predict(X_test)
        return metrics.r2_score(Y_test, Y_pred), Y_test, Y_pred

    r2 = baseline_r2
    # Snapshot the column labels first: X gains and loses columns inside the loop
    for column in list(X.columns):
        New_Col_Name = column + '^' + str(Dimension)
        X.insert(0, New_Col_Name, X[column]**Dimension)
        r2_new, _, _ = _fit_and_score(X)
        if r2_new < r2:
            # The candidate made the score worse: remove it again
            X.drop([New_Col_Name], axis = 1, inplace = True)
        else:
            r2 = r2_new
    print('<==================== evaluation: R2_score ===================>')
    print("R2_score: ", r2)

    # Refit on the columns that were actually kept, so the plot shows the final model
    # rather than the last trial (whose candidate column may have been rejected).
    _, Y_test, Y_pred = _fit_and_score(X)

    print("========= Plot ===========\n")
    preds = pd.DataFrame({'y_pred': Y_pred.flatten(), 'y_test':Y_test.flatten()})
    preds = preds.sort_values(by='y_test')
    preds = preds.reset_index()

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c = 'r')
    plt.legend()
    plt.show()

In [None]:
# Try squared versions of the columns of X, scoring each on a 30% test split
check (2, 0.3)

In [None]:
# Disabled manual way of adding squared columns, kept for reference:
#X['Kms_Driven^2'] = X['Kms_Driven']**2
#X['Fuel_Type^2'] = X['Fuel_Type']**2
#X['Owner^2'] = X['Owner']**2
#X['Car_Age^2'] = X['Car_Age']**2
#X['Present_Price^2'] = X['Present_Price']**2

In [None]:
X

In [None]:
print('<============ Cross validation ================>')
# Score a fresh linear model on KF folds of X and Y; KFold is used with its default settings
KF = 10
model_new = LinearRegression()
kfold_validation = KFold(n_splits=KF)
results = cross_val_score(model_new, X, Y, cv=kfold_validation)
# Per-fold scores followed by their mean
print(results)
print(results.mean())

In [None]:
# Copy of the feature frame with the target column added back
DF_D2 = X.assign(Selling_Price=df['Selling_Price'])
DF_D2

In [None]:
# Remove the rows that belong to selected cross-validation folds.
# Number of folds (same as the KFold used for cross-validation)
KF = 10
# F = fold/folds we want to delete, counted from 1
F = 5, 6
# Approximate fold size, kept for reference only
A = round(len(DF_D2)/KF)

# Ask KFold itself which row positions make up each fold instead of hard-coding an index
# range: the previous `index > 120 & index <= 180` filter was shifted by one row relative to
# the folds KFold(10) builds (rows 120..179 for folds 5 and 6 of a 299-row frame).
fold_positions = [test_pos for _, test_pos in KFold(KF).split(DF_D2)]
drop_positions = np.concatenate([fold_positions[fold - 1] for fold in F])

# we drop fold 5,6
DF_D2 = DF_D2.drop(index=DF_D2.index[drop_positions]).reset_index(drop=True)
DF_D2

In [None]:
# Feature matrix (original columns plus three squared columns) and (n_rows, 1) target vector.
# NOTE(review): 'Seller_Type^2' is created for Model_input further down but is not selected
# here — confirm that leaving it out is intended.
squared_feature_columns = ['Kms_Driven', 'Fuel_Type', 'Seller_Type', 'Transmission', 'Owner',
                           'Car_Age', 'Present_Price', 'Car_Age^2', 'Owner^2', 'Fuel_Type^2']
X_ = pd.DataFrame(DF_D2, columns=squared_feature_columns)
Y_ = DF_D2['Selling_Price'].to_numpy().reshape(-1, 1)

In [None]:

    # Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(X_, Y_, test_size=0.3, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
X['pres_kms'] = X["Present_Price"] * X["Kms_Driven"]
X['pres_kms2'] = X["Present_Price"] * (X['Kms_Driven']**2)
X['pres_fuel'] = X["Present_Price"] * X["Fuel_Type"]
X['pres_fuel2'] = X["Present_Price"] * (X['Fuel_Type']**2)
X['pres2_kms'] = (X['Present_Price']**2) * X["Kms_Driven"]
X['pres2_kms2'] = (X['Present_Price']**2) * (X['Kms_Driven']**2)
X['pres2_fuel'] = (X['Present_Price']**2) * X["Fuel_Type"]
X['pres2_fuel2'] = (X['Present_Price']**2) * (X['Fuel_Type']**2)

In [None]:
X

In [None]:

    # Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

    print('<===================== Shape =======================>')
    for part_name, part in (("x_train", x_train), ("x_test", x_test),
                            ("y_train", y_train), ("y_test", y_test)):
        print(part_name, part.shape)

    # Fit a multiple linear regression on the training rows and predict the held-out rows
    regressor = LinearRegression().fit(x_train, y_train)
    y_pred = regressor.predict(x_test)

    print('<=================== coefficient ==================>')
    print(regressor.intercept_)  # intercept
    print(regressor.coef_)       # slopes

    # Error metrics on the held-out rows
    print('<==================== evaluation ===================>')
    mse = metrics.mean_squared_error(y_test, y_pred)
    print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
    print('Mean Squared Error: ', mse)
    print('Root Mean Squared Error: ', np.sqrt(mse))
    print('R2 Score: ', metrics.r2_score(y_test, y_pred))
    print('<========================== Evaluation Plot ============================>')

    # Pair every prediction with its actual value and order the pairs by actual value
    preds = (
        pd.DataFrame({'y_pred': y_pred.flatten(), 'y_test': y_test.flatten()})
        .sort_values(by='y_test')
        .reset_index()
    )

    plt.figure(figsize=(15, 5))
    plt.plot(preds['y_pred'], label='pred', marker='o')
    plt.plot(preds['y_test'], label='actual', marker='o', c='r')
    plt.legend()
    plt.show()

In [None]:
# Copy of the extended feature frame with the target column added back
DF_D3 = X.assign(Selling_Price=df['Selling_Price'])
DF_D3

In [None]:
# Feature values of the car whose price is to be predicted; the three categorical
# features use the same integer codes as the encoded training data.
Kms_Driven = 42000
Present_Price = 11.23
Car_Age = 10
Owner = 1
Fuel_Type = 2      # Petrol
Seller_Type = 2    # Dealer
Transmission = 2   # Manual
# Value stored in the Selling_Price column of this car's row
Selling_Price = 5

In [None]:
Model_input = pd.DataFrame({ "Kms_Driven":[Kms_Driven],
                             "Fuel_Type":[Fuel_Type],
                             "Seller_Type":[Seller_Type],
                             "Transmission":[Transmission],
                             "Owner":[Owner],
                             "Car_Age":[Car_Age],
                             "Present_Price":[Present_Price],
                             "Selling_Price": [Selling_Price]})

In [None]:
Model_input

In [None]:
#Car_Age^2	Owner^2	Seller_Type^2	Fuel_Type^2	 # owner چون صفر داشت استفاده نکردیم

Model_input['Car_Age^2'] = Model_input['Car_Age']**2
Model_input['Owner^2'] = Model_input['Owner']**2
Model_input['Seller_Type^2'] = Model_input['Seller_Type']**2
Model_input['Fuel_Type^2'] = Model_input['Fuel_Type']**2
Model_input['pres_kms'] = Model_input["Present_Price"] * Model_input["Kms_Driven"]
Model_input['pres_kms2'] = Model_input["Present_Price"] * (Model_input['Kms_Driven']**2)
Model_input['pres_fuel'] = Model_input["Present_Price"] * Model_input["Fuel_Type"]
Model_input['pres_fuel2'] = Model_input["Present_Price"] * (Model_input['Fuel_Type']**2)
Model_input['pres2_kms'] = (Model_input['Present_Price']**2) * Model_input["Kms_Driven"]
Model_input['pres2_kms2'] = (Model_input['Present_Price']**2) * (Model_input['Kms_Driven']**2)
Model_input['pres2_fuel'] = (Model_input['Present_Price']**2) * Model_input["Fuel_Type"]
Model_input['pres2_fuel2'] = (Model_input['Present_Price']**2) * (Model_input['Fuel_Type']**2)

In [None]:
Model_input

In [None]:
# Stack the row to predict underneath the training rows. DataFrame.append was removed in
# pandas 2.0; pd.concat aligns the columns by name and keeps the original index labels.
Finall_data = pd.concat([DF_D3, Model_input])
Finall_data

In [None]:
# The last row of Finall_data is the car to price; every row before it is training data.
# Derive the split point from the frame length instead of hard-coding the row count.
n_train = len(Finall_data) - 1
final_features = Finall_data.drop(["Selling_Price"], axis=1)

X = final_features.iloc[:n_train]
Y = Finall_data[["Selling_Price"]].iloc[:n_train]
X_Finall = final_features.iloc[n_train:]  # the single row to predict

# Fit on all training rows and predict the selling price of the new car
regressor = LinearRegression()
regressor.fit(X,Y)
Y_pred = regressor.predict(X_Finall)
Y_pred