## import required packages

In [None]:
import pandas as pd 
import numpy as np
from sklearn.linear_model import LinearRegression # Model to train the data
from sklearn.preprocessing import PolynomialFeatures # Model for polinomial data
from sklearn.model_selection import train_test_split # splits the dataset into train set and test set
from sklearn.metrics import mean_squared_error,r2_score # to find the error rate and accuracy scores
import matplotlib.pyplot as plt  # library helps to plot graphs
import seaborn as sns

## Read dataset

In [None]:
# load the car price dataset from the CSV file in the working directory
df = pd.read_csv(filepath_or_buffer='CarPrice_Assignment.csv')
df.head(n=5)  # preview the first five rows

## Data Preprocessing

In [None]:
# collect the names of every column whose dtype is int64 or float64
df_list = [
    name
    for name in df.columns
    if df[name].dtype in ('int64', 'float64')
]

In [None]:
new_df = pd.DataFrame(data=None)  # start with an empty dataframe

In [None]:
new_df = df.loc[:, df_list]  # keep only the int64/float64 columns collected in df_list

In [None]:
new_df.shape # gives the number of records (rows) and features (columns)

In [None]:
# pairwise Pearson correlation between all numeric columns (the price column included)
corr = new_df.corr(method='pearson')
corr

In [None]:
df['doornumber'].astype('category') # shows 'doornumber' cast to category; the result is not assigned, so df itself is unchanged

In [None]:
# replace the door-count words with numbers; any value missing from the mapping becomes NaN
door_counts = {'two': 2, 'four': 4}
df['doornumber'] = df['doornumber'].map(door_counts)

## Normalization

In [None]:
from sklearn.preprocessing import StandardScaler # rescales each feature to zero mean and unit variance
from sklearn.preprocessing import MinMaxScaler # rescales each feature into a given range, (0,1) by default

In [None]:
standrd = MinMaxScaler(feature_range=(0, 1))  # scaler that maps each column onto [0, 1]

In [None]:
# learn each column's min/max from new_df, then rescale those same rows
standrd.fit(new_df)
fitteddata = standrd.transform(new_df)

In [None]:
# wrap the scaled array in a dataframe that reuses the numeric column names
norm_data = pd.DataFrame(data=fitteddata, columns=new_df.columns)

## Feature Selection

In [None]:
# feature matrix: the first 15 scaled columns, as a NumPy array
X = norm_data.iloc[:, :15].to_numpy()
# target: the last scaled column, turned into an (n, 1) column vector
y = norm_data.iloc[:, -1].to_numpy()[:, np.newaxis]

In [None]:
from sklearn.feature_selection import SelectKBest # keeps the k highest-scoring features
from sklearn.feature_selection import f_classif # ANOVA F-value scoring function, intended for classification targets

In [None]:
# The target (last numeric column) is continuous, so score features with the
# regression F-test; f_classif would treat every distinct target value as its
# own class. Imported here so this cell runs on its own.
# NOTE(review): the top-10 ranking may differ from the one f_classif produced;
# re-check any hard-coded feature list derived from it.
from sklearn.feature_selection import f_regression
featurebest = SelectKBest(score_func=f_regression, k=10)  # keep the 10 best-scoring features

In [None]:
# y is an (n, 1) column; flatten it to the 1-D shape the scoring function
# expects, which avoids scikit-learn's column-vector conversion warning.
# Note: this rebinds fitteddata from the scaled array to the fitted selector.
fitteddata = featurebest.fit(X, y.ravel())

In [None]:
# one-column frame holding the score computed for each candidate feature
dfscores = pd.DataFrame(data=fitteddata.scores_)
# one-column frame holding the names of the same first 15 columns
dfcols = pd.DataFrame(data=norm_data.columns[0:15])

In [None]:
# put the feature names and their scores side by side
featurescores = pd.concat(objs=[dfcols, dfscores], axis='columns')

In [None]:
featurescores.columns = ['Specs', 'Score']  # give the two columns readable names

In [None]:
featurescores.nlargest(n=10, columns='Score')  # show the ten highest-scoring features

In [None]:
# dataframe holding the ten features chosen from the feature-selection scores,
# followed by the target 'price' as the last column
new_data2 = norm_data[[
    'enginesize', 'curbweight', 'carwidth', 'horsepower', 'carlength',
    'citympg', 'wheelbase', 'highwaympg', 'peakrpm', 'carheight',
    'price',
]]

## Correlation of Features

In [None]:
# scatter-plot every pair of selected columns to inspect their relationships
sns.pairplot(data=new_data2)

In [None]:
# annotated heatmap of the pairwise correlations between the selected columns
sns.heatmap(data=new_data2.corr(), annot=True)

## Linear Regression Model for training the data and checking the Accuracy

In [None]:
from sklearn.linear_model import LinearRegression # Linear Regression class
from sklearn.metrics import mean_squared_error,r2_score # metrics for findind error rate and accuracy

In [None]:
# new_data2 has 11 columns with the target 'price' last. Take every column
# except the last as features, so the target is never fed to the model as an
# input (a 0:11 slice would include it and make the fit trivially perfect).
X_new = new_data2.iloc[:, :-1].to_numpy()  # independent features
y_new = new_data2.iloc[:, -1].to_numpy().reshape(-1, 1)  # dependent feature as an (n, 1) column

In [None]:
lreg = LinearRegression()  # ordinary least-squares regression estimator

In [None]:
lreg.fit(X=X_new, y=y_new)  # train the model on the selected features

In [None]:
lreg.score(X=X_new, y=y_new)  # R^2 (coefficient of determination) on the same data the model was fitted on

## Matplotlib for Visualization

In [None]:
from mpl_toolkits.mplot3d import Axes3D # plots 3d graphs
%matplotlib notebook # displays the interactive graphs in notebook 

In [None]:
# 3D scatter of price against engine size (column 0) and horse power (column 3),
# with each point coloured by column 1 of X_new
fig = plt.figure(1)
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs=X_new[:, 0], ys=X_new[:, 3], zs=y_new, c=X_new[:, 1])

ax.set(
    xlabel='engine size',
    ylabel='horse power',
    zlabel='price',
    title='Price vs Engine size and Horse power',
)

In [None]:
from matplotlib.animation import FuncAnimation # creates an animation of plotted graph

In [None]:
# Figure whose single 3D axes fills the whole canvas, on a mid-grey background.
fig = plt.figure()
fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
ax = fig.add_subplot(111, projection='3d')
ax.set_facecolor((0.5, 0.5, 0.5))

# One RGB triple per point, taken from columns 0, 1 and 4 of X_new. The values
# are usable as colour channels because the data was min-max scaled to [0, 1].
colors = X_new[:, [0, 1, 4]]
ax.scatter(X_new[:, 0], X_new[:, 3], y_new,
           alpha=1.0, s=50.0, c=colors, marker='o', linewidth=0)
ax.set_xlabel('engine size')
ax.set_ylabel('horse power')
ax.set_zlabel('price')
ax.set_title('Price vs Engine size and Horse power')

In [None]:
def update(i, fig, ax):
    """
    function update: rotates the 3D plot to the camera angle for one animation frame

    param i: value of the current frame, used as the azimuth angle of the view

    param fig: figure that holds the plot; handed back unchanged

    param ax: 3D axes whose viewpoint is set (elevation fixed at 20, azimuth i)
    return: tuple of the figure and the axes
    """
    elevation = 20.
    ax.view_init(azim=i, elev=elevation)
    return (fig, ax)

# FuncAnimation calls update once per frame value (0, 2, ..., 358) and passes
# fig and ax through fargs; the sequence repeats when it reaches the end
anim = FuncAnimation(
    fig,
    update,
    frames=np.arange(0, 360, 2),
    fargs=(fig, ax),
    repeat=True,
)
# write the animation to a GIF file using the ImageMagick writer
anim.save('car_price_cube.gif', writer='imagemagick', fps=24, dpi=80)