In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt


# Data preparation
#
# Load the CSV, keep only colour-grade "G" stones lighter than 1.75 carat,
# and retain just the three columns used in the rest of the notebook.
diamonds = (
    pd.read_csv('/diamond.csv')
    .loc[
        lambda frame: frame['color'].isin(['G']) & (frame['carat'] < 1.75),
        ['carat', 'cut', 'price'],
    ]
    # Renumber rows 0..n-1; the old index is not needed afterwards.
    .reset_index(drop=True)
)



In [None]:
# Summarise the prepared frame (columns, dtypes, non-null counts).
diamonds.info()

In [None]:
# Distinct values of the 'cut' column in the prepared frame.
diamonds['cut'].unique()

In [None]:
# Peek at the first rows of the prepared frame (carat, cut, price).
diamonds.head()

In [None]:
# Joint distribution of carat vs. price with a fitted regression line.
sns.jointplot(data=diamonds, x='carat', y='price', kind='reg')

In [None]:
# Carat vs. price with one regression line per cut grade.
sns.lmplot(
    data=diamonds,
    x='carat',
    y='price',
    hue='cut',
    palette="Set2",
    scatter=True,
    fit_reg=True,
    legend=True,
)

In [None]:
# One-hot encode the non-numeric columns of the frame (presumably only 'cut').
# The dtype is requested explicitly: since pandas 2.0 get_dummies defaults to
# bool columns, and a mixed float/bool frame is converted to an object array,
# which statsmodels OLS and Keras both reject in the cells below.
diamonds = pd.get_dummies(diamonds, dtype=float)

diamonds.head()

In [None]:
import statsmodels.api as sm

# Ordinary least squares of price on carat and the cut dummies.
# 'cut_Ideal' is left out so it acts as the reference category next to the
# intercept added by add_constant.
y = diamonds['price']
X = sm.add_constant(diamonds.drop(columns=['price', 'cut_Ideal']))

lm_fit = sm.OLS(y, X).fit()
lm_fit.summary()

In [None]:
# In-sample predictions of the linear model, plotted against actual price.
pred_y = lm_fit.predict(X)

sns.regplot(
    x=pred_y,
    y=y,
    scatter_kws=dict(color="darkred", alpha=0.15, s=10),
)

In [None]:
# Same linear specification as before, but with log(price) as the response.
y_log = np.log(diamonds['price'])
X = sm.add_constant(diamonds.drop(columns=['price', 'cut_Ideal']))

lm_fit_log = sm.OLS(y_log, X).fit()
print('R square : {0:.4f}'.format(lm_fit_log.rsquared))

# Fitted values are on the log scale, matching y_log in the plot below.
lm_fitted_log = lm_fit_log.predict(X)

sns.regplot(
    x=lm_fitted_log,
    y=y_log,
    scatter_kws=dict(color="darkred", alpha=0.15, s=10),
)

In [None]:
# Plain numpy arrays for the neural network: features (carat plus the cut
# dummies, without 'cut_Ideal') and the raw price target.
feature_frame = diamonds.drop(columns=['cut_Ideal', 'price'])
X = feature_frame.values
Y_actual = diamonds['price'].values

for arr in (X, Y_actual):
    print(arr.shape)

In [None]:
from keras import models
from keras import layers

# Small fully connected regressor: two hidden ReLU layers of 16 units and a
# single linear output unit for the price.
n_features = X.shape[1]
dense_fit = models.Sequential([
    layers.Dense(units=16, activation='relu', input_shape=(n_features,)),
    layers.Dense(units=16, activation='relu'),
    layers.Dense(units=1, activation='linear'),
])
dense_fit.summary()

In [None]:
# Train on the full data set with mean squared error and the Adam optimiser;
# the returned History object keeps the per-epoch loss for plotting.
dense_fit.compile(optimizer='adam', loss='mse')

history = dense_fit.fit(
    x=X,
    y=Y_actual,
    batch_size=16,
    epochs=30,
)

In [None]:
# Plot the training loss recorded for each epoch by dense_fit.fit().
train_loss = history.history['loss']

# The x-axis is derived from the recorded history instead of a hard-coded 30,
# so the plot stays valid if the number of epochs in the fit cell changes.
plt.plot(range(len(train_loss)), train_loss, label='Training loss', marker='o')
plt.title('Training loss')
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.legend()
plt.show()


In [None]:
# In-sample network predictions, flattened to 1-D and paired with the actual
# prices for comparison.
raw_pred = dense_fit.predict(X)
Y_pred = raw_pred.reshape(-1)

results = pd.DataFrame({'Y_pred': Y_pred, 'price': Y_actual})
results.head()

In [None]:
# Squared Pearson correlation matrix between predicted and actual price.
# NOTE(review): squared correlation equals the coefficient of determination
# only for a least-squares linear fit; for the network it ignores bias/scale
# errors, so compare with 1 - SS_res/SS_tot before ranking it against the
# OLS R-squared values.
r_squared = results.corr().pow(2)
r_squared

In [None]:
# Network predictions vs. actual price, with a fitted regression line.
sns.regplot(
    x='Y_pred',
    y='price',
    data=results,
    scatter_kws=dict(color="darkred", alpha=0.15, s=10),
)