In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
try:
    # Current location of train_test_split (scikit-learn >= 0.18).
    from sklearn.model_selection import train_test_split
except ImportError:
    # Legacy location; this module was removed in scikit-learn 0.20.
    from sklearn.cross_validation import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [None]:
# Read the camera dataset from its relative CSV path and preview the
# first three rows.
camera = pd.read_csv(filepath_or_buffer='../input/1000-cameras-dataset/camera_dataset.csv')
camera.head(n=3)

In [None]:
# Use the 'Model' column as the row index (it stops being a regular column).
# Not idempotent: re-running this cell raises KeyError because 'Model' is gone.
camera = camera.set_index(keys='Model')

In [None]:
# Per-column flag: True where the column contains at least one missing value.
camera.isnull().any()

In [None]:
# Per-column count of missing values.
camera.isnull().sum()

In [None]:
# Discard every row that has a missing value in any column.
camera = camera.dropna(axis=0, how='any')

In [None]:
# Re-run the per-column missing-value check after the rows were dropped.
camera.isnull().any(axis=0)

In [None]:
# Pairwise Pearson correlation between the columns of `camera`;
# consumed by the heatmap cell.
temp = camera.corr(method='pearson')

In [None]:
# Draw the correlation matrix as an annotated heatmap on a 10x5 inch figure.
fig, ax = plt.subplots(figsize=(10, 5))
sns.heatmap(temp, cmap='RdYlGn', annot=True, ax=ax)
plt.show()

In [None]:
# Mean of the max and low resolution columns.
max_res = camera['Max resolution']
low_res = camera['Low resolution']
camera['Avg resolution'] = (max_res + low_res) / 2

# Despite the 'Avg' prefix, these two columns are 0/1 threshold flags:
# 1 when wide zoom <= 30.0, and 1 when tele zoom <= 120.0.
camera['Avg zoom'] = camera['Zoom wide (W)'].le(30.0).astype(int)
camera['Avg zoom1'] = camera['Zoom tele (T)'].le(120.0).astype(int)

# Mean of the normal and macro focus range columns.
normal_focus = camera['Normal focus range']
macro_focus = camera['Macro focus range']
camera['Avg focus'] = (normal_focus + macro_focus) / 2

camera.head(n=3)

In [None]:
# 0/1 indicators for two bands of 'Avg resolution': <= 2000 and (2000, 2800].
# Rows above 2800 get 0 in both columns.
avg_res = camera['Avg resolution']
camera['Avg resolution 1'] = avg_res.le(2000).astype(int)
camera['Avg resolution 2'] = (avg_res.gt(2000) & avg_res.le(2800)).astype(int)

In [None]:
# 0/1 indicator: 1 when 'Effective pixels' is at most 4.0.
camera['Epix 1'] = camera['Effective pixels'].le(4.0).astype(int)

In [None]:
# 0/1 indicators for three bands of 'Storage included':
#   stor 1: value <= 8.0
#   stor 2: 8.0 < value < 15.0
#   stor 3: 15.0 <= value <= 64.0
# Rows above 64.0 get 0 in all three columns.
#
# Each comparison is parenthesised on purpose: `&` binds tighter than `<` and
# `<=`, so `(a > 8.0) & a < 15.0` is evaluated as `((a > 8.0) & a) < 15.0`
# and does not produce the intended range test.
storage = camera['Storage included']
camera['stor 1'] = (storage <= 8.0).astype(int)
camera['stor 2'] = ((storage > 8.0) & (storage < 15.0)).astype(int)
camera['stor 3'] = ((storage >= 15.0) & (storage <= 64.0)).astype(int)

In [None]:
# 0/1 indicators for three bands of 'Weight (inc. batteries)':
# <= 180, (180, 320] and (320, 1100]. Rows above 1100 get 0 in all three.
weight = camera['Weight (inc. batteries)']
camera['Weight 1'] = weight.le(180.0).astype(int)
camera['Weight 2'] = (weight.gt(180.0) & weight.le(320.0)).astype(int)
camera['Weight 3'] = (weight.gt(320.0) & weight.le(1100.0)).astype(int)

In [None]:
# 0/1 indicators for three bands of 'Dimensions':
# <= 40, (40, 80] and (80, 130]. Rows above 130 get 0 in all three.
dims = camera['Dimensions']
camera['Dim 1'] = dims.le(40.0).astype(int)
camera['Dim 2'] = (dims.gt(40.0) & dims.le(80.0)).astype(int)
camera['Dim 3'] = (dims.gt(80.0) & dims.le(130.0)).astype(int)

In [None]:
# Split the weight column into three bins with pd.cut. With an integer
# `bins`, pd.cut derives equal-width intervals from the data range; the label
# strings below only name the bins and are not used as bin edges.
labels = ['0-750', '750-1500', '1500-1900']
camera['Weight_bins'] = pd.cut(
    x=camera['Weight (inc. batteries)'],
    bins=3,
    right=True,
    labels=labels,
)

In [None]:
# 0/1 indicators for the first two 'Weight_bins' categories; the third
# category ('1500-1900') is the implicit baseline with both columns at 0.
# The second comparison must use '750-1500', the label actually assigned by
# the pd.cut cell. Comparing against a label that does not exist there
# leaves the column constantly 0.
weight_bin = camera['Weight_bins']
camera['Weight_bins 1'] = (weight_bin == '0-750').astype(int)
camera['Weight_bins 2'] = (weight_bin == '750-1500').astype(int)

In [None]:
# Split 'Macro focus range' into three bins with pd.cut. With an integer
# `bins`, the intervals are equal-width over the data range; the label
# strings only name the bins.
labels = ['0-30', '30-60', '60-90']
camera['Macro_bins'] = pd.cut(
    x=camera['Macro focus range'],
    bins=3,
    right=True,
    labels=labels,
)

In [None]:
# 0/1 indicators for the first two 'Macro_bins' categories; rows in the
# remaining category get 0 in both columns.
macro_bin = camera['Macro_bins']
camera['Macro_bins 1'] = (macro_bin == '0-30').astype(int)
camera['Macro_bins 2'] = (macro_bin == '30-60').astype(int)

In [None]:
# Split 'Zoom wide (W)' into three bins with pd.cut. With an integer `bins`,
# the intervals are equal-width over the data range; the label strings only
# name the bins.
labels = ['0-25', '25-35', '35-53']
camera['Zoom_w_bins'] = pd.cut(
    x=camera['Zoom wide (W)'],
    bins=3,
    right=True,
    labels=labels,
)

In [None]:
# 0/1 indicators for the first two 'Zoom_w_bins' categories; rows in the
# remaining category get 0 in both columns.
zoom_w_bin = camera['Zoom_w_bins']
camera['Zoom_w_bins 1'] = (zoom_w_bin == '0-25').astype(int)
camera['Zoom_w_bins 2'] = (zoom_w_bin == '25-35').astype(int)

In [None]:
# Preview the first three rows with all engineered columns added.
camera.head(n=3)

In [None]:
# Columns removed from the feature matrix: the target ('Price'), several raw
# measurements, two derived columns ('Avg focus', 'Epix 1') and the
# categorical bin columns. Every other column of `camera` stays in X.
excluded_columns = [
    'Release date',
    'Price',
    'Effective pixels',
    'Avg focus',
    'Normal focus range',
    'Macro focus range',
    'Epix 1',
    'Storage included',
    'Dimensions',
    'Low resolution',
    'Weight (inc. batteries)',
    'Weight_bins',
    'Macro_bins',
    'Zoom_w_bins',
]
X = camera.drop(excluded_columns, axis=1)

# Regression target.
y = camera['Price']

In [None]:
# Hold out 19% of the rows for testing (fixed seed for a repeatable split),
# then fit an ordinary least-squares model on the remainder.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.19, random_state=1
)
lin = LinearRegression()
lin.fit(X=Xtrain, y=ytrain)

In [None]:
# Predict prices for the held-out rows.
y_pred = lin.predict(X=Xtest)

In [None]:
# Root-mean-squared error of the predictions on the held-out rows.
mse = metrics.mean_squared_error(y_true=ytest, y_pred=y_pred)
np.sqrt(mse)

In [None]:
# Collect actual price, predicted price and their difference
# (actual - predicted) for the held-out rows, indexed like `ytest`.
df = pd.DataFrame({'Price': ytest})
df['Predicted'] = y_pred
df['ERROR'] = df['Price'].sub(df['Predicted'])
df.head(n=15)

In [None]:
# Summary statistics of the prediction errors.
df.loc[:, 'ERROR'].describe()

In [None]:
# Display the intercept term of the fitted linear model.
lin.intercept_

In [None]:
# Display the fitted coefficients, one per column of X, in X's column order.
lin.coef_

In [None]:
# Scatter the prediction error against the actual price on the held-out rows.
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(df['Price'], df['ERROR'], color='black')
ax.set_xlabel('Price', fontsize=18, fontweight='bold', color='navy')
ax.set_ylabel('Error', fontsize=18, fontweight='bold', color='navy')
plt.show()