-
Notifications
You must be signed in to change notification settings - Fork 49
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 66d1014
Showing
24 changed files
with
3,469 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
import matplotlib.pyplot as plt | ||
|
||
# Observations, one single-element row per pizza: the diameters and the
# matching prices that are plotted against each other below.
diameters = [[6], [8], [10], [14], [18], [21]]
prices = [[7], [9], [13], [17.5], [18], [21]]

# Scatter the observations as dots on a fixed 0..25 window with a grid.
plt.figure()
plt.title('Pizza Price statistics')
plt.xlabel('Diameter')
plt.ylabel('dollar price')
plt.plot(diameters, prices, '.')
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import matplotlib.pyplot as plt | ||
from sklearn.linear_model import LinearRegression | ||
|
||
# Training observations, one single-element row per pizza.
diameters = [[6], [8], [10], [14], [18]]
prices = [[7], [9], [13], [17.5], [18]]

# Fit a simple linear regression of price on diameter.
model = LinearRegression()
model.fit(diameters, prices)

# Evaluate the fitted line at the training diameters so it can be drawn
# on top of the raw observations.
fitted_prices = model.predict(diameters)

plt.figure()
plt.title('Pizza Price Predictions')
plt.xlabel('Diameter')
plt.ylabel('dollar price')
plt.plot(diameters, prices, '.')
plt.plot(diameters, fitted_prices, '--')
plt.axis([0, 25, 0, 25])
plt.grid(True)
plt.show()

# Predicted price for a diameter of 21, which is not in the training data.
print(model.predict([[21]]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from sklearn.linear_model import LinearRegression | ||
import numpy as np | ||
from numpy.linalg import inv,lstsq | ||
from numpy import dot, transpose | ||
|
||
# Training observations, one single-element row per pizza.
x = [[6], [8], [10], [14], [18]]
y = [[7], [9], [13], [17.5], [18]]

model = LinearRegression()
model.fit(x, y)

# Residuals of the fitted line on the training data. The residual sum of
# squares is the SUM of the squared residuals; averaging them instead would
# give the mean squared error, which does not match the printed label.
residuals = model.predict(x) - y
print('Residual sum of squares = ', np.sum(residuals ** 2))

# Flatten the column vectors so the statistics are computed from the same
# data the model was trained on rather than from re-typed literals.
diameters = np.ravel(x)
prices = np.ravel(y)

# ddof=1 gives the sample (n - 1) variance of the diameters.
print('Variance = ', np.var(diameters, ddof=1))
# np.cov returns the 2x2 covariance matrix; [0][1] is cov(diameter, price).
print('Co-variance = ', np.cov(diameters, prices)[0][1])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
from sklearn.linear_model import LinearRegression | ||
import numpy as np | ||
from numpy.linalg import inv,lstsq | ||
from numpy import dot, transpose | ||
|
||
# Training observations, one single-element row per pizza.
train_diameters = [[6], [8], [10], [14], [18]]
train_prices = [[7], [9], [13], [17.5], [18]]

model = LinearRegression()
model.fit(train_diameters, train_prices)

# Separate observations used only to score the fitted model.
x_test = [[8], [9], [11], [16], [12]]
y_test = [[11], [8.5], [15], [18], [11]]

# model.score() on the held-out pairs is reported as the R-squared score.
r_squared = model.score(x_test, y_test)
print('R-squared score = ', r_squared)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
from sklearn.linear_model import LinearRegression | ||
from numpy.linalg import lstsq | ||
|
||
# Training data: two explanatory values per row and one response per row.
train_features = [[6, 2], [8, 1], [10, 0], [14, 2], [18, 0]]
train_prices = [[7], [9], [13], [17.5], [18]]

model = LinearRegression()
model.fit(train_features, train_prices)

# Held-out rows used to compare predictions with the recorded responses.
test_features = [[8, 2], [9, 0], [11, 2], [16, 2], [12, 0]]
test_prices = [[11], [8.5], [15], [18], [11]]

# Print each prediction next to the recorded value for the same row.
predictions = model.predict(test_features)
for prediction, actual in zip(predictions, test_prices):
    print((prediction, actual))

# Least-squares solution computed directly with numpy.
# NOTE(review): the feature rows carry no constant column, so this solve has
# no intercept term and presumably will not match the fitted model's
# coefficients - confirm that is intended.
print(lstsq(train_features, train_prices, rcond=None)[0])

print('R-squared score = ', model.score(test_features, test_prices))
|
||
|
||
|
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import pandas as pd | ||
|
||
# data can be downloaded from the url: https://www.kaggle.com/vikrishnan/boston-house-prices
# NOTE(review): the columns used below (LotShape, YrSold, YearBuilt,
# YearRemodAdd, SalePrice) do not look like the Boston data set's columns -
# confirm that the URL above really is the source of 06_input_data.csv.
df = pd.read_csv('./06_input_data.csv')

# Understanding data
print(df.shape)
print(df.columns)
print(df.head(5))
print(df.info())
print(df.describe())
print(df.groupby('LotShape').size())

# Dropping null value columns which cross the threshold
# NOTE(review): `a` holds one null count per column, so len(a) is the number
# of COLUMNS, not rows; the cut-off is therefore 5% of the column count.
# 5% of the row count (len(df)) may have been intended, but changing it alters
# which features survive and therefore what the saved model expects as input -
# confirm before touching.
a = df.isnull().sum()
print(a)
b = a[a > (0.05 * len(a))]
print(b)
df = df.drop(b.index, axis=1)
print(df.shape)

# Replacing null value columns (text) with most used value
a1 = df.select_dtypes(include=['object']).isnull().sum()
print(a1)
print(a1.index)
for i in a1.index:
    # value_counts() sorts by frequency, so the first index entry is the
    # most common value of the column.
    b1 = df[i].value_counts().index.tolist()
    print(b1)
    df[i] = df[i].fillna(b1[0])

# Replacing null value columns (int, float) with most used value
a2 = df.select_dtypes(include=['integer', 'float']).isnull().sum()
print(a2)
b2 = a2[a2 != 0].index
print(b2)
# Only fill when some numeric column still has nulls: with no such column
# there is no mode record to take, and indexing [0] would fail.
if not b2.empty:
    df = df.fillna(df[b2].mode().to_dict(orient='records')[0])

# Creating new columns from existing columns
print(df.shape)
a3 = df['YrSold'] - df['YearBuilt']
b3 = df['YrSold'] - df['YearRemodAdd']
df['Years Before Sale'] = a3
df['Years Since Remod'] = b3
print(df.shape)

# Dropping unwanted columns
df = df.drop(["Id", "MoSold", "SaleCondition", "SaleType", "YearBuilt", "YearRemodAdd"], axis=1)
print(df.shape)

# Dropping columns which has correlation with target less than threshold
target = 'SalePrice'
x = df.select_dtypes(include=['integer', 'float']).corr()[target].abs()
print(x)
df = df.drop(x[x < 0.4].index, axis=1)
print(df.shape)

# Checking for the necessary features after dropping some columns
# NOTE(review): these names contain spaces ("Sale Type", "MS SubClass") while
# the columns referenced earlier in this script do not ("SaleType"), so many
# of them may never match - confirm the spelling against the CSV header.
l1 = ["PID","MS SubClass","MS Zoning","Street","Alley","Land Contour","Lot Config","Neighborhood","Condition 1","Condition 2","Bldg Type","House Style","Roof Style","Roof Matl","Exterior 1st","Exterior 2nd","Mas Vnr Type","Foundation","Heating","Central Air","Garage Type","Misc Feature","Sale Type","Sale Condition"]
l2 = [i for i in l1 if i in df.columns]

# Getting rid of nominal columns with too many unique values
for i in l2:
    # The cardinality test must gate the drop; as a bare expression it had no
    # effect and every listed column was removed unconditionally.
    if len(df[i].unique()) > 10:
        df = df.drop(i, axis=1)
print(df.columns)

df.to_csv('06_output_data.csv', index=False)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
import pandas as pd | ||
from sklearn.linear_model import LinearRegression | ||
from sklearn.model_selection import train_test_split,cross_val_score | ||
from sklearn.externals import joblib | ||
from sklearn.metrics import mean_squared_error | ||
import matplotlib.pyplot as plt | ||
from math import sqrt | ||
import os | ||
|
||
# Load the cleaned data written by the preprocessing script.
df = pd.read_csv('./06_output_data.csv')

# Move SalePrice to the last position, then keep only the numeric columns so
# every remaining column before the last one can be used as a feature.
i = list(df.columns.values)
i.pop(i.index('SalePrice'))
df0 = df[i + ['SalePrice']]
df = df0.select_dtypes(include=['integer', 'float'])
print(df.columns)

# Features are all columns except the last; the target is SalePrice.
X = df[list(df.columns)[:-1]]
y = df['SalePrice']
# NOTE(review): no random_state is passed, so the split (and every metric
# printed below) differs from run to run - confirm that is acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y)
regressor = LinearRegression()
regressor.fit(X_train, y_train)

y_predictions = regressor.predict(X_test)

meanSquaredError = mean_squared_error(y_test, y_predictions)
rootMeanSquaredError = sqrt(meanSquaredError)

print("Number of predictions:", len(y_predictions))
print("Mean Squared Error:", meanSquaredError)
print("Root Mean Squared Error:", rootMeanSquaredError)
print("Scoring:", regressor.score(X_test, y_test))

# Parity plot: recorded prices against predictions, with the line on which
# the two would be equal drawn for reference.
plt.plot(y_predictions, y_test, 'r.')
plt.plot(y_predictions, y_predictions, 'k-')
plt.title('Parity Plot - Linear Regression')
plt.show()

# Residual plot: prediction minus recorded price for each test row. The zero
# reference line spans the actual range of predictions instead of a
# hard-coded 100000..400000 window, so it fits whatever data was loaded.
plt.scatter(y_predictions, (y_predictions - y_test), c='b')
plt.hlines(y=0, xmin=y_predictions.min(), xmax=y_predictions.max())
plt.title('Residual Plot - Linear Regression')
plt.show()

# Persist the fitted model for the prediction scripts.
joblib.dump(regressor, './07_output_salepricemodel.pkl')
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
{ | ||
"OverallQual":[7], | ||
"TotalBsmtSF":[856], | ||
"1stFlrSF":[856], | ||
"GrLivArea":[1710], | ||
"FullBath":[2], | ||
"TotRmsAbvGrd":[8], | ||
"Fireplaces":[0], | ||
"GarageCars":[2], | ||
"GarageArea":[548], | ||
"Years Before Sale":[5], | ||
"Years Since Remod":[5] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import os | ||
import json | ||
import pandas as pd | ||
import numpy | ||
from sklearn.externals import joblib | ||
|
||
# Read the feature values from the JSON input file into a DataFrame.
features = pd.read_json('./08_input.json')

# Load the model saved by the training script and predict for those rows.
model = joblib.load("./07_output_salepricemodel.pkl")
prediction = model.predict(features)

print(str(prediction))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import os | ||
import json | ||
import pandas as pd | ||
import numpy | ||
from flask import Flask, render_template, request, jsonify | ||
from pandas.io.json import json_normalize | ||
from sklearn.externals import joblib | ||
|
||
# Flask application object that the route decorators below attach to.
app = Flask(__name__)
# Listening port: taken from the PORT environment variable, else 5500.
port = int(os.environ.get('PORT', 5500))
|
||
@app.route('/')
def home():
    """Serve the root URL by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
|
||
@app.route('/api/salepricemodel', methods=['POST'])
def salepricemodel():
    """Predict with the saved model for the JSON document in the request body.

    The posted JSON is turned into a DataFrame, passed to the model stored in
    ``07_output_salepricemodel.pkl`` and the prediction is returned as text.

    Returns:
        The stringified prediction on success, or the error message with
        HTTP status 500 when anything in the pipeline fails.
    """
    # The route only accepts POST, so no extra method check is needed (and
    # no code path is left that would fall through and return None).
    try:
        post_data = request.get_json()
        json_data = json.dumps(post_data)
        s = pd.read_json(json_data)
        # NOTE(security): joblib.load unpickles the file; only ever load a
        # model file that this project produced itself.
        p = joblib.load("./07_output_salepricemodel.pkl")
        r = p.predict(s)
        return str(r)
    except Exception as e:
        # An exception object is not a valid Flask response; send its text
        # with an explicit error status instead.
        return str(e), 500
|
||
if __name__ == '__main__':
    # The server listens on every interface (0.0.0.0). Flask's debug mode
    # enables an interactive debugger that can execute arbitrary code, so it
    # must not be on by default here; opt in with FLASK_DEBUG=1 when
    # developing locally.
    debug = os.getenv('FLASK_DEBUG', '0') == '1'
    app.run(host='0.0.0.0', port=port, debug=debug)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
import pandas as pd | ||
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet | ||
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor, GradientBoostingRegressor | ||
from sklearn.tree import DecisionTreeRegressor | ||
from sklearn.neural_network import MLPRegressor | ||
from sklearn.model_selection import train_test_split,cross_val_score | ||
from sklearn.externals import joblib | ||
from sklearn.metrics import mean_squared_error | ||
import matplotlib.pyplot as plt | ||
from math import sqrt | ||
import numpy as np | ||
import os | ||
|
||
# Load the cleaned data written by the preprocessing script.
df = pd.read_csv('./06_output_data.csv')

# Reorder so SalePrice is the last column, then keep numeric columns only.
ordered_columns = list(df.columns.values)
ordered_columns.remove('SalePrice')
ordered_columns.append('SalePrice')
df = df[ordered_columns].select_dtypes(include=['integer', 'float'])

# Every column but the last is a feature; SalePrice is the target.
X = df[df.columns[:-1]]
y = df['SalePrice']

# One shared train/test split used by every model function below.
# NOTE(review): no random_state is given, so the split changes per run.
X_train, X_test, y_train, y_test = train_test_split(X, y)
|
||
def linear():
    """Fit ordinary least squares on the shared training split.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    model = LinearRegression()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    test_score = model.score(X_test, y_test)
    rmse = sqrt(mean_squared_error(y_test, predicted))
    return (test_score, rmse)
|
||
def ridge():
    """Fit a Ridge regression (alpha 0.3) on the shared training split.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # NOTE(review): the `normalize` keyword was removed from scikit-learn's
    # linear models in release 1.2, so this call fails there. Replacing it
    # with a scaler pipeline is not numerically identical - decide how to
    # migrate before upgrading.
    model = Ridge(alpha=.3, normalize=True)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    test_score = model.score(X_test, y_test)
    rmse = sqrt(mean_squared_error(y_test, predicted))
    return (test_score, rmse)
|
||
def lasso():
    """Fit a Lasso regression (alpha 0.00009) on the shared training split.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # NOTE(review): the `normalize` keyword was removed from scikit-learn's
    # linear models in release 1.2, so this call fails there. Replacing it
    # with a scaler pipeline is not numerically identical - decide how to
    # migrate before upgrading.
    model = Lasso(alpha=0.00009, normalize=True)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    test_score = model.score(X_test, y_test)
    rmse = sqrt(mean_squared_error(y_test, predicted))
    return (test_score, rmse)
|
||
def elasticnet():
    """Fit an ElasticNet (alpha 1, l1_ratio 0.5) on the shared training split.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # `normalize=False` is omitted on purpose: it is the default wherever the
    # keyword exists, and passing it raises TypeError on scikit-learn >= 1.2,
    # where the keyword was removed.
    regressor = ElasticNet(alpha=1, l1_ratio=0.5)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    return (regressor.score(X_test, y_test), sqrt(mean_squared_error(y_test, y_predictions)))
|
||
def randomforest():
    """Fit a 15-tree random forest on the shared training split.

    Prints the fitted model's ``feature_importances_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # The split criterion is left at its default, which is squared error in
    # every scikit-learn release; the explicit spelling 'mse' was removed in
    # 1.2 and would raise there.
    regressor = RandomForestRegressor(n_estimators=15, min_samples_split=15, max_depth=None)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for RamdomForest", regressor.feature_importances_)
    return (regressor.score(X_test, y_test), sqrt(mean_squared_error(y_test, y_predictions)))
|
||
def perceptron():
    """Fit a one-hidden-layer MLP regressor on the shared training split.

    The network has a single hidden layer of 5000 units, ReLU activation,
    the adam solver and at most 1000 iterations. Prints the fitted model's
    ``coefs_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    model = MLPRegressor(
        hidden_layer_sizes=(5000,),
        activation='relu',
        solver='adam',
        max_iter=1000,
    )
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    print("Co-efficients of Perceptron", model.coefs_)
    test_score = model.score(X_test, y_test)
    rmse = sqrt(mean_squared_error(y_test, predicted))
    return (test_score, rmse)
|
||
def decisiontree():
    """Fit a decision tree (min_samples_split 30) on the shared training split.

    Prints the fitted model's ``feature_importances_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    model = DecisionTreeRegressor(min_samples_split=30, max_depth=None)
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    print("Selected Features for DecisionTrees", model.feature_importances_)
    test_score = model.score(X_test, y_test)
    rmse = sqrt(mean_squared_error(y_test, predicted))
    return (test_score, rmse)
|
||
def adaboost():
    """Fit an AdaBoost regressor (exponential loss) on the shared training split.

    Prints the fitted model's ``feature_importances_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # Fit exactly once: a second fit on the same data with the same fixed
    # random_state only repeats the training work.
    regressor = AdaBoostRegressor(random_state=8, loss='exponential')
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Adaboost", regressor.feature_importances_)
    return (regressor.score(X_test, y_test), sqrt(mean_squared_error(y_test, y_predictions)))
|
||
def extratrees():
    """Fit a 50-tree extra-trees regressor on the shared training split.

    Prints the fitted model's ``feature_importances_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # Fit exactly once: the earlier code trained the ensemble twice and
    # discarded the first result.
    regressor = ExtraTreesRegressor(n_estimators=50)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Extratrees", regressor.feature_importances_)
    return (regressor.score(X_test, y_test), sqrt(mean_squared_error(y_test, y_predictions)))
|
||
def gradientboosting():
    """Fit a 500-stage gradient boosting regressor on the shared training split.

    Prints the fitted model's ``feature_importances_`` as a side effect.

    Returns:
        Tuple ``(score, rmse)``: the model's ``score`` on the test split and
        the root mean squared error of its test predictions.
    """
    # The loss is left at its default, which is least squares in every
    # scikit-learn release; the explicit spelling 'ls' was removed in 1.2.
    # The model is also fitted only once instead of twice.
    regressor = GradientBoostingRegressor(n_estimators=500, min_samples_split=15)
    regressor.fit(X_train, y_train)
    y_predictions = regressor.predict(X_test)
    print("Selected Features for Gradientboosting", regressor.feature_importances_)
    return (regressor.score(X_test, y_test), sqrt(mean_squared_error(y_test, y_predictions)))
|
||
# Run every model in turn and print its (score, RMSE) pair under a label.
print("Score, RMSE values")
for label, evaluate in (
    ("Linear = ", linear),
    ("Ridge = ", ridge),
    ("Lasso = ", lasso),
    ("ElasticNet = ", elasticnet),
    ("RandomForest = ", randomforest),
    ("Perceptron = ", perceptron),
    ("DecisionTree = ", decisiontree),
    ("AdaBoost = ", adaboost),
    ("ExtraTrees = ", extratrees),
    ("GradientBoosting = ", gradientboosting),
):
    print(label, evaluate())
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
,G,H,I,J,K,L,M,N,O,P,Q,R,S,T,U,V,W,X,Y,Z | ||
G,1,-0.52,-0.5,-0.19,-0.11,-0.42,0.78,0.53,0.52,-0.16,-0.15,-0.08,-0.44,0.33,0.39,-0.46,-0.08,0.2,0.57,0.76 | ||
H,-0.52,1,0.99,0.57,0.85,0.98,-0.77,-0.3,-1,-0.41,0.69,0.72,0.78,-0.6,-0.68,-0.07,0.22,0.58,0.06,-0.64 | ||
I,-0.5,0.99,1,0.66,0.87,0.99,-0.74,-0.23,-0.99,-0.36,0.74,0.78,0.76,-0.61,-0.67,-0.03,0.29,0.61,0.09,-0.63 | ||
J,-0.19,0.57,0.66,1,0.71,0.68,-0.23,0.46,-0.57,0.01,0.88,0.87,0.38,-0.48,-0.38,0.12,0.77,0.71,0.45,-0.33 | ||
K,-0.11,0.85,0.87,0.71,1,0.92,-0.34,0.13,-0.85,-0.52,0.8,0.89,0.62,-0.48,-0.54,-0.35,0.33,0.92,0.52,-0.22 | ||
L,-0.42,0.98,0.99,0.68,0.92,1,-0.67,-0.12,-0.98,-0.42,0.8,0.83,0.75,-0.62,-0.69,-0.12,0.36,0.71,0.24,-0.56 | ||
M,0.78,-0.77,-0.74,-0.23,-0.34,-0.67,1,0.62,0.77,0.18,-0.33,-0.27,-0.6,0.59,0.61,-0.23,-0.07,0.02,0.44,0.91 | ||
N,0.53,-0.3,-0.23,0.46,0.13,-0.12,0.62,1,0.3,0.15,0.45,0.42,-0.32,0.02,0.14,-0.11,0.71,0.48,0.82,0.45 | ||
O,0.52,-1,-0.99,-0.57,-0.85,-0.98,0.77,0.3,1,0.41,-0.69,-0.72,-0.78,0.6,0.68,0.07,-0.22,-0.58,-0.06,0.64 | ||
P,-0.16,-0.41,-0.36,0.01,-0.52,-0.42,0.18,0.15,0.41,1,-0.13,-0.29,-0.03,0.34,0.5,0.9,0.26,-0.49,-0.37,0.04 | ||
Q,-0.15,0.69,0.74,0.88,0.8,0.8,-0.33,0.45,-0.69,-0.13,1,0.96,0.51,-0.55,-0.53,0.01,0.8,0.79,0.57,-0.37 | ||
R,-0.08,0.72,0.78,0.87,0.89,0.83,-0.27,0.42,-0.72,-0.29,0.96,1,0.46,-0.57,-0.56,-0.17,0.68,0.89,0.63,-0.27 | ||
S,-0.44,0.78,0.76,0.38,0.62,0.75,-0.6,-0.32,-0.78,-0.03,0.51,0.46,1,-0.1,-0.21,0.26,0.15,0.36,-0.05,-0.4 | ||
T,0.33,-0.6,-0.61,-0.48,-0.48,-0.62,0.59,0.02,0.6,0.34,-0.55,-0.57,-0.1,1,0.86,0.1,-0.38,-0.35,-0.11,0.66 | ||
U,0.39,-0.68,-0.67,-0.38,-0.54,-0.69,0.61,0.14,0.68,0.5,-0.53,-0.56,-0.21,0.86,1,0.23,-0.24,-0.39,-0.14,0.67 | ||
V,-0.46,-0.07,-0.03,0.12,-0.35,-0.12,-0.23,-0.11,0.07,0.9,0.01,-0.17,0.26,0.1,0.23,1,0.29,-0.48,-0.54,-0.35 | ||
W,-0.08,0.22,0.29,0.77,0.33,0.36,-0.07,0.71,-0.22,0.26,0.8,0.68,0.15,-0.38,-0.24,0.29,1,0.44,0.51,-0.24 | ||
X,0.2,0.58,0.61,0.71,0.92,0.71,0.02,0.48,-0.58,-0.49,0.79,0.89,0.36,-0.35,-0.39,-0.48,0.44,1,0.8,0.09 | ||
Y,0.57,0.06,0.09,0.45,0.52,0.24,0.44,0.82,-0.06,-0.37,0.57,0.63,-0.05,-0.11,-0.14,-0.54,0.51,0.8,1,0.42 | ||
Z,0.76,-0.64,-0.63,-0.33,-0.22,-0.56,0.91,0.45,0.64,0.04,-0.37,-0.27,-0.4,0.66,0.67,-0.35,-0.24,0.09,0.42,1 |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.