### Imports

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from scipy import stats
from sklearn.pipeline import make_pipeline 
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.metrics import r2_score

In [2]:
df = pd.read_csv('../../data/kc_house_data.csv')
df.head()

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,10/13/2014,221900.0,3,1.0,1180,5650,1.0,,NONE,...,7 Average,1180,0.0,1955,0.0,98178,47.5112,-122.257,1340,5650
1,6414100192,12/9/2014,538000.0,3,2.25,2570,7242,2.0,NO,NONE,...,7 Average,2170,400.0,1951,1991.0,98125,47.721,-122.319,1690,7639
2,5631500400,2/25/2015,180000.0,2,1.0,770,10000,1.0,NO,NONE,...,6 Low Average,770,0.0,1933,,98028,47.7379,-122.233,2720,8062
3,2487200875,12/9/2014,604000.0,4,3.0,1960,5000,1.0,NO,NONE,...,7 Average,1050,910.0,1965,0.0,98136,47.5208,-122.393,1360,5000
4,1954400510,2/18/2015,510000.0,3,2.0,1680,8080,1.0,NO,NONE,...,8 Good,1680,0.0,1987,0.0,98074,47.6168,-122.045,1800,7503


####  Split

#####  Pre-split drops.

Dropping rows where the 'sqft_basement' has a value of '?'. 

Dropping the rows where the 'grade' has a value of '3 Poor' and '13 Mansion'. 

Dropping 'id', 'yr_renovated', 'floors', 'yr_built', and 'date'. No use for them and we don't have enough data to use them. 

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21597 entries, 0 to 21596
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             21597 non-null  int64  
 1   date           21597 non-null  object 
 2   price          21597 non-null  float64
 3   bedrooms       21597 non-null  int64  
 4   bathrooms      21597 non-null  float64
 5   sqft_living    21597 non-null  int64  
 6   sqft_lot       21597 non-null  int64  
 7   floors         21597 non-null  float64
 8   waterfront     19221 non-null  object 
 9   view           21534 non-null  object 
 10  condition      21597 non-null  object 
 11  grade          21597 non-null  object 
 12  sqft_above     21597 non-null  int64  
 13  sqft_basement  21597 non-null  object 
 14  yr_built       21597 non-null  int64  
 15  yr_renovated   17755 non-null  float64
 16  zipcode        21597 non-null  int64  
 17  lat            21597 non-null  float64
 18  long  

In [4]:
df.isna().sum()

id                  0
date                0
price               0
bedrooms            0
bathrooms           0
sqft_living         0
sqft_lot            0
floors              0
waterfront       2376
view               63
condition           0
grade               0
sqft_above          0
sqft_basement       0
yr_built            0
yr_renovated     3842
zipcode             0
lat                 0
long                0
sqft_living15       0
sqft_lot15          0
dtype: int64

In [5]:
df['grade'].value_counts()

7 Average        8974
8 Good           6065
9 Better         2615
6 Low Average    2038
10 Very Good     1134
11 Excellent      399
5 Fair            242
12 Luxury          89
4 Low              27
13 Mansion         13
3 Poor              1
Name: grade, dtype: int64

In [6]:
df = df[df.sqft_basement != '?']
df = df[df.grade != '3 Poor']
df = df[df.grade != '13 Mansion']
df = df.drop(['id', 'date', 'yr_renovated', 'floors', 'yr_built'], axis=1) 

#####  Removal of outliers in non-object columns. 

In [7]:
#from pandas.api.types import is_numeric_dtype
#num_col = [c for c in df.columns if is_numeric_dtype(df[c])]
#num_col

In [8]:
#this removes float/int outliers
#def remove_outlier(df):
#    for x in df:
#        q1 = df[x].quantile(0.25)
#        q3 = df[x].quantile(0.75)
#        iqr = q3-q1 #Interquartile range
#        fence_low  = q1-1.5*iqr
#        fence_high = q3+1.5*iqr
#        df = df.filter( (df[x] > fence_low) & (df[x] < fence_high) )
#    return df
#remove_outlier(df)

In [9]:
#def remove_outlier(df_in, *cols):
#    for col in cols:
#        q1 = df_in[col].quantile(0.25)
#        q3 = df_in[col].quantile(0.75)
#        iqr = q3-q1 #Interquartile range
#        fence_low  = q1-1.5*iqr
#        fence_high = q3+1.5*iqr
#        df_in = df_in[(df_in[col] > fence_low)]
#        df_in = df_in[(df_in[col] < fence_high)]
#    return df_in
#remove_outlier(df, num_col)

#####  Split 

Using price as a dependent variable. Assuming our stockholder is a real estate/brokerage company, TBD, who gets a commission on each sale or purchase. 

In [10]:
y = df["price"]
X = df.drop("price", axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

#### Cleaning

In [11]:
df

Unnamed: 0,price,bedrooms,bathrooms,sqft_living,sqft_lot,waterfront,view,condition,grade,sqft_above,sqft_basement,zipcode,lat,long,sqft_living15,sqft_lot15
0,221900.0,3,1.00,1180,5650,,NONE,Average,7 Average,1180,0.0,98178,47.5112,-122.257,1340,5650
1,538000.0,3,2.25,2570,7242,NO,NONE,Average,7 Average,2170,400.0,98125,47.7210,-122.319,1690,7639
2,180000.0,2,1.00,770,10000,NO,NONE,Average,6 Low Average,770,0.0,98028,47.7379,-122.233,2720,8062
3,604000.0,4,3.00,1960,5000,NO,NONE,Very Good,7 Average,1050,910.0,98136,47.5208,-122.393,1360,5000
4,510000.0,3,2.00,1680,8080,NO,NONE,Average,8 Good,1680,0.0,98074,47.6168,-122.045,1800,7503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21592,360000.0,3,2.50,1530,1131,NO,NONE,Average,8 Good,1530,0.0,98103,47.6993,-122.346,1530,1509
21593,400000.0,4,2.50,2310,5813,NO,NONE,Average,8 Good,2310,0.0,98146,47.5107,-122.362,1830,7200
21594,402101.0,2,0.75,1020,1350,NO,NONE,Average,7 Average,1020,0.0,98144,47.5944,-122.299,1020,2007
21595,400000.0,3,2.50,1600,2388,,NONE,Average,8 Good,1600,0.0,98027,47.5345,-122.069,1410,1287


The column 'waterfront' is a binomial categorical variable. The columns 'view', 'condition', and 'grade' are categorical variables. They will need to be converted into a model-able formats. 

The column 'sqft_basement' is numerical in value, however, contains N/As in the form of '?'s. These need to be addressed, dropped or filled in, and converted to a model-able format.

In addition there may be interactions between columns like 'view' and 'waterfront' or any of the 'sqft's. 

##### Waterfront

The column contains 1756 na's, 14,330 NO's and 111 Yes's. I'm assuming na's are NO's so I will be filling them in as such.

In [12]:
X_train['waterfront'].fillna('NO', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


In [13]:
X_test['waterfront'].fillna('NO', inplace=True)

In [14]:
waterfront_train = X_train[['waterfront']]

encoder_waterfront = OrdinalEncoder()

encoder_waterfront.fit(waterfront_train)

encoder_waterfront.categories_[0]

waterfront_encoded_train = encoder_waterfront.transform(waterfront_train)

waterfront_encoded_train = waterfront_encoded_train.flatten()

X_train["waterfront"] = waterfront_encoded_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train["waterfront"] = waterfront_encoded_train


In [15]:
waterfront_test = X_test[['waterfront']]

encoder_waterfront = OrdinalEncoder()

encoder_waterfront.fit(waterfront_test)

encoder_waterfront.categories_[0]

waterfront_encoded_test = encoder_waterfront.transform(waterfront_test)

waterfront_encoded_test = waterfront_encoded_test.flatten()

X_test["waterfront"] = waterfront_encoded_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test["waterfront"] = waterfront_encoded_test


##### View

The column 'view' has 5 categories; NONE, AVERAGE, GOOD, FAIR, and EXCELLENT. 49 values are na's. I'm assuming na's are NONE so I will be filling them in as such.

In [16]:
X_train['view'].fillna("NONE", inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


In [17]:
view_train = X_train[["view"]]

ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")

ohe.fit(view_train)

view_encoded_train = ohe.transform(view_train)

view_encoded_train = pd.DataFrame(
    view_encoded_train,
    columns=ohe.categories_[0],
    index=X_train.index
)
X_train.drop("view", axis=1, inplace=True)

X_train = pd.concat([X_train, view_encoded_train], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [18]:
#def ohe(df, *cols):
#    temp_df = df
#    for col in cols:
#       encoder = OneHotEncoder(handle_unknown='ignore')
#        fitted_arr = encoder.fit_transform(temp_df[[col]]).toarray()
#        col_names = [f"{col}_{f}" for f in encoder.get_feature_names()]
#        encoded_values = pd.DataFrame(fitted_arr, columns=col_names)
#        temp_df = temp_df.join(encoded_values)
#        temp_df.drop(col, axis=1, inplace=True)
#        df = temp_df
#    return df
#X_train = ohe(X_train, "view")

In [19]:
X_test['view'].fillna("NONE", inplace=True)
#X_test = ohe(X_test, "view")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().fillna(


In [20]:
view_test = X_test[["view"]]
view_encoded_test = ohe.transform(view_test)
view_encoded_test = pd.DataFrame(
    view_encoded_test,
    columns=ohe.categories_[0],
    index=X_test.index
)
X_test.drop("view", axis=1, inplace=True)
X_test = pd.concat([X_test, view_encoded_test], axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


##### Condition

The column 'condition' has 5 categories; Average, Good, Very Good, Fair, and Poor. 

In [21]:
condition_train = X_train[["condition"]]

ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")

ohe.fit(condition_train)

condition_encoded_train = ohe.transform(condition_train)

condition_encoded_train = pd.DataFrame(
    condition_encoded_train,
    columns=ohe.categories_[0],
    index=X_train.index
)

X_train.drop("condition", axis=1, inplace=True)

X_train = pd.concat([X_train, condition_encoded_train], axis=1)

In [22]:
condition_train

Unnamed: 0,condition
5622,Average
10020,Good
8319,Good
1360,Good
119,Good
...,...
11529,Average
12223,Average
5495,Average
876,Average


In [23]:
condition_test = X_test[["condition"]]
condition_encoded_test = ohe.transform(condition_test)
condition_encoded_test = pd.DataFrame(
    condition_encoded_test,
    columns=ohe.categories_[0],
    index=X_test.index
)
X_test.drop("condition", axis=1, inplace=True)
X_test = pd.concat([X_test, condition_encoded_test], axis=1)

In [24]:
#X_train = ohe(X_train, "condition")

In [25]:
#X_test = ohe(X_test, "condition")

##### Grade

The column 'grade' has eleven categories; 13 Mansion, 12 Luxury, 11 Excellent, 10 Very Good, 9 Better, 8 Good, 7 Average, 6 Low Average, 5 Fair, 4 Low, and 3 Poor. Some of these categories have very few counts and may need to be dropped.

In [26]:
grade_train = X_train[["grade"]]

ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")

ohe.fit(grade_train)

grade_encoded_train = ohe.transform(grade_train)

grade_encoded_train = pd.DataFrame(
    grade_encoded_train,
    columns=ohe.categories_[0],
    index=X_train.index
)

X_train.drop("grade", axis=1, inplace=True)

X_train = pd.concat([X_train, grade_encoded_train], axis=1)

In [27]:
grade_test = X_test[["grade"]]
grade_encoded_test = ohe.transform(grade_test)
grade_encoded_test = pd.DataFrame(
    grade_encoded_test,
    columns=ohe.categories_[0],
    index=X_test.index
)
X_test.drop("grade", axis=1, inplace=True)
X_test = pd.concat([X_test, grade_encoded_test], axis=1)

In [28]:
#X_train = ohe(X_train, "grade")

In [29]:
#X_test = ohe(X_test, "grade")

##### sqft_basement

The column 'sqft_basement' was numerical data with the exception of the na column as '?', which made the column an object type. We chose to drop these rows because it was only 340. We did so above, which made the column an integer type.

##### Zipcode

May need to use 'zipcode' as a category. 

In [30]:
zipcode_train = X_train[["zipcode"]]

ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")

ohe.fit(zipcode_train)

zipcode_encoded_train = ohe.transform(zipcode_train)

zipcode_encoded_train = pd.DataFrame(
    zipcode_encoded_train,
    columns=ohe.categories_[0],
    index=X_train.index
)

X_train.drop("zipcode", axis=1, inplace=True)

X_train = pd.concat([X_train, zipcode_encoded_train], axis=1)

In [31]:
zipcode_test = X_test[["zipcode"]]
zipcode_encoded_test = ohe.transform(zipcode_test)
zipcode_encoded_test = pd.DataFrame(
    zipcode_encoded_test,
    columns=ohe.categories_[0],
    index=X_test.index
)
X_test.drop("zipcode", axis=1, inplace=True)
X_test = pd.concat([X_test, zipcode_encoded_test], axis=1)

##### Bathrooms

In [32]:
bathrooms_train = X_train[["bathrooms"]]

ohe = OneHotEncoder(categories="auto", sparse=False, handle_unknown="ignore")

ohe.fit(bathrooms_train)

bathrooms_encoded_train = ohe.transform(bathrooms_train)

bathrooms_encoded_train = pd.DataFrame(
    bathrooms_encoded_train,
    columns=ohe.categories_[0],
    index=X_train.index
)

X_train.drop("bathrooms", axis=1, inplace=True)

X_train = pd.concat([X_train, bathrooms_encoded_train], axis=1)

In [33]:
bathrooms_test = X_test[["bathrooms"]]
bathrooms_encoded_test = ohe.transform(bathrooms_test)
bathrooms_encoded_test = pd.DataFrame(
    bathrooms_encoded_test,
    columns=ohe.categories_[0],
    index=X_test.index
)
X_test.drop("bathrooms", axis=1, inplace=True)
X_test = pd.concat([X_test, bathrooms_encoded_test], axis=1)

### Model test

In [34]:
model = LinearRegression()

model.fit(X_train, y_train)


LinearRegression()

In [35]:
model.score(X_train, y_train)

0.8365047715011577

In [36]:
model.score(X_test, y_test)

-38.68296268278333

In [37]:
pipe = make_pipeline(StandardScaler(), LinearRegression())
pipe.fit(X_train, y_train)  # apply scaling on training data
pipe.score(X_test, y_test)

-1.6675732755341317e+21

In [38]:
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [39]:
train_preds = model.predict(X_train_scaled)

test_preds = model.predict(X_test_scaled)


In [40]:
print("Training Metrics:")
# R2
print(f"R2: {r2_score(y_train, train_preds):.3f}")
# MAE
print(f"Mean Absolute Error: {mean_absolute_error(y_train, train_preds):.3f}")
# MSE
print(f"Mean Squared Error: {mean_squared_error(y_train, train_preds):.3f}")
# RMSE - just MSE but set squared=False
print(f"Root Mean Squared Error: {mean_squared_error(y_train, train_preds, squared=False):.3f}")

Training Metrics:
R2: -1270772965988.699
Mean Absolute Error: 318532537894.569
Mean Squared Error: 159598365601188215259136.000
Root Mean Squared Error: 399497641546.465


In [41]:
print("Testing Metrics:")
# R2
print(f"R2: {r2_score(y_test, test_preds):.3f}")
# MAE
print(f"Mean Absolute Error: {mean_absolute_error(y_test, test_preds):.3f}")
# MSE
print(f"Mean Squared Error: {mean_squared_error(y_test, test_preds):.3f}")
# RMSE - just MSE but set squared=False
print(f"Root Mean Squared Error: {mean_squared_error(y_test, test_preds, squared=False):.3f}")

Testing Metrics:
R2: -1290263374725.180
Mean Absolute Error: 323090507579.714
Mean Squared Error: 171270987257510843383808.000
Root Mean Squared Error: 413848990886.182


In [42]:
y = y_train
x = X_train
x = sm.add_constant(x)
model = sm.OLS(y, x.astype(float)).fit()
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.837
Model:,OLS,Adj. R-squared:,0.835
Method:,Least Squares,F-statistic:,670.5
Date:,"Tue, 29 Mar 2022",Prob (F-statistic):,0.0
Time:,14:39:12,Log-Likelihood:,-210620.0
No. Observations:,15846,AIC:,421500.0
Df Residuals:,15725,BIC:,422400.0
Df Model:,120,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.051e+07,4.22e+06,-4.865,0.000,-2.88e+07,-1.22e+07
bedrooms,-7529.2328,1625.140,-4.633,0.000,-1.07e+04,-4343.772
sqft_living,91.9782,1.954,47.062,0.000,88.147,95.809
sqft_lot,0.2589,0.040,6.483,0.000,0.181,0.337
waterfront,5.995e+05,1.76e+04,34.139,0.000,5.65e+05,6.34e+05
sqft_above,60.1243,1.880,31.985,0.000,56.440,63.809
sqft_basement,31.8456,2.084,15.282,0.000,27.761,35.930
lat,2.07e+05,6.68e+04,3.101,0.002,7.62e+04,3.38e+05
long,-1.859e+05,4.92e+04,-3.777,0.000,-2.82e+05,-8.94e+04

0,1,2,3
Omnibus:,11377.099,Durbin-Watson:,2.025
Prob(Omnibus):,0.0,Jarque-Bera (JB):,1495088.419
Skew:,2.599,Prob(JB):,0.0
Kurtosis:,50.301,Cond. No.,3e+17


In [43]:
y = y_test
x = X_test
x = sm.add_constant(x)
model = sm.OLS(y, x.astype(float)).fit()
model.summary()

0,1,2,3
Dep. Variable:,price,R-squared:,0.844
Model:,OLS,Adj. R-squared:,0.84
Method:,Least Squares,F-statistic:,236.0
Date:,"Tue, 29 Mar 2022",Prob (F-statistic):,0.0
Time:,14:39:12,Log-Likelihood:,-70249.0
No. Observations:,5283,AIC:,140700.0
Df Residuals:,5164,BIC:,141500.0
Df Model:,118,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,-2.339e+07,7.14e+06,-3.275,0.001,-3.74e+07,-9.39e+06
bedrooms,-1.338e+04,3024.507,-4.423,0.000,-1.93e+04,-7447.503
sqft_living,89.9923,3.591,25.060,0.000,82.952,97.032
sqft_lot,0.3239,0.077,4.230,0.000,0.174,0.474
waterfront,5.912e+05,3.09e+04,19.127,0.000,5.31e+05,6.52e+05
sqft_above,66.4298,3.340,19.890,0.000,59.882,72.977
sqft_basement,23.5639,3.674,6.414,0.000,16.361,30.766
lat,2.948e+05,1.15e+05,2.554,0.011,6.85e+04,5.21e+05
long,-1.725e+05,7.96e+04,-2.168,0.030,-3.29e+05,-1.65e+04

0,1,2,3
Omnibus:,2966.367,Durbin-Watson:,2.005
Prob(Omnibus):,0.0,Jarque-Bera (JB):,114372.771
Skew:,2.048,Prob(JB):,0.0
Kurtosis:,25.423,Cond. No.,1.41e+18


In [44]:
X_train

Unnamed: 0,bedrooms,sqft_living,sqft_lot,waterfront,sqft_above,sqft_basement,lat,long,sqft_living15,sqft_lot15,...,4.75,5.0,5.25,5.5,5.75,6.0,6.25,6.5,6.75,7.5
5622,2,950,4000,0.0,950,0.0,47.5728,-122.312,1480,4000,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10020,3,1250,4120,0.0,980,270.0,47.6850,-122.360,1250,4120,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8319,3,2360,7291,0.0,1360,1000.0,47.5274,-122.384,1860,5499,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1360,3,2330,11424,0.0,2330,0.0,47.6386,-122.110,2050,11448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
119,4,1610,2982,0.0,1610,0.0,47.5870,-122.294,1610,4040,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11529,3,2020,9718,0.0,2020,0.0,47.5740,-122.210,2370,8604,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12223,4,3720,8633,0.0,3720,0.0,47.6085,-122.013,3515,9660,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5495,4,2240,8322,0.0,2240,0.0,47.7280,-122.206,2240,6448,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
876,3,1910,11576,0.0,1410,500.0,47.7356,-122.198,2040,8750,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
