In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder

# Train a linear regression that predicts `price` from the listing features
# below and report its fit on a held-out split. `model` and `label_encoder`
# are left at notebook scope because the next cell persists them.
FEATURE_COLUMNS = ['bedrooms', 'bathrooms', 'sqft', 'location']
TARGET_COLUMN = 'price'

# Load the dataset
data = pd.read_csv('data.csv')

# Encode the 'location' column as integer codes so it can be fed to the model.
# NOTE(review): LabelEncoder is documented for target labels; with integer
# codes the linear model fits a single coefficient for location, which implies
# an arbitrary ordering of locations. One-hot encoding is the usual
# alternative, but it would change the saved encoder artifact -- confirm with
# whatever consumes 'models/location_encoder.pkl' before switching.
label_encoder = LabelEncoder()
data['location'] = label_encoder.fit_transform(data['location'])

# Select specific columns for training
X = data[FEATURE_COLUMNS]
y = data[TARGET_COLUMN]

# Split the data into training and testing sets (80/20, fixed seed so the
# split is reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Print the model coefficients (one per entry of FEATURE_COLUMNS, same order)
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# LinearRegression.score returns the coefficient of determination (R^2), not
# a classification accuracy, so the values are labelled as R^2.
train_score = model.score(X_train, y_train)
print("Training set R^2:", train_score)

test_score = model.score(X_test, y_test)
print("Test set R^2:", test_score)


Coefficients: [-105510.55942228   37474.81687134     427.52321855    1784.78454488]
Intercept: 30319.39293688361
Training set accuracy: 0.9850239945752916
Test set accuracy: 0.9533146378521392


In [2]:
import os

import joblib

# joblib.dump does not create missing parent directories, so make sure the
# output folder exists; otherwise a fresh checkout fails with FileNotFoundError.
os.makedirs('models', exist_ok=True)

# Save the trained model to a file
joblib.dump(model, 'models/trained_model.pkl')
# Save the fitted location encoder alongside it so the same
# location -> integer mapping can be reapplied to new inputs.
joblib.dump(label_encoder, 'models/location_encoder.pkl')

['models/location_encoder.pkl']