In [16]:
# =============================
# STEP 1: Upload Files
# =============================
import io

from google.colab import files
import pandas as pd
from sklearn.linear_model import LinearRegression

# Upload train.csv, test.csv, data_description.txt
uploaded = files.upload()


def _read_uploaded_csv(name):
    """Load `name` from this run's upload when available, else from disk.

    Colab does not overwrite an existing file on upload: a re-uploaded
    "test.csv" is stored as "test (1).csv", so reading the bare path would
    silently pick up the older copy. Preferring the uploaded bytes avoids
    that; the on-disk fallback keeps the cell working when a file was
    uploaded in an earlier run and not re-sent now.
    """
    if name in uploaded:
        return pd.read_csv(io.BytesIO(uploaded[name]))
    return pd.read_csv(name)


# Load datasets
train = _read_uploaded_csv("train.csv")
test = _read_uploaded_csv("test.csv")

# =============================
# STEP 2: Select Features & Target
# =============================
features = ["GrLivArea", "BedroomAbvGr", "FullBath"]
target = "SalePrice"

X_train = train[features]
y_train = train[target]
X_test = test[features]

# =============================
# STEP 3: Handle Missing Values (if any)
# =============================
# Impute with medians learned from the training features only, and reuse
# them for the test features. Filling with 0 would inject implausible
# values (e.g. zero living area) and distort the linear fit.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)

# =============================
# STEP 4: Train Model
# =============================
model = LinearRegression()
model.fit(X_train, y_train)

print("✅ Model trained successfully")
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

# =============================
# STEP 5: Predict on Test Data
# =============================
test_preds = model.predict(X_test)

# Two-column frame: the test set's Id column next to the predicted price.
submission = pd.DataFrame({
    "Id": test["Id"],
    "SalePrice": test_preds
})

# Save submission file
submission.to_csv("submission.csv", index=False)
print("\n✅ Predictions saved as submission.csv")

# =============================
# STEP 6: Download File
# =============================
files.download("submission.csv")

# Show first few predictions
print(submission.head())


Saving test.csv to test (1).csv
✅ Model trained successfully
Coefficients: [   110.06172639 -27859.33222353  29694.68839062]
Intercept: 47509.48218946537

✅ Predictions saved as submission.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

     Id      SalePrice
0  1461  120100.812977
1  1462  139898.208279
2  1463  202611.414586
3  1464  199859.871426
4  1465  192059.204300


In [8]:
# 1. Import libraries
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Column names used for fitting; reused so that every prediction input
# carries the same feature names the estimator was trained with.
feature_cols = ['GrLivArea', 'BedroomAbvGr', 'FullBath']

# Prediction from the `model` that is already in the kernel when this cell
# starts. This cell only creates its own model in step 5, so an earlier cell
# must have defined one, otherwise this line raises NameError.
sample = pd.DataFrame([[2000, 3, 2]], columns=feature_cols)
print("Predicted Price:", model.predict(sample)[0])

# 2. Load dataset
df = pd.read_csv("train.csv")   # uploaded file
print("Dataset shape:", df.shape)
print(df.head())

# 3. Select features and target
X = df[feature_cols]
y = df['SalePrice']

# 4. Train-test split
# NOTE: rebinds X_train / X_test / y_train. From here on X_test is a 20%
# hold-out slice of train.csv; the fixed random_state makes it repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Train model
# NOTE: rebinds `model`; it is now fitted on the 80% split only.
model = LinearRegression()
model.fit(X_train, y_train)

# 6. Predictions
y_pred = model.predict(X_test)

# 7. Evaluate model
print("R² Score:", r2_score(y_test, y_pred))
print("MSE:", mean_squared_error(y_test, y_pred))

# 8. Try prediction
# `sample` stays a plain nested list, but it is wrapped in a DataFrame with
# the fitted column names before predicting. Passing the bare list to a
# model fitted on a DataFrame makes scikit-learn warn about missing feature
# names and leaves column alignment unchecked.
sample = [[2000, 3, 2]]
sample_frame = pd.DataFrame(sample, columns=feature_cols)
print("Predicted Price:", model.predict(sample_frame)[0])


Predicted Price: 240377.51479736282
Dataset shape: (1460, 81)
   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   

  YrSo

