In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [15]:
class AnalysisDataAndFitLinearRegression:
    """Summarise a housing CSV and fit a linear regression model for ``Price``.

    The CSV is expected to provide the columns named in ``TARGET`` and
    ``PREDICTORS``; a missing column surfaces as a pandas ``KeyError``.
    """

    # Response column of the regression.
    TARGET = "Price"

    # Explanatory columns, in the order they are handed to the model and
    # reported in ``model_parameters``.
    PREDICTORS = (
        "Bedroom",
        "Space",
        "Room",
        "Lot",
        "Tax",
        "Bathroom",
        "Garage",
        "Condition",
    )

    # The house whose price is predicted, keyed by predictor name so the
    # values cannot silently drift out of step with ``PREDICTORS``.
    NEW_HOUSE = {
        "Bedroom": 3,
        "Space": 1500,
        "Room": 8,
        "Lot": 40,
        "Tax": 1000,
        "Bathroom": 2,
        "Garage": 1,
        "Condition": 0,
    }

    def analyse_and_fit_lrm(self, path):
        """Load the data, compute the summary and fit the regression.

        Parameters
        ----------
        path : str or path-like or file-like
            Passed unchanged to ``pandas.read_csv``.

        Returns
        -------
        dict
            ``{"summary_dict": ..., "regression_dict": ...}`` where

            * ``summary_dict`` holds ``"statistics"`` (list of five floats),
              ``"data_frame"`` (a DataFrame) and ``"number_of_observations"``
              (int);
            * ``regression_dict`` holds ``"model_parameters"`` (dict with
              ``"Intercept"`` followed by one entry per predictor, rounded to
              two decimals) and ``"price_prediction"`` (float, rounded to two
              decimals).
        """
        df = pd.read_csv(path)
        return {
            "summary_dict": self._summarise(df),
            "regression_dict": self._fit_and_predict(df),
        }

    @staticmethod
    def _summarise(df):
        """Build the descriptive part of the result from the raw frame."""
        # Tax figures for houses with exactly 2 bathrooms and 4 bedrooms.
        tax = df.loc[(df["Bathroom"] == 2) & (df["Bedroom"] == 4), "Tax"]
        statistics = [
            float(tax.mean()),
            float(tax.std()),  # pandas default: sample standard deviation
            float(tax.median()),
            float(tax.min()),
            float(tax.max()),
        ]

        # Houses with more than 800 units of space, most expensive first.
        data_frame = df[df["Space"] > 800].sort_values(by="Price", ascending=False)

        # 4th 5-quantile of Lot, i.e. the 80th percentile; count lots at or above it.
        q4 = df["Lot"].quantile(0.8)
        number_of_observations = int((df["Lot"] >= q4).sum())

        return {
            "statistics": statistics,
            "data_frame": data_frame,
            "number_of_observations": number_of_observations,
        }

    @classmethod
    def _fit_and_predict(cls, df):
        """Fit ``TARGET ~ PREDICTORS`` and predict the price of ``NEW_HOUSE``."""
        predictors = list(cls.PREDICTORS)

        # Only rows that are incomplete in a column the model actually uses are
        # discarded; a NaN in some unrelated column no longer costs a sample.
        df_clean = df.dropna(subset=predictors + [cls.TARGET])

        lr = LinearRegression()
        lr.fit(df_clean[predictors], df_clean[cls.TARGET])

        lr_parameters = {"Intercept": float(round(lr.intercept_, 2))}
        for name, coef in zip(predictors, lr.coef_):
            lr_parameters[name] = float(round(coef, 2))

        # Predict from a DataFrame carrying the same column names used in
        # fit(): this keeps value/column alignment explicit and avoids
        # scikit-learn's "X does not have valid feature names" warning that a
        # bare ndarray triggers.
        new_house = pd.DataFrame([cls.NEW_HOUSE], columns=predictors)
        price_prediction = float(round(lr.predict(new_house)[0], 2))

        return {
            "model_parameters": lr_parameters,
            "price_prediction": price_prediction,
        }

In [16]:
# Run the whole pipeline on the CSV (relative path, resolved against the
# kernel's working directory) and keep the result for the cells below.
analysis = AnalysisDataAndFitLinearRegression()
result = analysis.analyse_and_fit_lrm("realest.csv")
# Last expression of the cell: displays the descriptive-summary part.
result["summary_dict"]



{'statistics': [2806.875, 1115.5490053481888, 2680.0, 1390.0, 5320.0],
 'data_frame':       Price  Bedroom   Space  Room   Lot     Tax  Bathroom  Garage  Condition
 49   459259      NaN  2316.0   6.0  74.0  3190.0       1.0     NaN        1.0
 266  432846      NaN  2178.0   6.0  53.0  3240.0       1.0     0.0        0.0
 242  427215      1.0  2209.0   5.0  73.0  2510.0       1.0     0.0        0.0
 333  426541      5.0  2042.0   9.0  52.0  3620.0       3.0     2.0        NaN
 336  425668      5.0  2257.0  10.0  73.0  3650.0       2.0     2.0        1.0
 ..      ...      ...     ...   ...   ...     ...       ...     ...        ...
 13   191121      4.0  1090.0  10.0  38.0  1540.0       3.0     1.0        0.0
 149  187231      1.0   903.0   5.0  56.0  1710.0       2.0     0.0        0.0
 198  176881      2.0   936.0   5.0  47.0  1310.0       2.0     1.0        0.0
 284  172642      NaN   913.0   6.0  43.0  2150.0       1.0     0.0        0.0
 43   160051      2.0   946.0   5.0   NaN  110

In [17]:
# Display the fitted model parameters and the predicted price; relies on
# `result` having been computed by an earlier cell.
result["regression_dict"]

{'model_parameters': {'Intercept': 45991.02,
  'Bedroom': 554.91,
  'Space': 110.97,
  'Room': 4689.71,
  'Lot': 472.91,
  'Tax': 3.76,
  'Bathroom': 4000.42,
  'Garage': 5442.16,
  'Condition': 18082.99},
 'price_prediction': 287739.96}