

Task details
Write a method named analyse_and_fit_lrm()  which takes one argument 
(a path to a dataset) and returns a dictionary of length 2 with the
following objects (the order and names of the objects should be the same as below):

summary_dict  – a dictionary of length 3 with the following elements:
    
    statistics  – a list of numbers of length 5 with the mean, standard deviation, median, minimum and maximum of the variable Tax for all houses with two bathrooms and four bedrooms.

    data_frame  – a data frame with observations for which Space  is bigger than 800, ordered by decreasing Price
    
    number_of_observations – a numeric value corresponding to the number of observations for which the value of the variable Lot is equal to or bigger than the 4th 5-quantile (i.e. the 80th percentile) of this variable.


regression_dict – a dictionary of length 2 with the following elements:

	model_parameters – a dictionary of length 9 with the model parameters. The first key of the dictionary should be named Intercept, and all other keys should have the same name as the respective variable.
	price_prediction – a numeric value which corresponds to the prediction of the price (using the applied model) for a house with the following specific parameters: three bedrooms; 1500 square feet of space; eight rooms; width of lot is 40; $1000 tax; two bathrooms; one space in the garage; house is in bad condition.

In [113]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from numpy.linalg import inv
from numpy import matmul
from collections import OrderedDict

class AnalysisDataAndFitLinearRegression:
    """Summarise a housing dataset and fit a least-squares model for ``Price``.

    The methods reference the columns ``Price``, ``Bedroom``, ``Space``,
    ``Room``, ``Lot``, ``Tax``, ``Bathroom``, ``Garage`` and ``Condition``.
    """

    def __init__(self):
        self.version = 1

    @staticmethod
    def fit_sklearn(df, target):
        """Fit scikit-learn's ``LinearRegression`` on ``df``.

        Every column except ``target`` is used as a feature. Handy as a
        cross-check for the analytical solution computed by ``fit``.

        Returns:
            The fitted ``LinearRegression`` estimator.
        """
        X = df.drop(columns=[target])
        y = df[target]
        return LinearRegression(fit_intercept=True).fit(X, y)

    @staticmethod
    def fit(df, target):
        """Estimate ordinary-least-squares coefficients for ``target``.

        Args:
            df: Data frame holding the target column and the feature columns.
                It must not contain missing values.
            target: Name of the column to regress on all other columns.

        Returns:
            An ``OrderedDict`` whose first key is ``'Intercept'`` followed by
            one key per feature column (in column order), each mapped to its
            coefficient as a Python ``float``.

        Raises:
            ValueError: If ``df`` has no rows, or already has a column named
                ``'Intercept'`` (raised by ``DataFrame.insert``).
        """
        if df.empty:
            raise ValueError("cannot fit a regression on an empty data frame")

        design = df.drop(columns=[target])
        # The constant column goes FIRST so that its position in the design
        # matrix matches the leading 'Intercept' key of the returned mapping.
        design.insert(0, 'Intercept', 1.0)

        # lstsq solves the least-squares problem without explicitly inverting
        # X'X, which is numerically safer for near-collinear features.
        betas, *_ = np.linalg.lstsq(
            design.to_numpy(dtype=float),
            df[target].to_numpy(dtype=float),
            rcond=None,
        )
        return OrderedDict(zip(design.columns, betas.tolist()))

    @staticmethod
    def tax_stats(df, stats=('mean', 'std', '50%', 'min', 'max')):
        """Describe ``Tax`` for houses with two bathrooms and four bedrooms.

        Args:
            df: Data frame with ``Bathroom``, ``Bedroom`` and ``Tax`` columns.
            stats: Labels to pick from ``Series.describe()``, in output order
                (``'50%'`` is the median).

        Returns:
            A list with one value per entry of ``stats``.
        """
        selected = (df['Bathroom'] == 2) & (df['Bedroom'] == 4)
        desc = df.loc[selected, 'Tax'].describe()
        return [desc[s] for s in stats]

    @staticmethod
    def predict(coefficients: OrderedDict, house: dict):
        """Return the model's prediction for one house.

        Args:
            coefficients: Mapping from term name to fitted coefficient.
            house: Mapping from term name to the house's value for that term;
                include ``'Intercept': 1`` to add the constant term.

        Raises:
            KeyError: If ``house`` names a term missing from ``coefficients``.
        """
        return sum(coefficients[name] * value for name, value in house.items())

    def analyse_and_fit_lrm(self, path):
        """Read the CSV at ``path``, summarise it and fit the price model.

        The summary is computed on the data as read; the regression is fitted
        only on rows without missing values.

        Returns:
            A dictionary with two entries, in this order:

            ``summary_dict``
                ``statistics`` (see ``tax_stats``), ``data_frame`` (rows with
                ``Space`` > 800, ordered by decreasing ``Price``) and
                ``number_of_observations`` (rows whose ``Lot`` is at least the
                0.8 quantile of ``Lot``).
            ``regression_dict``
                ``model_parameters`` (dictionary, ``Intercept`` first) and
                ``price_prediction`` for the reference house defined below.
        """
        data = pd.read_csv(path)
        complete_rows = self.__listwise_deletion(data)
        coefficients = self.fit(complete_rows, 'Price')

        # 4th 5-quantile == 80th percentile.
        lot_threshold = data['Lot'].quantile(.8)
        summary_dict = {
            'statistics': self.tax_stats(data),
            'data_frame': data[data['Space'] > 800].sort_values(
                'Price', ascending=False
            ),
            'number_of_observations': int((data['Lot'] >= lot_threshold).sum()),
        }

        house = {
            'Intercept': 1,
            'Bedroom': 3,
            'Space': 1500,
            'Room': 8,
            'Lot': 40,
            'Tax': 1000,
            'Bathroom': 2,
            'Garage': 1,
            'Condition': 0,
        }

        regression_dict = {
            # A real dictionary (not a dict_values view) so callers can look
            # parameters up by name.
            'model_parameters': dict(coefficients),
            'price_prediction': self.predict(coefficients, house),
        }

        return {
            'summary_dict': summary_dict,
            'regression_dict': regression_dict,
        }

    def __listwise_deletion(self, data: pd.DataFrame):
        """Return ``data`` without any row that contains a missing value."""
        return data.dropna()



# Run the complete analysis on the real-estate dataset.
model = AnalysisDataAndFitLinearRegression()
result = model.analyse_and_fit_lrm("realest.csv")

# Show the Tax statistics and the predicted price, one per line.
print(
    result['summary_dict']['statistics'],
    result['regression_dict']['price_prediction'],
    sep="\n",
)

TypeError: 'AnalysisDataAndFitLinearRegression' object is not subscriptable

In [44]:
import numpy as np
import pandas as pd

df = pd.read_csv("realest.csv")
df.head()

Unnamed: 0,Price,Bedroom,Space,Room,Lot,Tax,Bathroom,Garage,Condition
0,53.0,2.0,967.0,5.0,39.0,652.0,1.5,0.0,0.0
1,55.0,2.0,815.0,5.0,33.0,1000.0,1.0,2.0,1.0
2,56.0,3.0,900.0,5.0,35.0,897.0,1.5,1.0,0.0
3,58.0,3.0,1007.0,6.0,24.0,964.0,1.5,2.0,0.0
4,64.0,3.0,1100.0,7.0,50.0,1099.0,1.5,1.5,0.0


df1

In [45]:
df.isnull().sum()

Price         1
Bedroom       1
Space        11
Room          1
Lot          11
Tax          10
Bathroom      1
Garage        1
Condition     1
dtype: int64

In [97]:
df.describe()


Unnamed: 0,Price,Bedroom,Space,Room,Lot,Tax,Bathroom,Garage,Condition
count,128.0,128.0,128.0,128.0,128.0,128.0,128.0,128.0,128.0
mean,56.71875,3.195312,1109.382812,6.53125,32.664062,925.476562,1.488281,0.8125,0.226562
std,13.348708,1.392096,480.844011,1.724933,8.630907,435.174559,0.548674,0.810852,0.420252
min,32.0,1.0,539.0,4.0,24.0,418.0,1.0,0.0,0.0
25%,46.0,2.0,809.75,5.0,25.0,679.75,1.0,0.0,0.0
50%,56.0,3.0,963.5,6.0,30.0,855.5,1.5,1.0,0.0
75%,64.0,4.0,1199.5,7.0,35.5,1039.75,2.0,1.5,0.0
max,90.0,8.0,2295.0,12.0,50.0,2752.0,3.0,2.0,1.0


In [58]:
df.dropna(inplace=True)

In [59]:
df.isnull().sum()

Price        0
Bedroom      0
Space        0
Room         0
Lot          0
Tax          0
Bathroom     0
Garage       0
Condition    0
dtype: int64

In [None]:
# NOTE(review): DataFrame.insert requires the arguments loc, column and value;
# as written this (never executed) cell raises TypeError.
# TODO: supply the intended arguments or remove the cell.
df.insert()


In [60]:

# Independent variables: every column except the target.
x = df.drop(columns=['Price'])

# Dependent variable: the Price column.
y = df['Price']

In [61]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the split is random on every run
# because no random_state is given.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.2,
)

In [99]:
from sklearn.linear_model import LinearRegression

# Fit ordinary least squares on the training split; fit() returns the
# estimator itself, so construction and fitting can be chained.
lm = LinearRegression().fit(xtrain, ytrain)
lm.intercept_
lm.coef_

array([-4.01202593e+00,  1.13781285e-02,  2.44034847e+00,  3.08402255e-01,
        2.33475032e-03,  4.51416213e+00,  5.04014935e+00,  5.90693518e-01])

In [67]:
xtest.head()

Unnamed: 0,Bedroom,Space,Room,Lot,Tax,Bathroom,Garage,Condition
47,2.0,539.0,5.0,50.0,721.0,1.0,0.0,0.0
155,4.0,951.0,7.0,30.0,895.0,2.0,1.0,0.0
120,2.0,1107.0,7.0,30.0,722.0,2.0,0.0,1.0
1,2.0,815.0,5.0,33.0,1000.0,1.0,2.0,1.0
67,3.0,715.0,5.0,25.0,815.0,1.0,0.0,0.0


In [76]:
prediction = lm.predict(xtest)
xtest.shape

(26, 8)

In [85]:
# Feature values of the house to be priced, keyed by column name.
house = dict(
    Bedroom=3,
    Space=1500,
    Room=8,
    Lot=40,
    Tax=1000,
    Bathroom=2,
    Garage=1,
    Condition=0,
)

# View over the feature values, in insertion order.
valhouse = house.values()



In [91]:
# Feature values of the single house, in the same order as the columns of x.
a = np.array([3, 1500, 8, 40, 1000, 2, 1, 0])

# The model was fitted on a DataFrame, so predict on a one-row DataFrame with
# the same column names: this avoids scikit-learn's "X does not have valid
# feature names" warning and lets it check that the columns line up.
predictionHouse = lm.predict(pd.DataFrame(a.reshape(1, -1), columns=x.columns))

In [92]:
predictionHouse

array([70.77210665])

In [None]:

#[ 2.08654086e+03  6.56429857e+00 -1.27599216e+04  7.80331827e+01
#  9.65383477e+01  9.19801647e+03  1.78615633e+04 -3.32879735e+04]