In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [None]:
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation and numerical warnings raised by
# the libraries used below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
from functools import partial

In [None]:
from dotenv import load_dotenv

from pathlib import Path

# Optional dotenv file, resolved relative to the notebook's working directory.
# NOTE(review): presumably holds the settings the model registry needs — confirm.
env_path = Path("../../.env-live")

if env_path.exists():
    # Announce success only after the variables have actually been loaded.
    load_dotenv(dotenv_path=env_path)
    print('envs Loaded')
else:
    # Stay non-fatal, but make the skipped load visible instead of passing silently.
    print(f'env file not found: {env_path}')
from jrjModelRegistry.jrjModelRegistry import registerAJrjModel

In [None]:
def generalRegressionPredictor(self, transformedData):
    """Delegate prediction to ``self.predict``.

    Args:
        self: Any object exposing a ``predict`` method (bound to a fitted
            result via ``functools.partial`` elsewhere in this notebook).
        transformedData: Input forwarded unchanged to ``self.predict``.

    Returns:
        Whatever ``self.predict(transformedData)`` returns.
    """
    predictions = self.predict(transformedData)
    return predictions

In [None]:
# Load the AnnArbor.xlsx workbook from the notebook directory; later cells use its
# Beds, Baths, Sqft and Rent columns.
annArborDf = pd.read_excel("./AnnArbor.xlsx")
# Disabled alternative source (presumably a remote copy of the same workbook — confirm):
# annArborDf = pd.read_excel("https://www.dropbox.com/scl/fi/bkcdp9tpqqh6dfr6phtt8/AnnArbor.xlsx?rlkey=0agfqwc7f0kt7oqb3e2h6q3qs&dl=1")
# Show the loaded frame as the cell output.
annArborDf

In [None]:
# One-row example payload in column-oriented form (column name -> list of values).
# It is fed to the transformers and stored as "sampleData" when models are registered.
annArborSampleData = {"Beds": [1], "Baths": [1.0], "Sqft": [500]}
annArborSampleData

In [None]:
# Descriptive statistics of the loaded frame.
annArborDf.describe()

In [None]:
# Total number of cells (rows x columns).
annArborDf.size

In [None]:
# (rows, columns) of the loaded frame.
annArborDf.shape

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plotting: open an 8x8 inch figure.
# NOTE(review): fig1 is not referenced again in the visible cells — confirm it is still needed.
fig1 = plt.figure(figsize=(8, 8))

In [None]:
# Rent plotted against the Beds column.
plt.scatter(annArborDf["Beds"], annArborDf["Rent"], color='blue', alpha=0.9, label='Data Points - scatter')

# Grid, axis labels and legend decorate the current axes independently of each other.
plt.grid(True)
plt.xlabel('Beds')
plt.ylabel('Rent')
plt.legend()

plt.show()

In [None]:
# Rent plotted against the Baths column.
plt.scatter(annArborDf["Baths"], annArborDf["Rent"], color='blue', alpha=0.9, label='Data Points - scatter')

# Grid, axis labels and legend decorate the current axes independently of each other.
plt.grid(True)
plt.xlabel('Baths')
plt.ylabel('Rent')
plt.legend()

plt.show()

In [None]:
# Rent plotted against the Sqft column.
plt.scatter(annArborDf["Sqft"], annArborDf["Rent"], color='blue', alpha=0.9, label='Data Points - scatter')

# Grid, axis labels and legend decorate the current axes independently of each other.
plt.grid(True)
plt.xlabel('Sqft')
plt.ylabel('Rent')
plt.legend()

plt.show()

In [None]:
def annArborRegModel1Transformer(dataForTransfer = None):
    """Return the Model 1 design matrix: a constant column plus ``Sqft``.

    Args:
        dataForTransfer: A pandas DataFrame (copied, so the caller's frame is
            left untouched) or any input accepted by ``pd.DataFrame``, such as
            a dict of column name -> list of values. Must contain ``Sqft``.

    Returns:
        The DataFrame produced by ``sm.add_constant`` for the ``Sqft`` column.
    """
    # Imports are local to the function.
    # NOTE(review): presumably so the function is self-contained when stored with the model — confirm.
    import pandas as pd
    import statsmodels.api as sm
    frame = (
        dataForTransfer.copy()
        if isinstance(dataForTransfer, pd.DataFrame)
        else pd.DataFrame(dataForTransfer)
    )
    # has_constant='add' always adds the constant, even when an input column is
    # itself constant (e.g. a single-row payload).
    return sm.add_constant(frame[['Sqft']], has_constant='add')

# Ordinary least squares of Rent on the Model 1 design matrix (constant + Sqft).
annArborRegModel1 = sm.OLS(
    endog=annArborDf["Rent"],
    exog=annArborRegModel1Transformer(annArborDf),
)
annArborRegModel1Fit = annArborRegModel1.fit()
# print() emits the plain-text regression summary.
print(annArborRegModel1Fit.summary())

In [None]:
# Sanity check: run the Model 1 transformer on the one-row sample payload.
annArborRegModel1Transformer(annArborSampleData)

In [None]:
# Attach the preprocessing function and a predictor bound to this fit as
# attributes of the fitted result before it is handed to registerAJrjModel.
annArborRegModel1Fit.transformer = annArborRegModel1Transformer
annArborRegModel1Fit.mainPredictor = partial(
    generalRegressionPredictor, annArborRegModel1Fit
)

# Register the fit with its coefficients, R-squared figures and the sample payload.
# NOTE(review): the "modelLibraray" key spelling is kept as-is — confirm what the registry expects.
registerAJrjModel(annArborRegModel1Fit, {
    "modelName": "annArborRegModel1Fit",
    "version": "1.0.1",
    "params": annArborRegModel1Fit.params.to_dict(),
    "score": float(annArborRegModel1Fit.rsquared),
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": annArborRegModel1Fit.pvalues.to_dict(),
        "r_squared": float(annArborRegModel1Fit.rsquared),
        "adj_r_squared": float(annArborRegModel1Fit.rsquared_adj),
    },
    "sampleData": {"dataForTransfer": annArborSampleData},
})

In [None]:
# In-sample predictions of Model 1 on the same frame the model was fitted on.
predictedRent1 = annArborRegModel1Fit.predict(annArborRegModel1Transformer(annArborDf))
# Store the predictions next to the observed data (adds a column to annArborDf in place).
annArborDf['predictedRent1'] = predictedRent1
annArborDf

In [None]:
# Observed data with Sqft on the x-axis and Rent on the y-axis, so the points
# share the coordinate system of the axis labels and of the fitted line below.
# (The scatter previously passed Rent as x and Sqft as y.)
plt.scatter(
  annArborDf["Sqft"],
  annArborDf["Rent"],
  color='blue',
  alpha=0.5,
  label='Data Points - scatter',
)

# Fitted line of Model 1: Rent = const + slope * Sqft on an evenly spaced Sqft grid.
intercept = annArborRegModel1Fit.params['const']
sqFtSlope = annArborRegModel1Fit.params['Sqft']
# NOTE(review): hard-coded Sqft range — confirm it covers the data.
x_values = np.linspace(500, 4500, 200)
y_values = intercept + sqFtSlope * x_values

plt.plot(
  x_values,
  y_values,
  color='red',
  label='rentSqftModel1Fit - predictedRent1'
)
plt.xlabel('Sqft')
plt.ylabel('Rent')
plt.legend()
plt.grid(True)


plt.show()

In [None]:
def annArborRegModel2Transformer(dataForTransfer = None):
    """Return the Model 2 design matrix: a constant plus ``Beds``, ``Baths``, ``Sqft``.

    Args:
        dataForTransfer: A pandas DataFrame (copied, so the caller's frame is
            left untouched) or any input accepted by ``pd.DataFrame``, such as
            a dict of column name -> list of values. Must contain the three
            feature columns.

    Returns:
        The DataFrame produced by ``sm.add_constant`` for the feature columns.
    """
    # Imports are local to the function.
    # NOTE(review): presumably so the function is self-contained when stored with the model — confirm.
    import pandas as pd
    import statsmodels.api as sm
    frame = (
        dataForTransfer.copy()
        if isinstance(dataForTransfer, pd.DataFrame)
        else pd.DataFrame(dataForTransfer)
    )
    features = frame[["Beds", "Baths", "Sqft"]]
    # has_constant='add' always adds the constant, even for a single-row payload.
    return sm.add_constant(features, has_constant='add')
# Ordinary least squares of Rent on the Model 2 design matrix (constant + Beds, Baths, Sqft).
annArborRegModel2 = sm.OLS(
    endog=annArborDf["Rent"],
    exog=annArborRegModel2Transformer(annArborDf),
)
annArborRegModel2Fit = annArborRegModel2.fit()
# print() emits the plain-text regression summary.
print(annArborRegModel2Fit.summary())

In [None]:
# Attach the preprocessing function and a predictor bound to this fit as
# attributes of the fitted result before it is handed to registerAJrjModel.
annArborRegModel2Fit.transformer = annArborRegModel2Transformer
annArborRegModel2Fit.mainPredictor = partial(
    generalRegressionPredictor, annArborRegModel2Fit
)

# Register the fit with its coefficients, R-squared figures and the sample payload.
# NOTE(review): the "modelLibraray" key spelling is kept as-is — confirm what the registry expects.
registerAJrjModel(annArborRegModel2Fit, {
    "modelName": "annArborRegModel2Fit",
    "version": "1.0.1",
    "params": annArborRegModel2Fit.params.to_dict(),
    "score": float(annArborRegModel2Fit.rsquared),
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": annArborRegModel2Fit.pvalues.to_dict(),
        "r_squared": float(annArborRegModel2Fit.rsquared),
        "adj_r_squared": float(annArborRegModel2Fit.rsquared_adj),
    },
    "sampleData": {"dataForTransfer": annArborSampleData},
})

In [None]:
def annArborRegModel3Transformer(dataForTransfer = None):
    """Build the Model 3 design matrix: const, Beds, Baths, Sqft and log(Sqft).

    Args:
        dataForTransfer: A pandas DataFrame, or anything ``pd.DataFrame`` can be
            built from (e.g. a dict of column name -> list of values). Must
            provide ``Beds``, ``Baths`` and ``Sqft`` columns.

    Returns:
        A new DataFrame with the constant column added by ``sm.add_constant``
        plus ``Beds``, ``Baths``, ``Sqft`` and ``log(Sqft)`` (natural log).
        A DataFrame argument is copied first, so the caller's frame is not modified.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a ``Sqft`` value is zero or negative (``math.log`` domain error).
    """
    # Imports are local to the function.
    # NOTE(review): presumably so the function is self-contained when stored with the model — confirm.
    import pandas as pd
    import statsmodels.api as sm
    import math
    if isinstance(dataForTransfer, pd.DataFrame):
        df = dataForTransfer.copy()
    else:
        df = pd.DataFrame(dataForTransfer)
    # Map over the Sqft column only, rather than materialising a Series for every
    # row with DataFrame.apply(axis=1); values and math.log error behaviour are unchanged.
    df['log(Sqft)'] = df['Sqft'].map(math.log)
    dfTransformer = sm.add_constant(df[["Beds", "Baths", "Sqft", 'log(Sqft)']],has_constant='add')
    return dfTransformer

In [None]:
# Persist the log(Sqft) column computed by the Model 3 transformer onto annArborDf itself;
# the transformer works on a copy, so the source frame does not receive the column otherwise.
annArborDf['log(Sqft)'] = annArborRegModel3Transformer(annArborDf)['log(Sqft)']
annArborDf

In [None]:
# Ordinary least squares of Rent on the Model 3 design matrix
# (constant + Beds, Baths, Sqft, log(Sqft)).
annArborRegModel3 = sm.OLS(
    endog=annArborDf["Rent"],
    exog=annArborRegModel3Transformer(annArborDf),
)
annArborRegModel3Fit = annArborRegModel3.fit()
# print() emits the plain-text regression summary.
print(annArborRegModel3Fit.summary())

In [None]:
# Attach the preprocessing function and a predictor bound to this fit as
# attributes of the fitted result before it is handed to registerAJrjModel.
annArborRegModel3Fit.transformer = annArborRegModel3Transformer
annArborRegModel3Fit.mainPredictor = partial(
    generalRegressionPredictor, annArborRegModel3Fit
)

# Register the fit with its coefficients, R-squared figures and the sample payload.
# NOTE(review): the "modelLibraray" key spelling is kept as-is — confirm what the registry expects.
registerAJrjModel(annArborRegModel3Fit, {
    "modelName": "annArborRegModel3Fit",
    "version": "1.0.1",
    "params": annArborRegModel3Fit.params.to_dict(),
    "score": float(annArborRegModel3Fit.rsquared),
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": annArborRegModel3Fit.pvalues.to_dict(),
        "r_squared": float(annArborRegModel3Fit.rsquared),
        "adj_r_squared": float(annArborRegModel3Fit.rsquared_adj),
    },
    "sampleData": {"dataForTransfer": annArborSampleData},
})

In [None]:
def annArborRegModel4Transformer(dataForTransfer = None):
    """Build the Model 4 design matrix: const, Beds and log(Sqft).

    Args:
        dataForTransfer: A pandas DataFrame, or anything ``pd.DataFrame`` can be
            built from (e.g. a dict of column name -> list of values). Must
            provide ``Beds`` and ``Sqft`` columns.

    Returns:
        A new DataFrame with the constant column added by ``sm.add_constant``
        plus ``Beds`` and ``log(Sqft)`` (natural log of ``Sqft``).
        A DataFrame argument is copied first, so the caller's frame is not modified.

    Raises:
        KeyError: if a required column is missing.
        ValueError: if a ``Sqft`` value is zero or negative (``math.log`` domain error).
    """
    # Imports are local to the function.
    # NOTE(review): presumably so the function is self-contained when stored with the model — confirm.
    import pandas as pd
    import statsmodels.api as sm
    import math
    if isinstance(dataForTransfer, pd.DataFrame):
        df = dataForTransfer.copy()
    else:
        df = pd.DataFrame(dataForTransfer)
    # Map over the Sqft column only, rather than materialising a Series for every
    # row with DataFrame.apply(axis=1); values and math.log error behaviour are unchanged.
    df['log(Sqft)'] = df['Sqft'].map(math.log)
    dfTransformer = sm.add_constant(df[["Beds", 'log(Sqft)']],has_constant='add')
    return dfTransformer

In [None]:
# Ordinary least squares of Rent on the Model 4 design matrix (constant + Beds, log(Sqft)).
annArborRegModel4 = sm.OLS(
    endog=annArborDf["Rent"],
    exog=annArborRegModel4Transformer(annArborDf),
)
annArborRegModel4Fit = annArborRegModel4.fit()
# print() emits the plain-text regression summary.
print(annArborRegModel4Fit.summary())

In [None]:
# Rent plotted against log(Sqft), the transformed predictor used by Models 3 and 4.
plt.scatter(
  annArborDf["log(Sqft)"],
  annArborDf["Rent"],
  color='red',
  alpha=0.9,
  label='Data Points - scatter',
)

# Label the x-axis with the quantity actually plotted; the axis was previously
# left unlabeled because the only xlabel call was commented out.
plt.xlabel('log(Sqft)')
plt.ylabel('Rent')
plt.legend()
plt.grid(True)


plt.show()

In [None]:
# Attach the preprocessing function and a predictor bound to this fit as
# attributes of the fitted result before it is handed to registerAJrjModel.
annArborRegModel4Fit.transformer = annArborRegModel4Transformer
annArborRegModel4Fit.mainPredictor = partial(
    generalRegressionPredictor, annArborRegModel4Fit
)

# Register the fit with its coefficients, R-squared figures and the sample payload.
# NOTE(review): the "modelLibraray" key spelling is kept as-is — confirm what the registry expects.
registerAJrjModel(annArborRegModel4Fit, {
    "modelName": "annArborRegModel4Fit",
    "version": "1.0.1",
    "params": annArborRegModel4Fit.params.to_dict(),
    "score": float(annArborRegModel4Fit.rsquared),
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": annArborRegModel4Fit.pvalues.to_dict(),
        "r_squared": float(annArborRegModel4Fit.rsquared),
        "adj_r_squared": float(annArborRegModel4Fit.rsquared_adj),
    },
    "sampleData": {"dataForTransfer": annArborSampleData},
})

In [None]:
## After careful review we realized annArborRegModel4Fit performs best, so let's keep it as the best model.

In [None]:
# Load the model-comparison table from a CSV in the notebook directory.
# NOTE(review): this notebook does not show how annArborModelsComparison.csv is produced — confirm its source.
compareDf = pd.read_csv('./annArborModelsComparison.csv')
compareDf

In [None]:
# Re-attach the Model 4 transformer and bound predictor here so this cell does not
# depend on another cell having mutated annArborRegModel4Fit first. The assignments
# are idempotent: running them again leaves the same attributes in place.
annArborRegModel4Fit.transformer = annArborRegModel4Transformer
annArborRegModel4Fit.mainPredictor = partial(generalRegressionPredictor, annArborRegModel4Fit)

# Register the Model 4 fit a second time under the "annArborRegModelBEST" name.
# NOTE(review): the "modelLibraray" key spelling is kept as-is — confirm what the registry expects.
registerAJrjModel(
    annArborRegModel4Fit,
    {
        "modelName": "annArborRegModelBEST",
        "version": "1.0.1",
        "params": annArborRegModel4Fit.params.to_dict(),
        "score": float(annArborRegModel4Fit.rsquared),
        "modelLibraray": 'sm.OLS',
        "libraryMetadata": {
            "pvalues": annArborRegModel4Fit.pvalues.to_dict(),
            "r_squared": float(annArborRegModel4Fit.rsquared),
            "adj_r_squared": float(annArborRegModel4Fit.rsquared_adj)
        },

        "sampleData": {
            "dataForTransfer": annArborSampleData
        }
    }
)