In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from functools import partial

In [None]:
from dotenv import load_dotenv

from pathlib import Path

# Location of the optional env file, relative to the notebook's working directory.
env_path = Path("../../.env-live")

# Load the variables only when the file is present; a missing file is not an error.
# The confirmation is printed after load_dotenv has run, so the message is never
# shown for a load that raised part-way through.
if env_path.exists():
    load_dotenv(dotenv_path=env_path)
    print('envs Loaded')
from jrjModelRegistry.jrjModelRegistry import registerAJrjModel

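Download the `College.xlsx` dataset from [here](https://www.dropbox.com/scl/fi/1sc8ojfezlbrcaje42w0n/College.xlsx?rlkey=i3starhohiwkua8ekbjk3nb92&st=yd75jyvp&dl=0) and save it in the same folder as this notebook; the next cell reads it from `./College.xlsx`.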

In [None]:
collegeDf = pd.read_excel("./College.xlsx")

In [None]:
collegeDf.head()

In [None]:
collegeDf.tail()

In [None]:
collegeDf.shape

In [None]:
collegeDf.size

In [None]:
collegeDf.describe()

In [None]:
# Scatter plot of the raw data: Cost on the x-axis, Earnings on the y-axis.
plt.figure(
  figsize=(8, 8)
)

plt.scatter(
  collegeDf["Cost"],
  collegeDf["Earnings"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

# Axis labels, legend and grid so the figure can be read on its own.
plt.xlabel('Cost')
plt.ylabel('Earnings')
plt.legend()
plt.grid(True)



plt.show()

In [None]:
sm.add_constant(collegeDf['Cost']), type(sm.add_constant(collegeDf['Cost']))

In [None]:
# Model 1: ordinary least squares of Earnings on Cost.
# sm.OLS does not add an intercept itself, so add_constant supplies the `const` column.
earningOthersOlsModelFit1 = sm.OLS(
  collegeDf["Earnings"],
  sm.add_constant(collegeDf['Cost'])
).fit()

In [None]:
print(earningOthersOlsModelFit1.summary())

In [None]:
# In-sample predictions: rebuild the same design matrix (const + Cost) that was used for fitting.
predictedEarning1 = earningOthersOlsModelFit1.predict(sm.add_constant(collegeDf["Cost"]))
# Stored as a new column on collegeDf; the plotting cell below reads it from there.
collegeDf['predictedEarning1'] = predictedEarning1
collegeDf

In [None]:
# Raw Cost/Earnings scatter with the model-1 fitted values drawn on top.
plt.figure(
  figsize=(8, 8)
)

plt.scatter(
  collegeDf["Cost"],
  collegeDf["Earnings"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)


# Fitted values from earningOthersOlsModelFit1 (stored on collegeDf earlier).
plt.plot(
  collegeDf["Cost"],
  collegeDf["predictedEarning1"],
  color='red',
  label='OLS 1'
)

# Label the axes and show the grid, matching the first scatter plot of the same
# data, so the figure can be read without the surrounding cells.
plt.xlabel('Cost')
plt.ylabel('Earnings')
plt.legend()
plt.grid(True)

plt.show()

In [None]:
earningOthersOlsModelFit1SampleData = {
    "Cost": [22920, 23429]
}

In [None]:
def earningOthersOlsModelFit1Transformer(dataForTransfer = None):
    """Turn a raw payload into the design matrix used by ``earningOthersOlsModelFit1``.

    Parameters
    ----------
    dataForTransfer : dict or other ``pandas.DataFrame``-compatible input
        Must provide a ``Cost`` column, e.g. ``{"Cost": [22920, 23429]}``.

    Returns
    -------
    pandas.DataFrame
        A ``const`` column of ones followed by ``Cost``.

    Raises
    ------
    KeyError
        If the payload has no ``Cost`` column (this includes the ``None`` default).
    """
    # Imports stay inside the function, as in the original cell.
    # NOTE(review): presumably so the function still works after the registry
    # serializes it and runs it elsewhere - confirm.
    import pandas as pd
    import statsmodels.api as sm

    dfTransformer = pd.DataFrame(dataForTransfer)
    # has_constant='add' always prepends the intercept column. The default
    # ('skip') omits it whenever an input column holds one repeated non-zero
    # value - which is every one-row payload - and predict() then fails because
    # the frame is one column short of the fitted parameters.
    dfTransformer = sm.add_constant(dfTransformer[['Cost']], has_constant='add')
    return dfTransformer

In [None]:
earningOthersOlsModelFit1SampleTransformed = earningOthersOlsModelFit1Transformer(earningOthersOlsModelFit1SampleData)
earningOthersOlsModelFit1SampleTransformed

In [None]:
earningOthersOlsModelFit1Predict = earningOthersOlsModelFit1.predict(earningOthersOlsModelFit1SampleTransformed)
earningOthersOlsModelFit1Predict

In [None]:
def earningOthersOlsModelFit1MainPredictor(self, transformedData):
    """Predict from an already-transformed design matrix.

    ``self`` is the object the function is bound to (the notebook pre-binds the
    fitted model-1 results with ``functools.partial``); only its ``predict``
    method is used.
    """
    predictions = self.predict(transformedData)
    return predictions


In [None]:
# Attach two callables to the fitted results object itself.
# NOTE(review): presumably these attribute names are what registerAJrjModel looks up - confirm.
# mainPredictor: the predictor function with this results object pre-bound as its `self` argument.
earningOthersOlsModelFit1.mainPredictor = partial(earningOthersOlsModelFit1MainPredictor, earningOthersOlsModelFit1)
# transformer: builds the design matrix from a raw payload.
earningOthersOlsModelFit1.transformer = earningOthersOlsModelFit1Transformer

In [None]:
test1 = earningOthersOlsModelFit1.transformer(earningOthersOlsModelFit1SampleData)
test1

In [None]:
earningOthersOlsModelFit1.mainPredictor(test1)

In [None]:
# Metadata passed to registerAJrjModel together with the fitted model:
# name/version, fitted coefficients, fit statistics and a sample payload.
earningOthersOlsModelFit1Metadata = {
    # Plain string literal: the previous f-string had no placeholders.
    "modelName": "earningOthersOlsModelFit1",
    "version": "1.0.1",
    "params": earningOthersOlsModelFit1.params.to_dict(),
    # float(...) converts the statistic to a plain Python float.
    "score": float(earningOthersOlsModelFit1.rsquared),
    # NOTE(review): the key is spelled "modelLibraray"; left unchanged because the
    # registry may read it under this exact name - confirm before renaming.
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": earningOthersOlsModelFit1.pvalues.to_dict(),
        "r_squared": float(earningOthersOlsModelFit1.rsquared),
        "adj_r_squared": float(earningOthersOlsModelFit1.rsquared_adj)
    },

    # Example input, keyed by the transformer's parameter name.
    "sampleData": {
        "dataForTransfer": earningOthersOlsModelFit1SampleData
    }
}
earningOthersOlsModelFit1Metadata

In [None]:
registerAJrjModel(earningOthersOlsModelFit1, earningOthersOlsModelFit1Metadata)

In [None]:
collegeDf

In [None]:
# 3D scatter of the raw data: Cost (x), Grad (y) and Earnings (z).
fig = plt.figure()
ax = plt.axes(projection ="3d")

# Creating plot
ax.scatter3D(
  collegeDf["Cost"],
  collegeDf["Grad"],
  collegeDf["Earnings"],
  color = "green"
)
plt.title("Cost,Grad -> Earnings")
ax.set_xlabel('Cost')
ax.set_ylabel('Grad')
ax.set_zlabel('Earnings')

# show plot
plt.show()

In [None]:
collegeDf[['Cost', 'Grad']], type(collegeDf[['Cost', 'Grad']])

In [None]:
sm.add_constant(collegeDf[['Cost', 'Grad']]), type(sm.add_constant(collegeDf[['Cost', 'Grad']]))

In [None]:
# Model 2: ordinary least squares of Earnings on Cost and Grad.
# add_constant supplies the `const` (intercept) column of the design matrix.
earningOthersOlsModelFit2 = sm.OLS(
  collegeDf["Earnings"],
  sm.add_constant(collegeDf[['Cost', 'Grad']])
).fit()

In [None]:
print(earningOthersOlsModelFit2.summary())

In [None]:
# In-sample predictions for model 2, using the same design matrix (const + Cost + Grad) as the fit.
predictedEarning2 = earningOthersOlsModelFit2.predict(
  sm.add_constant(collegeDf[['Cost', 'Grad']])
)
# Stored as a new column on collegeDf, next to the observed Earnings.
collegeDf['predictedEarning2'] = predictedEarning2
collegeDf

In [None]:
# Draw the model-2 regression plane over the observed (Cost, Grad, Earnings) points.

# Extracting coefficients
intercept = earningOthersOlsModelFit2.params['const']
coef_cost = earningOthersOlsModelFit2.params['Cost']
coef_grad = earningOthersOlsModelFit2.params['Grad']

# Create 3D grid for plotting: 100 x 100 points spanning the observed Cost and Grad ranges
cost_range = np.linspace(collegeDf['Cost'].min(), collegeDf['Cost'].max(), 100)
grad_range = np.linspace(collegeDf['Grad'].min(), collegeDf['Grad'].max(), 100)
cost_grid, grad_grid = np.meshgrid(cost_range, grad_range)

# Calculate predicted earnings for each combination of cost and grad
# (the fitted equation evaluated directly on the grid)
earnings_predicted = intercept + coef_cost * cost_grid + coef_grad * grad_grid




fig = plt.figure()
ax = plt.axes(projection ="3d")

# Scatter plot of the actual data points
# NOTE(review): the label= values here and on the surface are never displayed,
# because this cell does not draw a legend.
ax.scatter(collegeDf['Cost'], collegeDf['Grad'], collegeDf['Earnings'], color='blue', label='Actual Earnings')

# Plotting the fitted plane
ax.plot_surface(cost_grid, grad_grid, earnings_predicted, color='red', alpha=0.5, label='Fitted Plane')

# Labeling axes
ax.set_xlabel('Cost')
ax.set_ylabel('Grad')
ax.set_zlabel('Earnings')



plt.title('Cost and Grad vs. Earnings with Fitted Plane')

# Rotating the plot
# ax.view_init(elev=0, azim=0)  # Set the elevation and azimuth angles
plt.show()

In [None]:
collegeDf

In [None]:
earningOthersOlsModelFit2SampleData = {
    "Cost": [22920, 23429],
    "Grad": [62, 88]
}

In [None]:
def earningOthersOlsModelFit2Transformer(dataForTransfer = None):
    """Turn a raw payload into the design matrix used by ``earningOthersOlsModelFit2``.

    Parameters
    ----------
    dataForTransfer : dict or other ``pandas.DataFrame``-compatible input
        Must provide ``Cost`` and ``Grad`` columns, e.g.
        ``{"Cost": [22920, 23429], "Grad": [62, 88]}``.

    Returns
    -------
    pandas.DataFrame
        A ``const`` column of ones followed by ``Cost`` and ``Grad``.

    Raises
    ------
    KeyError
        If a required column is missing (this includes the ``None`` default).
    """
    # Imports stay inside the function, as in the original cell.
    # NOTE(review): presumably so the function still works after the registry
    # serializes it and runs it elsewhere - confirm.
    import pandas as pd
    import statsmodels.api as sm

    dfTransformer = pd.DataFrame(dataForTransfer)
    # has_constant='add' always prepends the intercept column. The default
    # ('skip') omits it whenever an input column holds one repeated non-zero
    # value - which is every one-row payload - and predict() then fails because
    # the frame is one column short of the fitted parameters.
    dfTransformer = sm.add_constant(dfTransformer[['Cost', 'Grad']], has_constant='add')
    return dfTransformer

In [None]:
earningOthersOlsModelFit2SampleTransformed = earningOthersOlsModelFit2Transformer(earningOthersOlsModelFit2SampleData)
earningOthersOlsModelFit2SampleTransformed

In [None]:
earningOthersOlsModelFit2Predict = earningOthersOlsModelFit2.predict(earningOthersOlsModelFit2SampleTransformed)
earningOthersOlsModelFit2Predict

In [None]:
def earningOthersOlsModelFit2MainPredictor(self, transformedData):
    """Predict from an already-transformed design matrix.

    ``self`` is the object the function is bound to (the notebook pre-binds the
    fitted model-2 results with ``functools.partial``); only its ``predict``
    method is used.
    """
    predictions = self.predict(transformedData)
    return predictions


In [None]:
# Attach two callables to the fitted results object itself.
# NOTE(review): presumably these attribute names are what registerAJrjModel looks up - confirm.
# mainPredictor: the predictor function with this results object pre-bound as its `self` argument.
earningOthersOlsModelFit2.mainPredictor = partial(earningOthersOlsModelFit2MainPredictor, earningOthersOlsModelFit2)
# transformer: builds the design matrix from a raw payload.
earningOthersOlsModelFit2.transformer = earningOthersOlsModelFit2Transformer

In [None]:
test2 = earningOthersOlsModelFit2.transformer(earningOthersOlsModelFit2SampleData)
test2

In [None]:
earningOthersOlsModelFit2.mainPredictor(test2)

In [None]:
# Metadata passed to registerAJrjModel together with the fitted model:
# name/version, fitted coefficients, fit statistics and a sample payload.
earningOthersOlsModelFit2Metadata = {
    # Plain string literal: the previous f-string had no placeholders.
    "modelName": "earningOthersOlsModelFit2",
    "version": "1.0.1",
    "params": earningOthersOlsModelFit2.params.to_dict(),
    # float(...) converts the statistic to a plain Python float.
    "score": float(earningOthersOlsModelFit2.rsquared),
    # NOTE(review): the key is spelled "modelLibraray"; left unchanged because the
    # registry may read it under this exact name - confirm before renaming.
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": earningOthersOlsModelFit2.pvalues.to_dict(),
        "r_squared": float(earningOthersOlsModelFit2.rsquared),
        "adj_r_squared": float(earningOthersOlsModelFit2.rsquared_adj)
    },

    # Example input, keyed by the transformer's parameter name.
    "sampleData": {
        "dataForTransfer": earningOthersOlsModelFit2SampleData
    }
}
earningOthersOlsModelFit2Metadata

In [None]:
registerAJrjModel(earningOthersOlsModelFit2, earningOthersOlsModelFit2Metadata)

In [None]:
# Model 3: ordinary least squares of Earnings on Cost, Grad and Debt.
# add_constant supplies the `const` (intercept) column of the design matrix.
earningOthersOlsModelFit3 = sm.OLS(
  collegeDf["Earnings"],
  sm.add_constant(collegeDf[['Cost', 'Grad','Debt']])
).fit()

In [None]:
print(earningOthersOlsModelFit3.summary())

In [None]:
print(earningOthersOlsModelFit2.summary())

In [None]:
# In-sample predictions for model 3, using the same design matrix (const + Cost + Grad + Debt) as the fit.
predictedEarning3 = earningOthersOlsModelFit3.predict(
  sm.add_constant(collegeDf[['Cost', 'Grad', 'Debt']])
)
# Stored as a new column on collegeDf, next to the observed Earnings.
collegeDf['predictedEarning3'] = predictedEarning3
collegeDf

In [None]:
earningOthersOlsModelFit3SampleData = {
    "Cost": [22920, 23429],
    "Grad": [62, 88],
    "Debt": [88,92]
}

In [None]:
def earningOthersOlsModelFit3Transformer(dataForTransfer = None):
    """Turn a raw payload into the design matrix used by ``earningOthersOlsModelFit3``.

    Parameters
    ----------
    dataForTransfer : dict or other ``pandas.DataFrame``-compatible input
        Must provide ``Cost``, ``Grad`` and ``Debt`` columns, e.g.
        ``{"Cost": [22920, 23429], "Grad": [62, 88], "Debt": [88, 92]}``.

    Returns
    -------
    pandas.DataFrame
        A ``const`` column of ones followed by ``Cost``, ``Grad`` and ``Debt``.

    Raises
    ------
    KeyError
        If a required column is missing (this includes the ``None`` default).
    """
    # Imports stay inside the function, as in the original cell.
    # NOTE(review): presumably so the function still works after the registry
    # serializes it and runs it elsewhere - confirm.
    import pandas as pd
    import statsmodels.api as sm

    dfTransformer = pd.DataFrame(dataForTransfer)
    # has_constant='add' always prepends the intercept column. The default
    # ('skip') omits it whenever an input column holds one repeated non-zero
    # value - which is every one-row payload - and predict() then fails because
    # the frame is one column short of the fitted parameters.
    dfTransformer = sm.add_constant(dfTransformer[['Cost', 'Grad', 'Debt']], has_constant='add')
    return dfTransformer

In [None]:
earningOthersOlsModelFit3SampleTransformed = earningOthersOlsModelFit3Transformer(earningOthersOlsModelFit3SampleData)
earningOthersOlsModelFit3SampleTransformed

In [None]:
earningOthersOlsModelFit3Predict = earningOthersOlsModelFit3.predict(earningOthersOlsModelFit3SampleTransformed)
earningOthersOlsModelFit3Predict

In [None]:
def earningOthersOlsModelFit3MainPredictor(self, transformedData):
    """Predict from an already-transformed design matrix.

    ``self`` is the object the function is bound to (the notebook pre-binds the
    fitted model-3 results with ``functools.partial``); only its ``predict``
    method is used.
    """
    predictions = self.predict(transformedData)
    return predictions

In [None]:
# Attach two callables to the fitted results object itself.
# NOTE(review): presumably these attribute names are what registerAJrjModel looks up - confirm.
# mainPredictor: the predictor function with this results object pre-bound as its `self` argument.
earningOthersOlsModelFit3.mainPredictor = partial(earningOthersOlsModelFit3MainPredictor, earningOthersOlsModelFit3)
# transformer: builds the design matrix from a raw payload.
earningOthersOlsModelFit3.transformer = earningOthersOlsModelFit3Transformer

In [None]:
test3 = earningOthersOlsModelFit3.transformer(earningOthersOlsModelFit3SampleData)
test3

In [None]:
earningOthersOlsModelFit3.mainPredictor(test3)

In [None]:
# Metadata passed to registerAJrjModel together with the fitted model:
# name/version, fitted coefficients, fit statistics and a sample payload.
earningOthersOlsModelFit3Metadata = {
    # Plain string literal: the previous f-string had no placeholders.
    "modelName": "earningOthersOlsModelFit3",
    "version": "1.0.1",
    "params": earningOthersOlsModelFit3.params.to_dict(),
    # float(...) converts the statistic to a plain Python float.
    "score": float(earningOthersOlsModelFit3.rsquared),
    # NOTE(review): the key is spelled "modelLibraray"; left unchanged because the
    # registry may read it under this exact name - confirm before renaming.
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": earningOthersOlsModelFit3.pvalues.to_dict(),
        "r_squared": float(earningOthersOlsModelFit3.rsquared),
        "adj_r_squared": float(earningOthersOlsModelFit3.rsquared_adj)
    },

    # Example input, keyed by the transformer's parameter name.
    "sampleData": {
        "dataForTransfer": earningOthersOlsModelFit3SampleData
    }
}
earningOthersOlsModelFit3Metadata

In [None]:
registerAJrjModel(earningOthersOlsModelFit3, earningOthersOlsModelFit3Metadata)