In [None]:
import numpy as np
import pandas as pd

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
from functools import partial

In [None]:
from dotenv import load_dotenv

from pathlib import Path

# Location of the optional dotenv file; the relative path is resolved against the
# current working directory of the kernel.
env_path = Path("../../.env-live")


# Load the file only when it is present. If it is missing, this cell silently does nothing.
if env_path.exists():
    load_dotenv(dotenv_path=env_path)

In [None]:
from jrjModelRegistry import handleDashboard, jrjRouterModelRegistry
from jrjModelRegistry.jrjModelRegistry import registerAJrjModel

In [None]:
# https://www.statsmodels.org/stable/index.html
import statsmodels.api as sm

In [None]:
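# Download the dataset (educationWage.xlsx) from https://www.dropbox.com/scl/fo/v71bqw2zowgla028cwdh0/AEfemP4C8qQ2X5tTNXMCqUQ/Session%203?dl=0&preview=educationWage.xlsx&rlkey=rlkgo6o58ex2kjbiv4b7cr9nj&subfolder_nav_tracking=1
# and upload it to Colab (or place it next to this notebook) so that it can be read from ./educationWage.xlsx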

In [None]:
# Load the workbook into a DataFrame; later cells read its "Education" and "Wage" columns.
educationWageDf = pd.read_excel("./educationWage.xlsx")

In [None]:
educationWageDf

In [None]:
type(educationWageDf)

In [None]:
educationWageDf["Education"], type(educationWageDf["Education"])

In [None]:
# Scatter plot of Wage against Education with fixed axis ranges and a grid.
educationWageDf.plot.scatter(
    x = 'Education',
    y = 'Wage',
    xlim = (0, 25),
    ylim = (0, 180),
    grid = True
)


In [None]:
educationWageDf["Education"], type(educationWageDf["Education"])

In [None]:
sm.add_constant(educationWageDf["Education"]), type(sm.add_constant(educationWageDf["Education"]))

In [None]:
sm.add_constant(educationWageDf["Education"])

In [None]:
# OLS specification: Wage is the response; the regressors are Education plus an
# explicit constant column. sm.OLS does not add an intercept by itself, hence add_constant.
educationWageLiniarRgressionModel = sm.OLS(
  educationWageDf["Wage"],
  sm.add_constant(educationWageDf["Education"])
)


In [None]:
educationWageLiniarRgressionModelFit = educationWageLiniarRgressionModel.fit()

In [None]:
print(educationWageLiniarRgressionModelFit.summary())

In [None]:
educationWageLiniarRgressionModelFit.params

In [None]:
# wage = b0 + b1 * Education

In [None]:
# In-sample predictions: the design matrix is rebuilt exactly as at fit time
# (add_constant applied to the Education column).
predictedWage = educationWageLiniarRgressionModelFit.predict(sm.add_constant(educationWageDf["Education"]))
predictedWage

In [None]:
# Example payload: it is fed to both transformers in this notebook and stored as
# "sampleData" -> "dataForTransfer" in both metadata dicts.
educationWageLiniarRgressionModelSampleData = {
    "Education": [20, 25]
}

In [None]:
def educationWageLiniarRgressionModelTransformer(dataForTransfer = None):
    """Build the OLS design matrix (``const`` followed by ``Education``) for prediction.

    Parameters
    ----------
    dataForTransfer : dict or DataFrame-like
        Anything ``pd.DataFrame`` accepts that yields an ``Education`` column,
        e.g. ``{"Education": [20, 25]}``.

    Returns
    -------
    pandas.DataFrame
        Two columns in the order the model was fitted with: ``const`` (always 1.0)
        and ``Education``.

    Raises
    ------
    KeyError
        If the input has no ``Education`` column (including the default ``None``).
    """
    # Imported inside the function, as in the original — presumably so the function
    # stays self-contained when the registry serializes it (TODO confirm).
    import pandas as pd

    designMatrix = pd.DataFrame(dataForTransfer)[['Education']].copy()
    # The constant is inserted explicitly instead of calling sm.add_constant with its
    # defaults: add_constant uses has_constant='skip', which silently omits the
    # ``const`` column whenever Education is constant across the payload (a single
    # row, or identical values). The model would then receive one column instead of two.
    designMatrix.insert(0, 'const', 1.0)
    return designMatrix

In [None]:
educationWageLiniarRgressionModelSampleTransformed = educationWageLiniarRgressionModelTransformer(educationWageLiniarRgressionModelSampleData)
educationWageLiniarRgressionModelSampleTransformed

In [None]:
educationWageLiniarRgressionModelFit.predict(educationWageLiniarRgressionModelSampleTransformed)

In [None]:
def educationWageLiniarRgressionModelMainPredictor(self, transformedData):
    """Return whatever ``self.predict`` produces for the already-transformed input.

    ``self`` is the object the function is bound to (any object exposing ``predict``);
    ``transformedData`` is passed to it unchanged.
    """
    predictions = self.predict(transformedData)
    return predictions
# Pre-bind the fitted results object as ``self`` so mainPredictor can be called with
# only the transformed data.
educationWageLiniarRgressionModelFit.mainPredictor = partial(educationWageLiniarRgressionModelMainPredictor, educationWageLiniarRgressionModelFit)
# Attach the preprocessing function to the same object under the name ``transformer``.
educationWageLiniarRgressionModelFit.transformer = educationWageLiniarRgressionModelTransformer

In [None]:
test1 = educationWageLiniarRgressionModelFit.transformer(educationWageLiniarRgressionModelSampleData)
test1

In [None]:
educationWageLiniarRgressionModelFit.mainPredictor(test1)

In [None]:
# Metadata passed to registerAJrjModel together with the fitted statsmodels results.
educationWageLiniarRgressionModelMetadata = {
    "modelName":f"educationWageLiniarRgression",
    "version":"1.0.1",
    # Fitted coefficients as a plain dict.
    "params": educationWageLiniarRgressionModelFit.params.to_dict(),
    # R-squared as a plain Python float (the same value is repeated under libraryMetadata).
    "score": float(educationWageLiniarRgressionModelFit.rsquared),
    # NOTE(review): the key is spelled "modelLibraray" — presumably what the registry
    # expects; confirm before renaming.
    "modelLibraray": 'sm.OLS',
    "libraryMetadata": {
        "pvalues": educationWageLiniarRgressionModelFit.pvalues.to_dict(),
        "r_squared": float(educationWageLiniarRgressionModelFit.rsquared),
        "adj_r_squared": float(educationWageLiniarRgressionModelFit.rsquared_adj)
    },

    # The key "dataForTransfer" matches the transformer's parameter name.
    "sampleData": {
        "dataForTransfer": educationWageLiniarRgressionModelSampleData
    }
}
educationWageLiniarRgressionModelMetadata

In [None]:
# Hand the fitted statsmodels results object (with mainPredictor/transformer attached)
# and its metadata to the registry. What registerAJrjModel does with them is not
# visible in this notebook.
registerAJrjModel(educationWageLiniarRgressionModelFit, educationWageLiniarRgressionModelMetadata)

In [None]:
# Store the statsmodels in-sample predictions as a new column
# (this mutates educationWageDf in place).
educationWageDf['predictedWage'] = predictedWage
educationWageDf

In [None]:
# Predict for one hand-built row: [1, 15] is [constant, Education], the column order
# produced by add_constant at fit time.
testPredict = educationWageLiniarRgressionModelFit.predict([[1,15]])
testPredict

In [None]:
import matplotlib.pyplot as plt


In [None]:
# Plot the observed points and the statsmodels OLS fit on one 8x8 figure.
plt.figure(
  figsize=(8, 8)
)

# Observed (Education, Wage) pairs.
plt.scatter(
  educationWageDf["Education"],
  educationWageDf["Wage"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

# Fitted values from the predictedWage column, drawn as a line.
plt.plot(
  educationWageDf["Education"],
  educationWageDf["predictedWage"],
  color='red',
  label='OLS Regression - predictedWage'
)
plt.title('Education Level vs. Wage with OLS Regression')
plt.xlabel('Education Level(yr)')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

# Same axis ranges as the earlier pandas scatter plot.
plt.gca().set_xlim([0, 25])
plt.gca().set_ylim([0, 180])


plt.show()

# Another way: statsmodels OLS on NumPy arrays

In [None]:
# A small hand-entered sample as NumPy arrays; X is used as the regressor and y as the
# response in the OLS fit below.
X = np.array([20, 18, 12, 16, 10, 23, 2, 5,])
y = np.array([160, 120, 70 , 100, 65, 160, 40, 55])
X, y

In [None]:
X = sm.add_constant(X)

In [None]:
X

In [None]:
model = sm.OLS(y, X).fit()


In [None]:
print(model.summary())

In [None]:
print("Coefficients:", model.params)

In [None]:
# Each row is [constant, x]: predict for x = 5 and x = 15.
predictions = model.predict([[1, 5], [1, 15]])
predictions

In [None]:
def predicWage(intercept, slope, yearsOfExperience):
    """Evaluate the fitted line ``intercept + slope * yearsOfExperience``.

    Note: the third parameter is named ``yearsOfExperience``, but the calls in this
    notebook pass the regressor value of the Education models (5 and 15).
    """
    slopeContribution = slope * yearsOfExperience
    return intercept + slopeContribution

In [None]:
predicWage(model.params[0], model.params[1], 5)

In [None]:
predicWage(model.params[0], model.params[1], 15)

# Another way: linear regression with scikit-learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
educationWageDf

In [None]:
educationWageLiniarRgressionModel2 = LinearRegression()

In [None]:
educationWageDf["Education"], type(educationWageDf["Education"])

In [None]:
educationWageDf[["Education"]], type(educationWageDf[["Education"]])

In [None]:
# Fit on the Education column only, passed as a 2-D DataFrame (double brackets) rather
# than a 1-D Series. No constant column is added here: LinearRegression() was created
# with its defaults, which fit the intercept internally.
# scikit-learn's fit returns the estimator itself, so the ...Model2Fit name refers to
# the same object as ...Model2.
educationWageLiniarRgressionModel2Fit = educationWageLiniarRgressionModel2.fit(
    educationWageDf[["Education"]],
    educationWageDf["Wage"]
)

In [None]:
# Print the intercept and coefficient
print("Intercept:", educationWageLiniarRgressionModel2Fit.intercept_)
print("Coefficient:", educationWageLiniarRgressionModel2Fit.coef_[0])

In [None]:
predictions1 = educationWageLiniarRgressionModel2Fit.predict(np.array([[5], [15]]))
predictions1

In [None]:
predicWage(educationWageLiniarRgressionModel2Fit.intercept_, educationWageLiniarRgressionModel2Fit.coef_[0], 5)

In [None]:
predicWage(educationWageLiniarRgressionModel2Fit.intercept_, educationWageLiniarRgressionModel2Fit.coef_[0], 15)

In [None]:
# In-sample predictions from the scikit-learn model, stored next to the statsmodels ones
# (this mutates educationWageDf in place).
predictedWage2 = educationWageLiniarRgressionModel2Fit.predict(educationWageDf[["Education"]])
educationWageDf['predictedWage2'] = predictedWage2
educationWageDf

In [None]:
# Plot the observed points together with both fitted lines (statsmodels and scikit-learn).
plt.figure(
  figsize=(8, 8)
)

# Observed (Education, Wage) pairs.
plt.scatter(
  educationWageDf["Education"],
  educationWageDf["Wage"],
  color='blue',
  alpha=0.9,
  label='Data Points - scatter',
)

# statsmodels fitted values (predictedWage column).
plt.plot(
  educationWageDf["Education"],
  educationWageDf["predictedWage"],
  color='red',
  label='OLS Regression - predictedWage'
)
# scikit-learn fitted values (predictedWage2 column).
# NOTE(review): the legend label below says "predictedWage" although this line plots
# predictedWage2 — confirm whether the label should be updated.
plt.plot(
  educationWageDf["Education"],
  educationWageDf["predictedWage2"],
  color='black',
  label='sklearn  Regression - predictedWage'
)
plt.title('Education Level vs. Wage with OLS Regression')
plt.xlabel('Education Level(yr)')
plt.ylabel('Wage K')
plt.legend()
plt.grid(True)

plt.gca().set_xlim([0, 25])
plt.gca().set_ylim([0, 180])


# Fitted equation from the scikit-learn coefficients, rounded to two decimals.
equation = f'Wage = {educationWageLiniarRgressionModel2Fit.coef_[0]:.2f} * Education + {educationWageLiniarRgressionModel2Fit.intercept_:.2f}'


# Annotate the figure with the equation, centred at position (10, 120).
plt.text(
    10, 120,
    equation,
    horizontalalignment='center',
    verticalalignment='center',
    fontsize=12,
    color="green",
    bbox=dict(facecolor='white', alpha=0.5)
)



plt.show()

In [None]:
def educationWageLiniarRgressionModel2Transformer(dataForTransfer = None):
    """Turn a raw payload into the feature frame expected by the scikit-learn model.

    Parameters
    ----------
    dataForTransfer : dict or DataFrame-like, optional
        Anything ``pd.DataFrame`` accepts, e.g. ``{"Education": [20, 25]}``.

    Returns
    -------
    pandas.DataFrame
        Only the ``Education`` column when the payload contains it (the model was
        fitted on that single feature, and the statsmodels transformer in this
        notebook selects the same column). A payload without ``Education`` — including
        the default ``None``, which yields an empty frame — is returned unchanged.
    """
    # Imported inside the function, as in the original — presumably so the function
    # stays self-contained when the registry serializes it (TODO confirm).
    import pandas as pd

    featureFrame = pd.DataFrame(dataForTransfer)
    # Drop any extra payload keys: passing columns the estimator was not fitted on
    # would make its predict() fail.
    if 'Education' in featureFrame.columns:
        featureFrame = featureFrame[['Education']]
    return featureFrame

In [None]:
educationWageLiniarRgressionModel2SampleTransformed = educationWageLiniarRgressionModel2Transformer(educationWageLiniarRgressionModelSampleData)
educationWageLiniarRgressionModel2SampleTransformed

In [None]:
educationWageLiniarRgressionModel2Fit.predict(educationWageLiniarRgressionModel2SampleTransformed)

In [None]:
def educationWageLiniarRgressionModel2MainPredictor(self, transformedData):
    """Predict with ``self`` and return the values keyed by their position.

    The result maps the string form of each row position ("0", "1", ...) to the
    prediction converted to a plain Python float — presumably so the output is
    serializable without NumPy types (TODO confirm against the registry).
    """
    predictionsByPosition = {}
    for position, prediction in enumerate(self.predict(transformedData)):
        predictionsByPosition[str(position)] = float(prediction)
    return predictionsByPosition
# Pre-bind the fitted estimator as ``self`` so mainPredictor can be called with only the
# transformed data.
educationWageLiniarRgressionModel2Fit.mainPredictor = partial(educationWageLiniarRgressionModel2MainPredictor, educationWageLiniarRgressionModel2Fit)
# Attach the preprocessing function to the same object under the name ``transformer``.
educationWageLiniarRgressionModel2Fit.transformer = educationWageLiniarRgressionModel2Transformer

In [None]:
test2 = educationWageLiniarRgressionModel2Fit.transformer(educationWageLiniarRgressionModelSampleData)
test2

In [None]:
educationWageLiniarRgressionModel2Fit.mainPredictor(test2)

In [None]:
# Metadata passed to registerAJrjModel together with the fitted scikit-learn estimator.
educationWageLiniarRgressionModel2Metadata = {
    "modelName":f"educationWageLiniarRgression2",
    "version":"1.0.1",
    # Intercept plus one "coef_<i>" entry per fitted coefficient, all as plain floats.
    "params": {
        "intercept": float(educationWageLiniarRgressionModel2Fit.intercept_),
        **{f"coef_{i}": float(coef) for i, coef in enumerate(educationWageLiniarRgressionModel2Fit.coef_)}
    },
    # Score on the training data; the identical call is repeated under
    # libraryMetadata -> r2_score.
    "score":  educationWageLiniarRgressionModel2Fit.score(
        educationWageDf[["Education"]],
        educationWageDf["Wage"]
    ),
    # NOTE(review): the key is spelled "modelLibraray" — presumably what the registry
    # expects; confirm before renaming.
    "modelLibraray": 'sklearn.LinearRegression',
    "libraryMetadata": {
        # Constructor parameters of the estimator, as reported by get_params().
        "params": educationWageLiniarRgressionModel2Fit.get_params(),
        "r2_score":  educationWageLiniarRgressionModel2Fit.score(
            educationWageDf[["Education"]],
            educationWageDf["Wage"]
        )
    },
    # The key "dataForTransfer" matches the transformer's parameter name; the sample
    # payload is the same one used for the statsmodels model.
    "sampleData": {
        "dataForTransfer": educationWageLiniarRgressionModelSampleData
    }
}
educationWageLiniarRgressionModel2Metadata

In [None]:
# Hand the fitted scikit-learn estimator (with mainPredictor/transformer attached) and
# its metadata to the registry. What registerAJrjModel does with them is not visible
# in this notebook.
registerAJrjModel(educationWageLiniarRgressionModel2Fit, educationWageLiniarRgressionModel2Metadata)