In [156]:
import numpy as np
import matplotlib as plt
import pandas as pd
import random
import sklearn
import math
from sklearn.linear_model import LinearRegression
from scipy.stats import multivariate_normal

<b>PREPARING THE DATA FOR THE MODEL</b>

In [45]:
header=["Gender_ID","Degree","Age","Salary"]
noise=np.random.normal(0,1,5)

training_data=[
    [-1,-1,+1,-1,+1],
    [2,3,1,1,2],
    [36,47,26,68,33],
    [89.563,123.543,23.989,138.769,113.888]
]


df=pd.DataFrame(training_data).transpose()
df.columns=header


#Adding noise to the data.
df["Salary"]=[df["Salary"][i]+noise[i] for i in range(len(df))]
df.head()
data_types={"Gender_ID":int,"Degree":int,"Age":int}
df.astype(data_types)

Unnamed: 0,Gender_ID,Degree,Age,Salary
0,-1,2,36,88.827044
1,-1,3,47,122.824304
2,1,1,26,23.759862
3,-1,1,68,136.930458
4,1,2,33,113.784797


In [144]:
X1=np.array(df[["Age"]]).reshape(-1,1)
X1=X1.astype("int")
X2=np.array(df[["Age","Degree"]]).reshape(-1,2)
X2=X2.astype("int")
X3=np.array(df[["Age","Degree","Gender_ID"]]).reshape(-1,3)
X3=X3.astype("int")
Y=np.array(df["Salary"]).reshape(-1,1)
print(X3)

[[36  2 -1]
 [47  3 -1]
 [26  1  1]
 [68  1 -1]
 [33  2  1]]


<b>STRAIGHT CURVE FITTING REGRESSION</b>

In [124]:
regr=LinearRegression()
regr.fit(X1,Y)
score_SCF_X1=regr.score(X1,Y)
prediction_SCF_X1=regr.predict(np.array(X1))
prediction_SCF_X1=[prediction_SCF_X1[i][0] for i in range(len(X1))]
#print(prediction_SCF_X1)

y_test=regr.predict(np.array([[60]]))
print(y_test[0][0])


134.71909499805756


In [125]:
regr=LinearRegression()
regr.fit(X2,Y)
score_SCF_X2=regr.score(X2,Y)
prediction_SCF_X2=regr.predict(np.array(X2))
prediction_SCF_X2=[prediction_SCF_X2[i][0] for i in range(len(X2))]
#print(prediction_SCF_X2)

In [128]:
regr=LinearRegression()
regr.fit(X3,Y)
score_SCF_X3=regr.score(X3,Y)
prediction_SCF_X3=regr.predict(np.array(X3))
prediction_SCF_X3=[prediction_SCF_X3[i][0] for i in range(len(X3))]
#print(prediction_SCF_X3)

<b>MLE REGRESSION MODEL</b>

In [84]:
def calcLogLikelihood(guess,true,n):
    error=true-guess
    sigma=np.std(error)
    f=((1.0/(2.0*math.pi*(sigma**2)))**(n/2))* \
        np.exp(-1*((np.dot(error.T,error))/(2*sigma*sigma)))
    return np.log(f)

In [81]:
def MLE_model1(var):
    x,y=np.array(df["Age"]),np.array(df["Salary"])
    yguess=np.array([(var[1]*(x[i])+var[0]) for i in range(len(x))])
    f=calcLogLikelihood(yguess,y,float(len(yguess)))
    return (-1*f)

def MLE_model2(var):
    x1,x2,y=df["Age"],df["Degree"],df["Salary"]
    yguess=[0 for i in range(len(x1))]
    yguess=[(var[2]*x2[i]+var[1]*x1[i]+var[0]) for i in range(len(x1))]
    f=calcLogLikelihood(yguess,y,float(len(yguess)))
    return (-1*f)

def MLE_model3(var):
    x1,x2,x3,y=df["Age"],df["Degree"],df["Gender_ID"],df["Salary"]
    yguess=[0 for i in range(len(x1))]
    yguess=[(var[3]*x3[i]+var[2]*x2[i]+var[1]*x1[i]+var[0]) for i in range(len(x1))]
    f=calcLogLikelihood(yguess,y,float(len(yguess)))
    return (-1*f)

In [129]:
from scipy.optimize import minimize
nvar=2
var=np.zeros(nvar)
var[0]=15
var[1]=2

res1=minimize(MLE_model1,var,method="BFGS",options={"disp":False})
#print(res1)
theta_MLE_1=res1.x
prediction_MLE_X1=[theta_MLE_1[0]+theta_MLE_1[1]*X1[i][0] for i in range(len(X1))]
print(prediction_MLE_X1)
#print(prediction_SCF_X1)

[84.72735540592757, 107.64024754669248, 63.89745345977764, 151.3830416336073, 78.47838482208259]


In [134]:
from scipy.optimize import minimize
nvar=3
var=np.zeros(nvar)
var[0]=15
var[1]=2
var[2]=7

res2=minimize(MLE_model2,var,method="BFGS",options={"disp":False})
#print(res2)
theta_MLE_2=res2.x
prediction_MLE_X2=[theta_MLE_2[0]+theta_MLE_2[1]*X2[i][0] + theta_MLE_2[2]*X2[i][1] for i in range(len(X2))]
print(prediction_MLE_X2)
#print(prediction_SCF_X2)

[89.0511074449638, 138.37906318918797, 41.92312501742467, 134.32200431819672, 82.45118749490865]


In [147]:
from scipy.optimize import minimize
nvar=4
var=np.zeros(nvar)
var[0]=15
var[1]=2
var[2]=7
var[3]=15

res3=minimize(MLE_model3,var,method="BFGS",options={"disp":False})
#print(res3.x)
theta_MLE_3=res3.x
prediction_MLE_X3=[theta_MLE_3[0]+theta_MLE_3[1]*X3[i][0] + theta_MLE_3[2]*X3[i][1] + theta_MLE_3[3]*X3[i][2] for i in range(len(X3))]
print(prediction_MLE_X3)
#print(prediction_SCF_X3)


[73.99492030599427, 139.35163292379605, 41.98227386380541, 135.23524912592978, 95.56251751065486]


<b>MAP REGRESSION MODEL</b>

In [171]:

def MAP_model1(var):
    n=len(var)
    x,y=np.array(df["Age"]),np.array(df["Salary"])
    yguess=np.array([(var[1]*(x[i])+var[0]) for i in range(len(x))])
    cov_mat=np.identity(n,dtype=int)*200
    var_dist=multivariate_normal.pdf(var,[0]*n,cov_mat)
    f=calcLogLikelihood(yguess,y,float(len(yguess)))+var_dist
    return (-1*f)

def MAP_model2(var):
    n=len(var)
    x1,x2,y=df["Age"],df["Degree"],df["Salary"]
    yguess=[0 for i in range(len(x1))]
    yguess=[(var[2]*x2[i]+var[1]*x1[i]+var[0]) for i in range(len(x1))]
    cov_mat=np.identity(n,dtype=int)*4
    var_dist=multivariate_normal.pdf(var,[0]*n,cov_mat)
    f=calcLogLikelihood(yguess,y,float(len(yguess)))+var_dist
    return (-1*f)

def MAP_model3(var):
    n=len(var)
    x1,x2,x3,y=df["Age"],df["Degree"],df["Gender_ID"],df["Salary"]
    yguess=[0 for i in range(len(x1))]
    yguess=[(var[3]*x3[i]+var[2]*x2[i]+var[1]*x1[i]+var[0]) for i in range(len(x1))]
    cov_mat=np.identity(n,dtype=int)*4
    var_dist=multivariate_normal.pdf(var,[0]*n,cov_mat)
    f=calcLogLikelihood(yguess,y,float(len(yguess)))+var_dist
    return (-1*f)

In [172]:
from scipy.optimize import minimize
nvar=2
var=np.zeros(nvar)
var[0]=15
var[1]=2

res1=minimize(MAP_model1,var,method="BFGS",options={"disp":False})
print(res1.x)
theta_MAP_1=res1.x
prediction_MAP_X1=[theta_MAP_1[0]+theta_MAP_1[1]*X1[i][0] for i in range(len(X1))]
print(prediction_MAP_X1)
#print(prediction_SCF_X1)

[9.70350351 2.0837582 ]
[84.71879885930943, 107.64013910633253, 63.881216816561164, 151.3990613961039, 78.46752424648496]
[84.72735905084977, 107.64023802665334, 63.897469072846526, 151.38300698046015, 78.4783920574488]


In [177]:
from scipy.optimize import minimize
nvar=3
var=np.zeros(nvar)
var[0]=15
var[1]=2
var[2]=7

res2=minimize(MAP_model2,var,method="BFGS",options={"disp":False})
#print(res2)
theta_MAP_2=res2.x
prediction_MAP_X2=[theta_MAP_2[0]+theta_MAP_2[1]*X2[i][0] + theta_MAP_2[2]*X2[i][1] for i in range(len(X2))]
print(prediction_MAP_X2)
#print(prediction_SCF_X2)

[89.05112770653797, 138.37905712481768, 41.923169748345, 134.32197107198905, 82.45121332627767]


In [176]:
from scipy.optimize import minimize
nvar=4
var=np.zeros(nvar)
var[0]=15
var[1]=2
var[2]=7
var[3]=15

res3=minimize(MAP_model3,var,method="BFGS",options={"disp":False})
#print(res3.x)
theta_MAP_3=res3.x
prediction_MAP_X3=[theta_MAP_3[0]+theta_MAP_3[1]*X3[i][0] + theta_MAP_3[2]*X3[i][1] + theta_MAP_3[3]*X3[i][2] for i in range(len(X3))]
print(prediction_MAP_X3)
#print(prediction_SCF_X3)


[73.99492030599427, 139.35163292379605, 41.98227386380541, 135.23524912592978, 95.56251751065486]
