# Makine Öğrenmesi - Doğrusal Regresyon Modelleri

## Çoklu Doğrusal Regresyon

### Model

In [1]:
from sklearn.linear_model import LinearRegression
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

In [2]:
df=pd.read_spss("akcigerhacmi.sav")
df

Unnamed: 0,Yas,Cinsiyet,Boy,sagAK,solAK
0,39.0,1.0,180.0,4.1964,3.6898
1,39.0,1.0,180.0,4.1964,3.6898
2,38.0,1.0,168.0,3.3459,1.8142
3,37.0,1.0,179.0,2.6872,2.5876
4,37.0,1.0,179.0,2.6872,2.5876
...,...,...,...,...,...
269,69.0,2.0,160.0,1.9588,1.7471
270,67.0,2.0,152.0,2.1085,1.8567
271,80.0,2.0,151.0,1.6779,1.3923
272,69.0,2.0,158.0,2.2558,1.8196


In [3]:
X=df.drop(["solAK","sagAK"],axis=1)
y = df[["solAK"]]

In [4]:
y.head()

Unnamed: 0,solAK
0,3.6898
1,3.6898
2,1.8142
3,2.5876
4,2.5876


In [5]:
X.head()

Unnamed: 0,Yas,Cinsiyet,Boy
0,39.0,1.0,180.0
1,39.0,1.0,180.0
2,38.0,1.0,168.0
3,37.0,1.0,179.0
4,37.0,1.0,179.0


In [6]:
#Statsmodels ile model kurmak

In [7]:
import statsmodels.api as sm

In [8]:
lm = sm.OLS(y, X)

In [9]:
model = lm.fit()

In [10]:
model.summary()

0,1,2,3
Dep. Variable:,solAK,R-squared (uncentered):,0.948
Model:,OLS,Adj. R-squared (uncentered):,0.947
Method:,Least Squares,F-statistic:,1644.0
Date:,"Fri, 19 Mar 2021",Prob (F-statistic):,1.7300000000000002e-173
Time:,02:12:23,Log-Likelihood:,-197.04
No. Observations:,274,AIC:,400.1
Df Residuals:,271,BIC:,410.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Yas,-0.0061,0.002,-3.433,0.001,-0.010,-0.003
Cinsiyet,-0.5376,0.055,-9.774,0.000,-0.646,-0.429
Boy,0.0195,0.001,25.756,0.000,0.018,0.021

0,1,2,3
Omnibus:,0.555,Durbin-Watson:,2.108
Prob(Omnibus):,0.758,Jarque-Bera (JB):,0.674
Skew:,-0.03,Prob(JB):,0.714
Kurtosis:,2.765,Cond. No.,320.0


In [11]:
##scikit learn ile model kurmak

In [12]:
from sklearn.linear_model import LinearRegression
lm  = LinearRegression()

In [13]:
model = lm.fit(X, y)

In [14]:
model.intercept_

array([-2.21076577])

In [15]:
model.coef_

array([[-0.00413004, -0.39457421,  0.0308151 ]])

### Model Tuning (Model Doğrulama)

In [16]:
import numpy as np

In [17]:
X.head()

Unnamed: 0,Yas,Cinsiyet,Boy
0,39.0,1.0,180.0
1,39.0,1.0,180.0
2,38.0,1.0,168.0
3,37.0,1.0,179.0
4,37.0,1.0,179.0


In [18]:
y.head()

Unnamed: 0,solAK
0,3.6898
1,3.6898
2,1.8142
3,2.5876
4,2.5876


In [19]:
#sinama seti
from sklearn.model_selection import train_test_split

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 1)

In [21]:
X_train.head()

Unnamed: 0,Yas,Cinsiyet,Boy
67,59.0,1.0,163.0
88,72.0,1.0,170.0
28,42.0,1.0,181.0
16,27.0,1.0,183.0
185,63.0,2.0,157.0


In [22]:
y_train.head()

Unnamed: 0,solAK
67,1.5321
88,3.2278
28,3.6059
16,2.8271
185,2.139


In [23]:
X_test.head()

Unnamed: 0,Yas,Cinsiyet,Boy
58,58.0,1.0,165.0
101,68.0,1.0,168.0
186,61.0,2.0,165.0
116,77.0,1.0,157.0
11,32.0,1.0,168.0


In [24]:
y_test.head()

Unnamed: 0,solAK
58,2.829
101,2.6936
186,1.1344
116,1.4571
11,2.8254


In [25]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [37]:
model.intercept_

array([-2.23315622])

In [26]:
model.coef_

array([[-0.00541754, -0.38942238,  0.03149909]])

In [27]:
prediction=model.predict([[2,1,30]])

In [28]:
prediction[0][0]

-1.6884409198396741

In [29]:
#egitim hatasi
np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

0.4843941248787055

In [30]:
#test hatasi
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

0.5077179410179467

In [31]:
#k-katlı cv

In [32]:
from sklearn.model_selection import cross_val_score

In [33]:
cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

array([-0.34224911, -0.13816714, -0.15882702, -0.19708389, -0.30877384,
       -0.26136178, -0.33456977, -0.21428333, -0.29550605, -0.19509384])

In [34]:
#cv mse
np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error"))

0.2445915765845581

In [35]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")))

0.4945620047926833

In [36]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X, y, cv = 10, scoring = "neg_mean_squared_error")))

0.5007922612143351

In [38]:
X=df.drop(["solAK","sagAK"],axis=1)
y = df[["sagAK"]]

In [39]:
X.head()

Unnamed: 0,Yas,Cinsiyet,Boy
0,39.0,1.0,180.0
1,39.0,1.0,180.0
2,38.0,1.0,168.0
3,37.0,1.0,179.0
4,37.0,1.0,179.0


In [40]:
y.head()

Unnamed: 0,sagAK
0,4.1964
1,4.1964
2,3.3459
3,2.6872
4,2.6872


In [41]:
lm = sm.OLS(y, X)

In [42]:
model = lm.fit()

In [43]:
model.summary()

0,1,2,3
Dep. Variable:,sagAK,R-squared (uncentered):,0.96
Model:,OLS,Adj. R-squared (uncentered):,0.96
Method:,Least Squares,F-statistic:,2186.0
Date:,"Fri, 19 Mar 2021",Prob (F-statistic):,1.63e-189
Time:,02:14:06,Log-Likelihood:,-198.05
No. Observations:,274,AIC:,402.1
Df Residuals:,271,BIC:,412.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Yas,-0.0045,0.002,-2.524,0.012,-0.008,-0.001
Cinsiyet,-0.5826,0.055,-10.553,0.000,-0.691,-0.474
Boy,0.0214,0.001,28.127,0.000,0.020,0.023

0,1,2,3
Omnibus:,4.103,Durbin-Watson:,1.962
Prob(Omnibus):,0.129,Jarque-Bera (JB):,3.855
Skew:,0.226,Prob(JB):,0.145
Kurtosis:,3.365,Cond. No.,320.0


In [44]:
from sklearn.linear_model import LinearRegression
lm  = LinearRegression()

In [45]:
model = lm.fit(X, y)

In [46]:
model.intercept_

array([-1.86292403])

In [47]:
model.coef_

array([[-0.00283737, -0.4620499 ,  0.03090377]])

In [48]:
from sklearn.model_selection import train_test_split

In [49]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 1)

In [50]:
X_train.head()

Unnamed: 0,Yas,Cinsiyet,Boy
67,59.0,1.0,163.0
88,72.0,1.0,170.0
28,42.0,1.0,181.0
16,27.0,1.0,183.0
185,63.0,2.0,157.0


In [51]:
y_train.head()

Unnamed: 0,sagAK
67,2.2323
88,4.0726
28,3.7486
16,3.1939
185,2.4576


In [52]:
X_test.head()

Unnamed: 0,Yas,Cinsiyet,Boy
58,58.0,1.0,165.0
101,68.0,1.0,168.0
186,61.0,2.0,165.0
116,77.0,1.0,157.0
11,32.0,1.0,168.0


In [53]:
y_test.head()

Unnamed: 0,sagAK
58,3.3609
101,2.9678
186,1.576
116,1.6259
11,3.2282


In [54]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [55]:
#egitim hatasi
np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

0.4888572473128394

In [56]:
#test hatasi
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

0.509009876789429

In [57]:
#kkatlı cros 
from sklearn.model_selection import cross_val_score

In [58]:
cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

array([-0.36931589, -0.23369311, -0.12782527, -0.29593907, -0.21571869,
       -0.31020804, -0.32102684, -0.21323745, -0.20205964, -0.19848492])

In [59]:
#cv mse
np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error"))

0.2487508906265418

In [60]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")))

0.4987493264421936

In [61]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X, y, cv = 10, scoring = "neg_mean_squared_error")))

0.5070214371557151

In [62]:
model.intercept_

array([-1.77600545])

In [63]:
model.coef_

array([[-0.0037174, -0.4798962,  0.0309997]])

In [None]:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Diabetes Prediction Project</title>
</head>
<body>

    <div>
    <h1>Diabetes Prediction Project</h1>

    <form action="{{ url_for('predict')}}"method="post">
    
        Yas: <input type="text" name="Yas" id="Yas" placeholder="Yas">
        <br> <br>
        Cinsiyet: <input type="text" name="Cinsiyet" id="Cinsiyet" placeholder="Cinsiyet">
        <br> <br>
        BMI: <input type="text" name="BMI" id="BMI" placeholder="BMI">
        <br> <br>
        <button type="submit">Predict</button>

    </form>
    <br><br>
    {{pred}}
</div>
</body>
</html>

In [None]:
import numpy as np
from flask import Flask, request, jsonify, render_template
import pickle

app = Flask(__name__)
model = pickle.load(open("model.pkl","rb"))

@app.route('/')
def home():
    return render_template("index.html")

@app.route("/predict",methods=["POST"])
def predict():
    features = [str(x) for x in request.form.values()]

    final_features = [np.array(features)]
    prediction = model.predict_proba(final_features)

    output='{0:.{1}f}'.format(prediction[0][1], 2)


    return render_template('index.html',pred='Your probability of diabetes is % {}'.format(str(float(output)*100)))

if __name__ == "__main__":
    app.run(debug=True)
view rawdiabetDeploy1.py hosted with ❤ by GitHub

In [None]:
import numpy as np
from flask import Flask, request, jsonify, render_template
import pickle

app = Flask(__name__)
model = pickle.load(open("model.pkl","rb"))

@app.route('/')
def home():
    return render_template("index.html")

@app.route("/predict",methods=["POST"])
def predict():
    features = [int(x) for x in request.form.values()]

    final_features = [np.array(features)]
    prediction = model.predict(final_features)
    print(str(prediction))
    output='{0:.{1}f}'.format(prediction[0][0], 2)


    return render_template('index.html',pred='Sag Akciğer Hacmi {}'.format(str(float(output))))

if __name__ == "__main__":
    app.run(debug=True)
    

In [None]:
A=df.drop(["sagAK","solAK"],axis=1)
b = df[["solAK"]]

lm2 = LinearRegression()
A_train, A_test, b_train, b_test = train_test_split(A, b, test_size = 0.30, random_state = 1)

model2 = lm2.fit(A_train, b_train)
pipe2=make_pipeline(StandardScaler(),lm2)
pipe2.fit(A_train, b_train)
pickle.dump(pipe2, open("model2.pkl","wb"))

model2 = pickle.load(open("model2.pkl","rb"))