# Makine Öğrenmesi - Doğrusal Regresyon Modelleri

## Çoklu Doğrusal Regresyon

### Model

In [1]:
from sklearn.linear_model import LinearRegression
import numpy as np
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error

In [2]:
df=pd.read_spss("akcigerhacmi.sav")
df

Unnamed: 0,Yas,Cinsiyet,BMI,sagAK,solAK
0,1.0,1.0,31.2,2.4013,2.0149
1,1.0,1.0,18.4,2.3174,2.4541
2,1.0,1.0,21.5,3.5404,3.1376
3,1.0,1.0,29.6,3.4055,2.9768
4,1.0,1.0,29.4,2.4326,2.2114
...,...,...,...,...,...
269,3.0,2.0,25.3,2.1609,1.8604
270,3.0,2.0,27.4,1.8867,1.5776
271,3.0,2.0,25.2,2.4417,2.3721
272,3.0,2.0,25.7,2.2499,2.0001


In [11]:
X=df.drop(["solAK","sagAK"],axis=1)
y = df[["solAK"]]

In [12]:
y.head()

Unnamed: 0,solAK
0,2.0149
1,2.4541
2,3.1376
3,2.9768
4,2.2114


In [13]:
X.head()

Unnamed: 0,Yas,Cinsiyet,BMI
0,1.0,1.0,31.2
1,1.0,1.0,18.4
2,1.0,1.0,21.5
3,1.0,1.0,29.6
4,1.0,1.0,29.4


In [14]:
#Statsmodels ile model kurmak

In [15]:
import statsmodels.api as sm

In [16]:
lm = sm.OLS(y, X)

In [17]:
model = lm.fit()

In [18]:
model.summary()

0,1,2,3
Dep. Variable:,solAK,R-squared (uncentered):,0.873
Model:,OLS,Adj. R-squared (uncentered):,0.871
Method:,Least Squares,F-statistic:,618.9
Date:,"Mon, 08 Mar 2021",Prob (F-statistic):,6.74e-121
Time:,15:09:18,Log-Likelihood:,-319.49
No. Observations:,274,AIC:,645.0
Df Residuals:,271,BIC:,655.8
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Yas,0.1319,0.062,2.126,0.034,0.010,0.254
Cinsiyet,-0.2052,0.084,-2.445,0.015,-0.371,-0.040
BMI,0.0763,0.007,11.640,0.000,0.063,0.089

0,1,2,3
Omnibus:,1.327,Durbin-Watson:,1.596
Prob(Omnibus):,0.515,Jarque-Bera (JB):,1.05
Skew:,0.128,Prob(JB):,0.591
Kurtosis:,3.164,Cond. No.,48.5


In [None]:
##scikit learn ile model kurmak

In [19]:
from sklearn.linear_model import LinearRegression
lm  = LinearRegression()

In [20]:
model = lm.fit(X, y)

In [21]:
model.intercept_

array([3.70088246])

In [22]:
model.coef_

array([[-0.15567349, -0.72865699, -0.00754051]])

### Model Tuning (Model Doğrulama)

In [23]:
import numpy as np

In [24]:
X.head()

Unnamed: 0,Yas,Cinsiyet,BMI
0,1.0,1.0,31.2
1,1.0,1.0,18.4
2,1.0,1.0,21.5
3,1.0,1.0,29.6
4,1.0,1.0,29.4


In [25]:
y.head()

Unnamed: 0,solAK
0,2.0149
1,2.4541
2,3.1376
3,2.9768
4,2.2114


In [26]:
#sinama seti
from sklearn.model_selection import train_test_split

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 1)

In [28]:
X_train.head()

Unnamed: 0,Yas,Cinsiyet,BMI
67,2.0,1.0,25.0
88,2.0,1.0,29.4
28,1.0,2.0,23.2
16,1.0,1.0,23.5
185,3.0,1.0,24.5


In [29]:
y_train.head()

Unnamed: 0,solAK
67,2.0923
88,2.475
28,1.6821
16,3.6898
185,1.7358


In [30]:
X_test.head()

Unnamed: 0,Yas,Cinsiyet,BMI
58,2.0,1.0,26.5
101,2.0,1.0,30.7
186,3.0,1.0,34.9
116,2.0,2.0,24.2
11,1.0,1.0,29.6


In [31]:
y_test.head()

Unnamed: 0,solAK
58,2.3819
101,3.2573
186,1.9145
116,2.0512
11,2.5876


In [32]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [33]:
#egitim hatasi
np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

0.5408394556139733

In [34]:
#test hatasi
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

0.505731406385501

In [35]:
#k-katlı cv

In [36]:
from sklearn.model_selection import cross_val_score

In [37]:
cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

array([-0.42516293, -0.22603215, -0.21159038, -0.17248757, -0.42580554,
       -0.20425621, -0.42464361, -0.35772457, -0.25405908, -0.30068212])

In [38]:
#cv mse
np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error"))

0.3002444174938042

In [39]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")))

0.547945633702655

In [40]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X, y, cv = 10, scoring = "neg_mean_squared_error")))

0.5415494832041916

In [41]:
X=df.drop(["solAK","sagAK"],axis=1)
y = df[["sagAK"]]

In [42]:
X.head()

Unnamed: 0,Yas,Cinsiyet,BMI
0,1.0,1.0,31.2
1,1.0,1.0,18.4
2,1.0,1.0,21.5
3,1.0,1.0,29.6
4,1.0,1.0,29.4


In [43]:
y.head()

Unnamed: 0,sagAK
0,2.4013
1,2.3174
2,3.5404
3,3.4055
4,2.4326


In [45]:
lm = sm.OLS(y, X)

In [46]:
model = lm.fit()

In [47]:
model.summary()

0,1,2,3
Dep. Variable:,sagAK,R-squared (uncentered):,0.895
Model:,OLS,Adj. R-squared (uncentered):,0.893
Method:,Least Squares,F-statistic:,766.0
Date:,"Mon, 08 Mar 2021",Prob (F-statistic):,5.5100000000000006e-132
Time:,15:13:24,Log-Likelihood:,-331.53
No. Observations:,274,AIC:,669.1
Df Residuals:,271,BIC:,679.9
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Yas,0.1872,0.065,2.888,0.004,0.060,0.315
Cinsiyet,-0.2262,0.088,-2.579,0.010,-0.399,-0.054
BMI,0.0853,0.007,12.452,0.000,0.072,0.099

0,1,2,3
Omnibus:,3.447,Durbin-Watson:,1.623
Prob(Omnibus):,0.178,Jarque-Bera (JB):,3.375
Skew:,0.161,Prob(JB):,0.185
Kurtosis:,3.438,Cond. No.,48.5


In [49]:
from sklearn.linear_model import LinearRegression
lm  = LinearRegression()

In [50]:
model = lm.fit(X, y)

In [51]:
model.intercept_

array([3.99015435])

In [52]:
model.coef_

array([[-0.12284225, -0.79054399, -0.00511183]])

In [53]:
from sklearn.model_selection import train_test_split

In [55]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 1)

In [56]:
X_train.head()

Unnamed: 0,Yas,Cinsiyet,BMI
67,2.0,1.0,25.0
88,2.0,1.0,29.4
28,1.0,2.0,23.2
16,1.0,1.0,23.5
185,3.0,1.0,24.5


In [57]:
y_train.head()

Unnamed: 0,sagAK
67,2.4011
88,3.2606
28,1.8989
16,4.1964
185,2.6532


In [58]:
X_test.head()

Unnamed: 0,Yas,Cinsiyet,BMI
58,2.0,1.0,26.5
101,2.0,1.0,30.7
186,3.0,1.0,34.9
116,2.0,2.0,24.2
11,1.0,1.0,29.6


In [59]:
y_test.head()

Unnamed: 0,sagAK
58,2.4143
101,3.4604
186,2.1767
116,2.2884
11,2.6872


In [60]:
lm = LinearRegression()
model = lm.fit(X_train, y_train)

In [61]:
#egitim hatasi
np.sqrt(mean_squared_error(y_train, model.predict(X_train)))

0.5455035435851339

In [62]:
#test hatasi
np.sqrt(mean_squared_error(y_test, model.predict(X_test)))

0.5076854143462117

In [63]:
#kkatlı cros 
from sklearn.model_selection import cross_val_score

In [64]:
cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")

array([-0.52533513, -0.19106929, -0.17738937, -0.14670987, -0.52523231,
       -0.17866986, -0.35064715, -0.32129245, -0.23067429, -0.39417556])

In [65]:
#cv mse
np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error"))

0.30411952713290896

In [66]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X_train, y_train, cv = 10, scoring = "neg_mean_squared_error")))

0.551470332051425

In [67]:
#cv rmse
np.sqrt(np.mean(-cross_val_score(model, X, y, cv = 10, scoring = "neg_mean_squared_error")))

0.545168721292374

In [None]:
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Diabetes Prediction Project</title>
</head>
<body>

    <div>
    <h1>Diabetes Prediction Project</h1>

    <form action="{{ url_for('predict')}}"method="post">
    
        Yas: <input type="text" name="Yas" id="Yas" placeholder="Yas">
        <br> <br>
        Cinsiyet: <input type="text" name="Cinsiyet" id="Cinsiyet" placeholder="Cinsiyet">
        <br> <br>
        BMI: <input type="text" name="BMI" id="BMI" placeholder="BMI">
        <br> <br>
        <button type="submit">Predict</button>

    </form>
    <br><br>
    {{pred}}
</div>
</body>
</html>

In [None]:
import numpy as np
from flask import Flask, request, jsonify, render_template
import pickle

app = Flask(__name__)
model = pickle.load(open("model.pkl","rb"))

@app.route('/')
def home():
    return render_template("index.html")

@app.route("/predict",methods=["POST"])
def predict():
    features = [str(x) for x in request.form.values()]

    final_features = [np.array(features)]
    prediction = model.predict_proba(final_features)

    output='{0:.{1}f}'.format(prediction[0][1], 2)


    return render_template('index.html',pred='Your probability of diabetes is % {}'.format(str(float(output)*100)))

if __name__ == "__main__":
    app.run(debug=True)
view rawdiabetDeploy1.py hosted with ❤ by GitHub