# پیش‌بینی هزینه‌های پزشکی
Dataset: Medical Cost Personal Dataset

In [1]:

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression


## بارگذاری دیتاست

In [2]:

df = pd.read_csv("insurance.csv")
df.head()


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


## پیش‌پردازش داده‌ها

In [None]:
df = pd.read_csv("insurance.csv")

df_encoded = pd.get_dummies(df, drop_first=True)

X = df_encoded.drop('charges', axis=1)
y = df_encoded['charges']

X = X.astype(np.float32)
y = y.astype(np.float32)

scaler = StandardScaler()
X[['age', 'bmi']] = scaler.fit_transform(X[['age', 'bmi']])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


## رگرسیون خطی

In [16]:

lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

y_pred_lr = lin_reg.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
mse_lr


33596920.0

## تبدیل به Tensor

In [17]:
X_train_t = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
X_test_t  = torch.tensor(X_test.to_numpy(), dtype=torch.float32)

y_train_t = torch.tensor(y_train.to_numpy(), dtype=torch.float32).view(-1, 1)
y_test_t  = torch.tensor(y_test.to_numpy(), dtype=torch.float32).view(-1, 1)


## شبکه عصبی

In [18]:

model = nn.Sequential(
    nn.Linear(X_train_t.shape[1], 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Linear(64, 1)
)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)


In [19]:

for epoch in range(100):
    optimizer.zero_grad()
    preds = model(X_train_t)
    loss = criterion(preds, y_train_t)
    loss.backward()
    optimizer.step()

loss.item()


114222624.0

In [20]:

with torch.no_grad():
    test_preds = model(X_test_t)
    mse_nn = criterion(test_preds, y_test_t)

mse_nn.item()


114655320.0