In [27]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 


# Seed NumPy's legacy global RNG so every np.random.* draw in this notebook is repeatable.
np.random.seed(42)
# Number of synthetic subjects, i.e. rows in the generated dataset.
subject_num = 50_000

In [28]:
# Draw the synthetic covariates, one value per subject (subject_num values each).
# Order matters: every call consumes the global NumPy RNG stream, so reordering
# these lines changes the generated data.
# Integer ages; randint excludes the upper bound, so values fall in 18..80.
age = np.random.randint(18, 81, size=subject_num)
# Binary gender code, 0 or 1 with equal probability (which code means which is not stated here).
gender = np.random.choice([0, 1], size=subject_num, p=[0.5, 0.5])
# Integer heights in 150..200 (201 is exclusive); units are presumably cm - TODO confirm.
height = np.random.randint(150, 201, size=subject_num)
# Binary indicators that are 1 with probability 0.3, 0.2 and 0.1 respectively.
# They are stored later as the 'Cough', 'Smoker' and 'Asthma' columns.
symptom1 = np.random.choice([0, 1], size=subject_num, p=[0.7, 0.3])
symptom2 = np.random.choice([0, 1], size=subject_num, p=[0.8, 0.2])
symptom3 = np.random.choice([0, 1], size=subject_num, p=[0.9, 0.1])

In [29]:
# Additive terms of the synthetic tidal-volume formula.
# Baseline value that every subject starts from.
base_tidal_volume = 500
# Age term: 0 at age 30, changing by 0.1 per year of age.
age_factor = (age - 30) * 5 / 50
# Gender term: +50 for subjects coded 1, nothing for those coded 0.
gender_factor = gender * 50
# Height term: 0 at height 170, changing by 2 per unit of height.
height_factor = (height - 170) * 2
# Symptom term: -50 for each indicator that is set.
symptom_factor = (symptom1 + symptom2 + symptom3) * -50

In [30]:
# Synthetic target: the baseline plus every additive term. No noise is added, so the
# result is an exact linear function of age, gender, height and the symptom count.
tidal_volume = (
    base_tidal_volume
    + age_factor
    + gender_factor
    + height_factor
    + symptom_factor
)

In [31]:
# Assemble the generated arrays into one table; the column order follows the name tuple,
# with the target ('Tidal_Volume') as the last column.
dataset = pd.DataFrame(
    dict(
        zip(
            ('Age', 'Gender', 'Height', 'Cough', 'Smoker', 'Asthma', 'Tidal_Volume'),
            (age, gender, height, symptom1, symptom2, symptom3, tidal_volume),
        )
    )
)

In [32]:
# Persist the synthetic dataset as CSV in the current working directory;
# index=False leaves the row index out of the file.
dataset.to_csv('tidal_volume_data.csv', index=False)

In [33]:
# Preview the first five rows (DataFrame.head defaults to n=5) as plain text.
print(dataset.head())

   Age  Gender  Height  Cough  Smoker  Asthma  Tidal_Volume
0   56       1     194      0       0       0         600.6
1   69       1     173      0       1       0         509.9
2   46       0     195      0       0       0         551.6
3   32       0     166      1       0       0         442.2
4   60       0     164      0       0       0         491.0


In [34]:
# Reload the table from the CSV written earlier, rebinding `dataset` to the on-disk copy.
dataset = pd.read_csv("tidal_volume_data.csv")

In [35]:
# `dataset` already holds the contents of 'tidal_volume_data.csv', so take a copy of it
# instead of parsing the same file a second time. The copy keeps `data_set` an
# independent frame, as it was when it came from its own read.
data_set = dataset.copy()
# Select the target by name rather than by position, so a reordered or extended table
# cannot silently turn a feature into the label. Every other column is a feature.
# to_numpy() is the pandas-recommended replacement for .values.
X = data_set.drop(columns='Tidal_Volume').to_numpy()
y = data_set['Tidal_Volume'].to_numpy()

In [36]:
# Show the feature matrix; NumPy abbreviates the middle rows of large arrays with '...'.
print(X)

[[ 56   1 194   0   0   0]
 [ 69   1 173   0   1   0]
 [ 46   0 195   0   0   0]
 ...
 [ 52   0 164   0   0   0]
 [ 37   1 195   1   0   1]
 [ 52   0 200   0   0   0]]


In [37]:
# Show the target vector (abbreviated by NumPy in the same way).
print(y)

[600.6 509.9 551.6 ... 490.2 500.7 562.2]


In [38]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; random_state fixes the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [39]:
from sklearn.preprocessing import StandardScaler

# Learn the per-feature mean and standard deviation from the training rows only, then
# apply those same statistics to both splits so nothing about the test rows leaks
# into the scaling.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [40]:
# Inspect the standardised test features (values are now z-scores, not raw units).
print(X_test)

[[ 1.53930165  1.00545488 -1.62837047  1.51329141 -0.49949995  2.95212227]
 [-0.66193549 -0.99457472 -1.69627197 -0.66081126  2.0020022  -0.33873936]
 [-0.16665713 -0.99457472 -1.69627197 -0.66081126 -0.49949995 -0.33873936]
 ...
 [-1.2672757   1.00545488  1.56300033 -0.66081126 -0.49949995 -0.33873936]
 [-0.05659527 -0.99457472  0.47657623 -0.66081126  2.0020022  -0.33873936]
 [ 0.6588068  -0.99457472  0.40867472 -0.66081126 -0.49949995 -0.33873936]]


In [41]:
from sklearn.ensemble import RandomForestRegressor

# Model 1: a random forest of 10 trees; random_state makes the fit repeatable.
regressor1 = RandomForestRegressor(random_state=0, n_estimators=10)
regressor1.fit(X_train, y_train)

In [42]:
# Random-forest predictions for the held-out rows.
y_pred1 = regressor1.predict(X_test)

In [43]:
from sklearn.linear_model import LinearRegression

# Model 2: ordinary least-squares linear regression with default settings.
regressor2 = LinearRegression()
regressor2.fit(X_train, y_train)

In [44]:
# Linear-regression predictions for the held-out rows.
y_pred2 = regressor2.predict(X_test)

In [45]:
from sklearn.tree import DecisionTreeRegressor

# Model 3: a single decision tree; random_state makes the fit repeatable.
regressor3 = DecisionTreeRegressor(random_state=0)
regressor3.fit(X_train, y_train)

In [46]:
# Decision-tree predictions for the held-out rows.
y_pred3 = regressor3.predict(X_test)

In [47]:
# Show the random-forest predictions (abbreviated by NumPy).
print(y_pred1)

[415.46 410.63 461.6  ... 605.58 475.8  525.1 ]


In [48]:
# Show the linear-regression predictions (abbreviated by NumPy).
print(y_pred2)

[416.7 410.7 461.6 ... 605.6 475.8 525.1]


In [49]:
# Show the decision-tree predictions (abbreviated by NumPy).
print(y_pred3)

[416.6 410.7 461.6 ... 605.5 475.9 525.1]


In [50]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Test-set error of model 1 (random forest).
mse1 = mean_squared_error(y_true=y_test, y_pred=y_pred1)
mae1 = mean_absolute_error(y_true=y_test, y_pred=y_pred1)
rmse1 = mse1 ** 0.5  # RMSE is the square root of the MSE

print(f'MAE 1: {mae1}')
print(f'MSE 1: {mse1}')
print(f'RMSE 1: {rmse1}')

MAE 1: 0.07545680000001864
MSE 1: 0.05359740000000016
RMSE 1: 0.2315111228429428


In [51]:
# Test-set error of model 2 (linear regression). The target was built as an exact,
# noise-free linear combination of the features, so errors at floating-point
# round-off level are expected here rather than a sign of leakage.
mse2 = mean_squared_error(y_true=y_test, y_pred=y_pred2)
mae2 = mean_absolute_error(y_true=y_test, y_pred=y_pred2)
rmse2 = mse2 ** 0.5  # RMSE is the square root of the MSE

print(f'MAE 2: {mae2}')
print(f'MSE 2: {mse2}')
print(f'RMSE 2: {rmse2}')

MAE 2: 1.3526005204766988e-13
MSE 2: 2.675153799784776e-26
RMSE 2: 1.6355897406699444e-13


In [52]:
# Test-set error of model 3 (decision tree).
mse3 = mean_squared_error(y_true=y_test, y_pred=y_pred3)
mae3 = mean_absolute_error(y_true=y_test, y_pred=y_pred3)
rmse3 = mse3 ** 0.5  # RMSE is the square root of the MSE

print(f'MAE 3: {mae3}')
print(f'MSE 3: {mse3}')
print(f'RMSE 3: {rmse3}')

MAE 3: 0.07700000000000203
MSE 3: 0.08305519999999973
RMSE 3: 0.28819299089325495
