In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Read the home listings into a DataFrame
df = pd.read_csv('homes.csv')

# Simple linear regression: price explained by the bedroom count alone.
# X is kept as a one-column DataFrame (2-D) because the estimator is fitted on a feature matrix.
X = df.loc[:, ['beds']]
y = df.loc[:, 'price']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the regression on the training portion only (fit() returns the fitted estimator)
model = LinearRegression().fit(X_train, y_train)

# With a single predictor, the only entry of coef_ is the slope for 'beds'
bedroom_coefficient = model.coef_[0]

# Report the slope: the estimated change in average price per additional bedroom
print(f'Estimated increase in average price for each additional bedroom: ${bedroom_coefficient:.2f}')


Estimated increase in average price for each additional bedroom: $57247.87


In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset
df = pd.read_csv('homes.csv')

# Multiple linear regression: price explained by five predictors at once
X_mlr = df[['lotsize', 'area', 'beds', 'baths', 'garage']]
y_mlr = df['price']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train_mlr, X_test_mlr, y_train_mlr, y_test_mlr = train_test_split(X_mlr, y_mlr, test_size=0.2, random_state=42)

# Fit the MLR model on the training portion
model_mlr = LinearRegression()
model_mlr.fit(X_train_mlr, y_train_mlr)

# coef_ is ordered like the columns of the training frame, so look up the
# position of 'beds' by name instead of hard-coding an index: reordering or
# extending the predictor list can then never silently pick the wrong coefficient.
bedroom_coefficient_mlr = model_mlr.coef_[X_mlr.columns.get_loc('beds')]

# Display the estimated change in average price for each additional bedroom
# (with the other predictors in the model)
print(f'Estimated change in average price for each additional bedroom: ${bedroom_coefficient_mlr:.2f}')


Estimated change in average price for each additional bedroom: $-7310.21


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset
df = pd.read_csv('homes.csv')

# Multiple linear regression: price explained by five predictors at once
X_mlr = df[['lotsize', 'area', 'beds', 'baths', 'garage']]
y_mlr = df['price']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train_mlr, X_test_mlr, y_train_mlr, y_test_mlr = train_test_split(X_mlr, y_mlr, test_size=0.2, random_state=42)

# Fit the MLR model on the training portion
model_mlr = LinearRegression()
model_mlr.fit(X_train_mlr, y_train_mlr)

# House to price: lot size = 24500 sqft, area = 2650 sqft, beds = 3, baths = 3, garage = 2.
# The model was fitted on a DataFrame with named columns, so the query is built
# as a one-row DataFrame aligned to those same columns. This ties each value to
# its feature by name (not by list position) and avoids the "X does not have
# valid feature names" warning that recent scikit-learn versions emit for bare lists.
house_features = pd.DataFrame(
    {
        'lotsize': [24500],
        'area': [2650],
        'beds': [3],
        'baths': [3],
        'garage': [2],
    },
    columns=X_mlr.columns,
)

# Predicting the price of the house with the given features
predicted_price = model_mlr.predict(house_features)

# Display the predicted price (predict returns one value per input row)
print(f'The predicted price of the house is: ${predicted_price[0]:.2f}')


The predicted price of the house is: $323638.80




In [6]:
import pandas as pd

# Read the insurance records into a DataFrame
df_insurance = pd.read_csv('insurance.csv')

# Preview the raw data: heading line followed by the first five rows
print("First five patients (before transformation):", df_insurance.head(5), sep="\n")

# Replace the categorical 'sex' and 'smoker' columns with indicator columns.
# drop_first=True drops the first level of each, leaving one indicator per
# variable (sex_male and smoker_yes in the output recorded for this cell).
df2 = pd.get_dummies(
    data=df_insurance,
    columns=['sex', 'smoker'],
    drop_first=True,
)

# Preview the encoded data the same way
print("\nFirst five patients (after transformation):", df2.head(5), sep="\n")


First five patients (before transformation):
   age     sex   bmi  children smoker  expenses
0   19  female  27.9         0    yes  16884.92
1   18    male  33.8         1     no   1725.55
2   28    male  33.0         3     no   4449.46
3   33    male  22.7         0     no  21984.47
4   32    male  28.9         0     no   3866.86

First five patients (after transformation):
   age   bmi  children  expenses  sex_male  smoker_yes
0   19  27.9         0  16884.92     False        True
1   18  33.8         1   1725.55      True       False
2   28  33.0         3   4449.46      True       False
3   33  22.7         0  21984.47      True       False
4   32  28.9         0   3866.86      True       False


In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the insurance records and encode 'sex' / 'smoker' as indicator columns
# (drop_first=True leaves a single indicator for each variable)
df_insurance = pd.read_csv('insurance.csv')
df2 = pd.get_dummies(data=df_insurance, columns=['sex', 'smoker'], drop_first=True)

# Feature matrix and response: expenses explained by five predictors
X = df2.loc[:, ['age', 'bmi', 'children', 'sex_male', 'smoker_yes']]
y = df2.loc[:, 'expenses']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training portion, then score on the held-out rows
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# R-squared of the predictions against the held-out expenses
r_squared = r2_score(y_test, y_pred)

# Adjusted R-squared penalises R-squared for the number of predictors:
#   1 - (1 - R^2) * (n - 1) / (n - p - 1)
# Here n counts the held-out (test) rows, since that is where R^2 was measured.
n = y_test.shape[0]       # observations in the test set
p = len(X_train.columns)  # predictors in the model
adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1) / (n - p - 1))

print(f'R-squared: {r_squared:.3f}')
print(f'Adjusted R-squared: {adjusted_r_squared:.3f}')


R-squared: 0.781
Adjusted R-squared: 0.777


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Read the insurance records and encode 'sex' / 'smoker' as indicator columns
# (drop_first=True leaves a single indicator for each variable)
df_insurance = pd.read_csv('insurance.csv')
df2 = pd.get_dummies(data=df_insurance, columns=['sex', 'smoker'], drop_first=True)

# BMI_ind flags patients whose BMI is at least 30: the comparison yields
# booleans, which are cast straight to integers (1 = BMI >= 30, 0 = otherwise)
df2['BMI_ind'] = (df2['bmi'] >= 30).astype(int)

# Show the first five rows to verify that BMI_ind was added
print(df2.head(5))

# Optional next step (not run in this cell): fit a LinearRegression of
# 'expenses' on age, bmi, children, sex_male, smoker_yes and BMI_ind, using
# train_test_split(test_size=0.2, random_state=42), then evaluate on the test set.


   age   bmi  children  expenses  sex_male  smoker_yes  BMI_ind
0   19  27.9         0  16884.92     False        True        0
1   18  33.8         1   1725.55      True       False        1
2   28  33.0         3   4449.46      True       False        1
3   33  22.7         0  21984.47      True       False        0
4   32  28.9         0   3866.86      True       False        0


In [11]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Read the insurance records into a DataFrame
df_insurance = pd.read_csv('insurance.csv')

# Build the modelling frame in one chain:
#   1. encode 'sex' / 'smoker' as indicator columns (one per variable),
#   2. add BMI_ind = 1 when BMI is at least 30, else 0,
#   3. add BMI_ind_smoker = BMI_ind * smoker_yes, which is non-zero only for
#      smokers whose BMI is at least 30 (the interaction of the two indicators).
# assign() evaluates its keyword arguments in order, so the second one can
# read the BMI_ind column created by the first.
df2 = pd.get_dummies(
    data=df_insurance,
    columns=['sex', 'smoker'],
    drop_first=True,
).assign(
    BMI_ind=lambda frame: (frame['bmi'] >= 30).astype(int),
    BMI_ind_smoker=lambda frame: frame['BMI_ind'] * frame['smoker_yes'],
)

# Show the first five rows to confirm both new columns
print(df2.head(5))

# Optional next step (not run in this cell): fit a LinearRegression of
# 'expenses' on age, bmi, children, sex_male, smoker_yes, BMI_ind and
# BMI_ind_smoker, using train_test_split(test_size=0.2, random_state=42),
# then evaluate on the test set.


   age   bmi  children  expenses  sex_male  smoker_yes  BMI_ind  \
0   19  27.9         0  16884.92     False        True        0   
1   18  33.8         1   1725.55      True       False        1   
2   28  33.0         3   4449.46      True       False        1   
3   33  22.7         0  21984.47      True       False        0   
4   32  28.9         0   3866.86      True       False        0   

   BMI_ind_smoker  
0               0  
1               0  
2               0  
3               0  
4               0  


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Read the insurance records and encode 'sex' / 'smoker' as indicator columns
# (drop_first=True leaves a single indicator for each variable)
df_insurance = pd.read_csv('insurance.csv')
df2 = pd.get_dummies(data=df_insurance, columns=['sex', 'smoker'], drop_first=True)

# BMI_ind: 1 when the patient's BMI is at least 30, otherwise 0
df2['BMI_ind'] = (df2['bmi'] >= 30).astype(int)

# BMI_ind_smoker: product of the two indicators, i.e. the interaction term
# that is non-zero only for smokers whose BMI is at least 30
df2['BMI_ind_smoker'] = df2['BMI_ind'] * df2['smoker_yes']

# Feature matrix (now including the indicator and the interaction) and response
X = df2.loc[
    :,
    ['age', 'bmi', 'children', 'sex_male', 'smoker_yes', 'BMI_ind', 'BMI_ind_smoker'],
]
y = df2.loc[:, 'expenses']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit on the training portion, then score on the held-out rows
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# R-squared on the test set, and its adjusted form
#   1 - (1 - R^2) * (n - 1) / (n - p - 1)
# where n counts the held-out (test) rows and p the predictors in the model
r_squared = r2_score(y_test, y_pred)
n = y_test.shape[0]       # observations in the test set
p = len(X_train.columns)  # predictors in the model
adjusted_r_squared = 1 - ((1 - r_squared) * (n - 1) / (n - p - 1))

print(f'R-squared: {r_squared:.3f}')
print(f'Adjusted R-squared: {adjusted_r_squared:.3f}')


R-squared: 0.873
Adjusted R-squared: 0.870
