In [1]:
# 1. Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import pickle

# 2. Load the dataset
dataset = pd.read_csv("50_Startups.csv")

# 3. One-hot encode the categorical 'State' column. drop_first=True drops one
#    dummy level so the remaining dummies are not perfectly collinear with the
#    intercept (the "dummy variable trap").
dataset = pd.get_dummies(dataset, columns=['State'], drop_first=True)

# 4. Define independent (features) and dependent (target) variables.
#    NOTE(review): the two dummy column names assume the CSV's 'State' column
#    yields 'State_Florida' and 'State_New York' after the first level is
#    dropped -- confirm against the data if the file changes.
independent = dataset[['R&D Spend', 'Administration', 'Marketing Spend', 'State_Florida', 'State_New York']]
# Double brackets keep the target as a one-column DataFrame (2-D), which is why
# coef_, intercept_ and the predictions below come back with an extra dimension.
dependent = dataset[['Profit']]

# 5. Split the dataset into training and testing sets: 30% held out for
#    testing, fixed random_state so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(independent, dependent, test_size=0.30, random_state=0)

# 6. Train the Linear Regression model
regressor = LinearRegression()
regressor.fit(X_train, y_train)

# 7. Get model coefficients and intercept
weight = regressor.coef_
bias = regressor.intercept_

# 8. Predict on test data
y_pred = regressor.predict(X_test)

# 9. Evaluate model performance (coefficient of determination on the test set)
r_score = r2_score(y_test, y_pred)

# 10. Save the trained model.
#     The context manager guarantees the file is flushed and closed before it
#     is reopened for reading in the next step.
filename = "finalized_model_mul_linear.sav"
with open(filename, 'wb') as model_file:
    pickle.dump(regressor, model_file)

# 11. Load the model and make a prediction.
#     NOTE: pickle.load executes arbitrary code embedded in the file; only load
#     model files that this script (or another trusted source) produced.
with open(filename, 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Example input. It is wrapped in a DataFrame carrying the training column
# labels so each value is matched to the feature the model was fitted on,
# rather than relying on bare positional order.
example_input = pd.DataFrame([[1234, 345, 4565, 1, 0]], columns=independent.columns)
result = loaded_model.predict(example_input)

# 12. Display outputs
print("Model Coefficients:", weight)
print("Model Intercept:", bias)
print("R² Score:", r_score)
print("Predicted Profit:", result)

Model Coefficients: [[7.90840255e-01 3.01968165e-02 3.10148566e-02 4.63028992e+02
  3.04799573e+02]]
Model Intercept: [42403.87087053]
R² Score: 0.9358680970046241
Predicted Profit: [[43994.79745873]]


