In [6]:
# 📥 Data Collection
import pandas as pd
dataset = pd.read_csv('50_Startups.csv')  # Load dataset

# 🧹 Data Preprocessing
# Convert categorical 'State' column into dummy variables and drop the first to avoid multicollinearity
dataset = pd.get_dummies(dataset, drop_first=True)

# 🔀 Input / Output Split
X = dataset[['R&D Spend', 'Administration', 'Marketing Spend', 'State_Florida', 'State_New York']]
y = dataset['Profit']

# 📊 Split Train and Test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

# 🏗️ Model Creation (SVM Regressor)
from sklearn.ensemble import RandomForestRegressor
regressor = RandomForestRegressor(n_estimators=100, random_state=0)

# 🏋️ Train Model
regressor.fit(X_train, y_train)

# 🧪 Test Set Prediction
y_pred = regressor.predict(X_test)

# 📈 Evaluation Metrics
from sklearn.metrics import r2_score
r_squared = r2_score(y_test, y_pred)
print("R-squared:", r_squared)  # Closer to 1 means better model

# 💾 Save the Best Model
import pickle
filename = 'RandomForest_Model.pkl'
pickle.dump(regressor, open(filename, 'wb'))


# 📦 Load the Saved Model
loaded_model = pickle.load(open(filename, 'rb'))

# 🧾 Get Inputs & 🔮 Predict
# Predict on test set
predictions = loaded_model.predict(X_test)
print("Predictions:", predictions)

# Predict for a new data point
new_data = [[160000, 130000, 300000, 0, 1]]
new_data_scaled = loaded_sc_X.transform(new_data)
new_prediction_scaled = loaded_model.predict(new_data_scaled)
new_prediction = loaded_sc_y.inverse_transform(new_prediction_scaled.reshape(-1, 1))
print("Prediction for new input:", new_prediction)

# 📣 Call to Action
# These predictions can be integrated into a business dashboard or web app for decision-making.

R-squared: 0.9460043548938504
Predictions: [104131.3085 134767.7027 136940.3837  79251.8251 182860.3155 115810.3632
  69942.9352 100347.8112 114842.7638 167117.7177 101048.768   89843.4912
 117257.0921  91658.5201 130868.7268]
Prediction for new input: [[1.51034037e+09]]


