In [None]:

# 📥 Data Collection
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the raw startup records from the CSV file in the working directory.
dataset = pd.read_csv('50_Startups.csv')

# 🧹 Data Preprocessing
# One-hot encode the categorical 'State' column. drop_first=True omits one
# dummy level so the remaining dummies are not perfectly collinear.
dataset = pd.get_dummies(dataset, drop_first=True)

# 🔀 Input / Output Split
# Predictors: the three spend columns plus the two remaining State dummies.
feature_columns = [
    'R&D Spend',
    'Administration',
    'Marketing Spend',
    'State_Florida',
    'State_New York',
]
X = dataset[feature_columns]
# Target: the profit column.
y = dataset['Profit']

# 📊 Split Train and Test
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

# 🏗️ Model Creation (SVM Regressor)
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Support vector regressor with an RBF kernel; other kernels such as
# 'linear' or 'poly' can be substituted here.
regressor = SVR(kernel='rbf')

# 🏋️ Train Model
# SVM requires feature scaling, so the inputs and the target each get
# their own scaler.
sc_X = StandardScaler()
sc_y = StandardScaler()

# Fit the input scaler on the training rows only, then apply the same
# transformation to both splits.
X_train_scaled = sc_X.fit(X_train).transform(X_train)
X_test_scaled = sc_X.transform(X_test)

# The scaler works on 2-D input, so the target is reshaped into a single
# column for scaling and flattened back to 1-D for fitting.
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_train_scaled = sc_y.fit_transform(y_train_column).ravel()

regressor.fit(X_train_scaled, y_train_scaled)

# 🧪 Test Set Prediction
# Predict in the scaled target space, then map the results back to the
# original profit units as a single-column 2-D array.
y_pred_scaled = regressor.predict(X_test_scaled)
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1))

# 📈 Evaluation Metrics
# Coefficient of determination on the held-out rows; closer to 1 means a
# better model.
r_squared = r2_score(y_test, y_pred)
print("R-squared:", r_squared)

# 💾 Save the Best Model
import pickle

filename = 'SVM_Regression_Model.pkl'

# Persist the regressor together with both fitted scalers as one tuple, so
# a later session can reproduce the exact preprocessing used in training.
# The context manager guarantees the file is flushed and closed even if
# serialization raises.
with open(filename, 'wb') as model_file:
    pickle.dump((regressor, sc_X, sc_y), model_file)



# 📦 Load the Saved Model
# NOTE: pickle.load can execute arbitrary code while deserializing; only
# load model files that come from a trusted source.
# The context manager closes the file handle as soon as loading finishes.
with open(filename, 'rb') as model_file:
    loaded_model, loaded_sc_X, loaded_sc_y = pickle.load(model_file)

# 🧾 Get Inputs & 🔮 Predict
# Predict on test set: scale the inputs, predict in scaled target space,
# then convert the predictions back to the original profit units.
X_test_scaled = loaded_sc_X.transform(X_test)
predictions_scaled = loaded_model.predict(X_test_scaled)
predictions = loaded_sc_y.inverse_transform(predictions_scaled.reshape(-1, 1))
print("Predictions:", predictions)

# Predict for a new data point
# Values follow the training column order: R&D Spend, Administration,
# Marketing Spend, State_Florida, State_New York. Wrapping the row in a
# DataFrame with the same column names as the training data lets the scaler
# validate the feature names instead of warning about unnamed input.
new_data = pd.DataFrame(
    [[160000, 130000, 300000, 0, 1]],
    columns=X_test.columns,
)
new_data_scaled = loaded_sc_X.transform(new_data)
new_prediction_scaled = loaded_model.predict(new_data_scaled)
new_prediction = loaded_sc_y.inverse_transform(new_prediction_scaled.reshape(-1, 1))
print("Prediction for new input:", new_prediction)

# 📣 Call to Action
# These predictions can be integrated into a business dashboard or web app for decision-making.

R-squared: 0.6770859937361201
Predictions: [[107317.84346549]
 [123229.16015796]
 [133228.44937977]
 [ 87967.15730496]
 [142383.33002246]
 [121688.43746896]
 [ 81046.06771789]
 [102788.82165891]
 [122750.14441838]
 [136041.69155237]
 [101791.80044773]
 [ 92672.68669778]
 [115446.83940885]
 [100856.08034009]
 [131925.63243054]]
Prediction for new input: [[168040.41726758]]


