In [None]:
import pandas as pd
import warnings
import numpy as np

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings(action="ignore")

# Read the raw dataset from the current working directory.
data = pd.read_csv('space_traffic.csv')

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.compose import ColumnTransformer

# Feature engineering and target preparation.
#
# Produces three names used by the modelling cells that follow:
#   X      - DataFrame with the five model features
#   y      - standardised 'Traffic_Density' as a 2-D (n_rows, 1) array
#   scaler - the fitted StandardScaler, needed to map predictions back
#
# Every step is written so this cell can be re-executed on an
# already-processed `data` frame without raising.

# Parse 'Timestamp' into datetime values (no-op if it is already datetime).
data['Timestamp'] = pd.to_datetime(data['Timestamp'])

# Keep only the part of 'Peak_Time' before the first ':' as an integer.
# (Presumably the raw values look like "HH:MM" - confirm against the CSV.)
# Going through `str` keeps the cell re-runnable: an already-converted
# integer such as 14 becomes "14", has no ':' to split on, and parses
# back to 14 instead of failing on a missing `.split` attribute.
data['Peak_Time'] = (
    data['Peak_Time']
    .astype(str)
    .str.split(':')
    .str[0]
    .astype(int)
)

# Calendar features derived from the timestamp.
data['Hour'] = data['Timestamp'].dt.hour
data['Day_of_Week'] = data['Timestamp'].dt.dayofweek
data['Day_of_Month'] = data['Timestamp'].dt.day

# Integer-encode the two categorical columns, one encoder per column so
# each mapping can be inverted independently later.
# NOTE(review): LabelEncoder assigns arbitrary integer codes; linear and
# distance-based models will read them as ordered magnitudes. Consider
# one-hot encoding if these categories are nominal.
label_encoder_location = LabelEncoder()
label_encoder_object = LabelEncoder()

data['Location_Encoded'] = label_encoder_location.fit_transform(data['Location'])
data['Object_Type_Encoded'] = label_encoder_object.fit_transform(data['Object_Type'])

# Feature matrix (X) and raw target.
X = data[['Location_Encoded', 'Object_Type_Encoded', 'Hour', 'Day_of_Week', 'Day_of_Month']]
y = data['Traffic_Density']

# Standardise the target. StandardScaler needs a 2-D input, hence the
# reshape; `y` therefore ends up as an (n_rows, 1) ndarray.
# NOTE(review): the scaler is fitted on all rows, i.e. before the
# train/test split performed in later cells, so test-set statistics
# influence the scaling.
scaler = StandardScaler()
y = scaler.fit_transform(y.values.reshape(-1, 1))

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Hold out 20% of the rows for evaluation; the seed fixes the partition.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Fit a plain linear regression on the training rows and predict the
# held-out rows (still in standardised target units).
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Map both the true and predicted targets back through the scaler so the
# metrics below are reported in the original target units.
y_test_rescaled = scaler.inverse_transform(y_test)
y_pred_rescaled = scaler.inverse_transform(y_pred)

# Hold-out metrics.
mse = mean_squared_error(y_test_rescaled, y_pred_rescaled)
r2 = r2_score(y_test_rescaled, y_pred_rescaled)

print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")

# Show the first ten (actual, predicted) pairs side by side.
print("\nSample Predictions:")
sample_pairs = zip(y_test_rescaled[:10], y_pred_rescaled[:10])
for actual, predicted in sample_pairs:
    # Each element is a length-1 row of the 2-D arrays, hence the [0].
    print(f"Actual: {actual[0]:.2f}, Predicted: {predicted[0]:.2f}")

# Persist the fitted estimator to disk.
joblib.dump(model, 'linear_regression_model.pkl')
print("Model saved successfully!")

Mean Squared Error (MSE): 805.5773943591065
R² Score: -0.006034119149835648

Sample Predictions:
Actual: 25.00, Predicted: 48.07
Actual: 97.00, Predicted: 49.46
Actual: 41.00, Predicted: 50.43
Actual: 38.00, Predicted: 46.15
Actual: 43.00, Predicted: 52.95
Actual: 79.00, Predicted: 47.21
Actual: 73.00, Predicted: 49.25
Actual: 67.00, Predicted: 47.75
Actual: 83.00, Predicted: 48.41
Actual: 95.00, Predicted: 51.79
Model saved successfully!


In [None]:
from sklearn.model_selection import cross_val_score

# Score the linear model with 5-fold cross-validation over all rows,
# using R² as the metric.
cv_scores_r2 = cross_val_score(model, X, y, scoring='r2', cv=5)

# Report the average and the spread of the per-fold R² values.
print(f"Cross-validated R² score: {cv_scores_r2.mean()}")
print(f"Standard deviation of R² scores: {cv_scores_r2.std()}")

Cross-validated R² score: -0.008935263569921447
Standard deviation of R² scores: 0.00532714924995567


In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, r2_score
import joblib
from sklearn.preprocessing import StandardScaler

# Same 80/20 partition as the other model cells (identical seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Regularisation strengths to try.
ridge_param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

# 5-fold grid search over alpha, ranking candidates by R².
ridge_grid_search = GridSearchCV(
    estimator=Ridge(),
    param_grid=ridge_param_grid,
    scoring='r2',
    cv=5,
)
ridge_grid_search.fit(X_train, y_train)

# The refitted winner and its predictions on the held-out rows.
best_ridge_model = ridge_grid_search.best_estimator_
ridge_pred = best_ridge_model.predict(X_test)

# Undo the target standardisation before computing metrics.
y_test_rescaled = scaler.inverse_transform(y_test)
ridge_pred_rescaled = scaler.inverse_transform(ridge_pred)

# Hold-out metrics in original target units.
ridge_mse = mean_squared_error(y_test_rescaled, ridge_pred_rescaled)
ridge_r2 = r2_score(y_test_rescaled, ridge_pred_rescaled)

# Report hold-out metrics plus the search outcome.
print(f"Ridge Regression - Mean Squared Error (MSE): {ridge_mse}")
print(f"Ridge Regression - R² Score: {ridge_r2}")
print(f"Best alpha for Ridge: {ridge_grid_search.best_params_['alpha']}")
print(f"Best R² score during GridSearchCV: {ridge_grid_search.best_score_}")

# Persist the selected estimator.
joblib.dump(best_ridge_model, 'best_ridge_model.pkl')
print("Best Ridge model saved successfully!")

Ridge Regression - Mean Squared Error (MSE): 805.4744559605903
Ridge Regression - R² Score: -0.005905565963260084
Best alpha for Ridge: 100
Best R² score during GridSearchCV: -0.0177382989718865
Best Ridge model saved successfully!


In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.metrics import mean_squared_error, r2_score
import joblib
from sklearn.preprocessing import StandardScaler

# Same 80/20 partition as the other model cells (identical seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Regularisation strengths to try.
lasso_param_grid = {'alpha': [0.01, 0.1, 1, 10, 100]}

# 5-fold grid search over alpha, ranking candidates by R². The iteration
# cap is raised to 10000 on the base estimator.
lasso_grid_search = GridSearchCV(
    estimator=Lasso(max_iter=10000),
    param_grid=lasso_param_grid,
    scoring='r2',
    cv=5,
)
lasso_grid_search.fit(X_train, y_train)

# The refitted winner and its predictions on the held-out rows.
best_lasso_model = lasso_grid_search.best_estimator_
lasso_pred = best_lasso_model.predict(X_test)

# Force both arrays into column shape, then undo the target
# standardisation so metrics are in original units.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
lasso_pred_rescaled = scaler.inverse_transform(lasso_pred.reshape(-1, 1))

# Hold-out metrics.
lasso_mse = mean_squared_error(y_test_rescaled, lasso_pred_rescaled)
lasso_r2 = r2_score(y_test_rescaled, lasso_pred_rescaled)

# Report hold-out metrics plus the search outcome.
print(f"Lasso Regression - Mean Squared Error (MSE): {lasso_mse}")
print(f"Lasso Regression - R² Score: {lasso_r2}")
print(f"Best alpha for Lasso: {lasso_grid_search.best_params_['alpha']}")
print(f"Best R² score during GridSearchCV for Lasso: {lasso_grid_search.best_score_}")

# Persist the selected estimator.
joblib.dump(best_lasso_model, 'best_lasso_model.pkl')
print("Best Lasso model saved successfully!")

Lasso Regression - Mean Squared Error (MSE): 801.0278265625
Lasso Regression - R² Score: -0.00035245471533040806
Best alpha for Lasso: 1
Best R² score during GridSearchCV for Lasso: -0.00974720749070399
Best Lasso model saved successfully!


In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Split the data into training and test sets (same seed as the other cells)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameter grid for KNN Regressor.
# 'minkowski' is intentionally not listed: `p` is left at its default of 2,
# and Minkowski distance with p=2 is exactly the Euclidean distance, so it
# would only repeat every 'euclidean' candidate and add redundant fits.
# NOTE(review): X is not scaled before being passed to this distance-based
# model, so features with larger numeric ranges weigh more in the distance.
# Consider wrapping the regressor in a Pipeline with a feature scaler.
knn_param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

# Perform Grid Search with 5-fold Cross-Validation, ranking candidates by R²
knn_grid_search = GridSearchCV(KNeighborsRegressor(), knn_param_grid, cv=5, scoring='r2', verbose=1)
knn_grid_search.fit(X_train, y_train)

# Get the best KNN model (refitted on the full training split)
best_knn_model = knn_grid_search.best_estimator_

# Make predictions on the held-out rows
knn_pred = best_knn_model.predict(X_test)

# Rescale the predictions and actual test values to original scale;
# the reshape guarantees the column layout the scaler expects.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
knn_pred_rescaled = scaler.inverse_transform(knn_pred.reshape(-1, 1))

# Evaluate the model in original target units
knn_mse = mean_squared_error(y_test_rescaled, knn_pred_rescaled)
knn_r2 = r2_score(y_test_rescaled, knn_pred_rescaled)

# Output results
print(f"KNN Regressor - Mean Squared Error (MSE): {knn_mse}")
print(f"KNN Regressor - R² Score: {knn_r2}")
print(f"Best parameters for KNN: {knn_grid_search.best_params_}")
print(f"Best R² score during GridSearchCV for KNN: {knn_grid_search.best_score_}")

# Save the best KNN model
joblib.dump(best_knn_model, 'best_knn_model.pkl')
print("Best KNN model saved successfully!")

Fitting 5 folds for each of 30 candidates, totalling 150 fits
KNN Regressor - Mean Squared Error (MSE): 890.2504779377429
KNN Regressor - R² Score: -0.11177692133149764
Best parameters for KNN: {'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'distance'}
Best R² score during GridSearchCV for KNN: -0.08007791935209156
Best KNN model saved successfully!


In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Same 80/20 partition as the other model cells (identical seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Hyper-parameter candidates for the forest.
rf_param_grid = {
    'n_estimators': [100, 200, 300],   # how many trees to grow
    'max_depth': [None, 10, 20, 30],   # depth cap per tree (None = no cap)
    'min_samples_split': [2, 5, 10],   # samples needed before a node may split
    'min_samples_leaf': [1, 2, 4],     # samples that must remain in each leaf
    'bootstrap': [True, False],        # resample rows per tree or not
}

# 5-fold grid search ranked by R², run on all available cores.
rf_grid_search = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=rf_param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# The target is stored as a column array; flatten it to 1-D for fitting.
rf_grid_search.fit(X_train, y_train.reshape(-1))

# The refitted winner and its predictions on the held-out rows.
best_rf_model = rf_grid_search.best_estimator_
y_pred_rf = best_rf_model.predict(X_test)

# Back to original target units; reshape to the column layout the
# scaler expects.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_rf_rescaled = scaler.inverse_transform(y_pred_rf.reshape(-1, 1))

# Hold-out metrics.
rf_mse = mean_squared_error(y_test_rescaled, y_pred_rf_rescaled)
rf_r2 = r2_score(y_test_rescaled, y_pred_rf_rescaled)

# Report hold-out metrics plus the search outcome.
print(f"Random Forest - Mean Squared Error (MSE): {rf_mse}")
print(f"Random Forest - R² Score: {rf_r2}")
print(f"Best parameters for Random Forest: {rf_grid_search.best_params_}")
print(f"Best R² score during GridSearchCV: {rf_grid_search.best_score_}")

# Persist the selected estimator.
joblib.dump(best_rf_model, 'best_random_forest_model.pkl')
print("Best Random Forest model saved successfully!")

Fitting 5 folds for each of 216 candidates, totalling 1080 fits
Random Forest - Mean Squared Error (MSE): 885.138737009281
Random Forest - R² Score: -0.10539319480404385
Best parameters for Random Forest: {'bootstrap': True, 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 300}
Best R² score during GridSearchCV: -0.05173288862346435
Best Random Forest model saved successfully!


In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Same 80/20 partition as the other model cells (identical seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Hyper-parameter candidates for the tree.
dt_param_grid = {
    'max_depth': [None, 5, 10, 20],          # depth cap (None = no cap)
    'min_samples_split': [2, 5, 10],         # samples needed before a node may split
    'min_samples_leaf': [1, 2, 4],           # samples that must remain in each leaf
    'max_features': [None, 'sqrt', 'log2'],  # features examined per split
}

# 5-fold grid search ranked by R², run on all available cores.
dt_grid_search = GridSearchCV(
    estimator=DecisionTreeRegressor(random_state=42),
    param_grid=dt_param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# The target is stored as a column array; flatten it to 1-D for fitting.
dt_grid_search.fit(X_train, y_train.reshape(-1))

# The refitted winner and its predictions on the held-out rows.
best_dt_model = dt_grid_search.best_estimator_
y_pred_dt = best_dt_model.predict(X_test)

# Back to original target units; reshape to the column layout the
# scaler expects.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_dt_rescaled = scaler.inverse_transform(y_pred_dt.reshape(-1, 1))

# Hold-out metrics.
dt_mse = mean_squared_error(y_test_rescaled, y_pred_dt_rescaled)
dt_r2 = r2_score(y_test_rescaled, y_pred_dt_rescaled)

# Report hold-out metrics plus the search outcome.
print(f"Decision Tree - Mean Squared Error (MSE): {dt_mse}")
print(f"Decision Tree - R² Score: {dt_r2}")
print(f"Best parameters for Decision Tree: {dt_grid_search.best_params_}")
print(f"Best R² score during GridSearchCV: {dt_grid_search.best_score_}")

# Persist the selected estimator.
joblib.dump(best_dt_model, 'best_decision_tree_model.pkl')
print("Best Decision Tree model saved successfully!")


Fitting 5 folds for each of 108 candidates, totalling 540 fits
Decision Tree - Mean Squared Error (MSE): 869.1550832283101
Decision Tree - R² Score: -0.08543223119591303
Best parameters for Decision Tree: {'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 2}
Best R² score during GridSearchCV: -0.0905079463781348
Best Decision Tree model saved successfully!


In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# Same 80/20 partition as the other model cells (identical seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Hyper-parameter candidates for the boosted ensemble.
gb_param_grid = {
    'n_estimators': [100, 200, 300],    # number of boosting stages
    'learning_rate': [0.01, 0.1, 0.2],  # shrinkage applied to each stage
    'max_depth': [3, 5, 7],             # depth cap of each stage's tree
    'min_samples_split': [2, 5, 10],    # samples needed before a node may split
    'min_samples_leaf': [1, 2, 4],      # samples that must remain in each leaf
}

# 5-fold grid search ranked by R², run on all available cores.
gb_grid_search = GridSearchCV(
    estimator=GradientBoostingRegressor(random_state=42),
    param_grid=gb_param_grid,
    scoring='r2',
    cv=5,
    n_jobs=-1,
    verbose=1,
)

# The target is stored as a column array; flatten it to 1-D for fitting.
gb_grid_search.fit(X_train, y_train.reshape(-1))

# The refitted winner and its predictions on the held-out rows.
best_gb_model = gb_grid_search.best_estimator_
y_pred_gb = best_gb_model.predict(X_test)

# Back to original target units; reshape to the column layout the
# scaler expects.
y_test_rescaled = scaler.inverse_transform(y_test.reshape(-1, 1))
y_pred_gb_rescaled = scaler.inverse_transform(y_pred_gb.reshape(-1, 1))

# Hold-out metrics.
gb_mse = mean_squared_error(y_test_rescaled, y_pred_gb_rescaled)
gb_r2 = r2_score(y_test_rescaled, y_pred_gb_rescaled)

# Report hold-out metrics plus the search outcome.
print(f"Gradient Boosting - Mean Squared Error (MSE): {gb_mse}")
print(f"Gradient Boosting - R² Score: {gb_r2}")
print(f"Best parameters for Gradient Boosting: {gb_grid_search.best_params_}")
print(f"Best R² score during GridSearchCV: {gb_grid_search.best_score_}")

# Persist the selected estimator.
joblib.dump(best_gb_model, 'best_gradient_boosting_model.pkl')
print("Best Gradient Boosting model saved successfully!")

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
Gradient Boosting - Mean Squared Error (MSE): 827.20340028239
Gradient Boosting - R² Score: -0.03304145571626016
Best parameters for Gradient Boosting: {'learning_rate': 0.01, 'max_depth': 3, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best R² score during GridSearchCV: -0.0033742339245776297
Best Gradient Boosting model saved successfully!
