# In [6]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib

# Read the whole dataset from the CSV file in the working directory.
data = pd.read_csv('Dataset.csv')

# Single input feature (kept as a one-column DataFrame) and two targets.
feature_columns = ['DA']
X = data[feature_columns]
y1 = data['Backlinks']
y2 = data['LinkingDomains']

# One call splits the feature and both targets together, so the train/test
# rows stay aligned across X, y1 and y2. 20% is held out; the seed is fixed.
(
    X_train, X_test,
    y1_train, y1_test,
    y2_train, y2_test,
) = train_test_split(X, y1, y2, test_size=0.2, random_state=42)

# Both regressors use the same hyperparameters and seed; only the target differs.
forest_params = {'n_estimators': 100, 'random_state': 42}

# Random Forest predicting Backlinks from DA (fit() returns the fitted estimator).
rf1 = RandomForestRegressor(**forest_params).fit(X_train, y1_train)

# Random Forest predicting LinkingDomains from DA.
rf2 = RandomForestRegressor(**forest_params).fit(X_train, y2_train)

# Score each model on the held-out rows and report its mean squared error.

# Backlinks model
y1_pred = rf1.predict(X_test)
mse1 = mean_squared_error(y1_test, y1_pred)
print(f'MSE for Backlinks: {mse1:.2f}')

# LinkingDomains model
y2_pred = rf2.predict(X_test)
mse2 = mean_squared_error(y2_test, y2_pred)
print(f'MSE for LinkingDomains: {mse2:.2f}')

# Destination files for the two fitted models.
backlinks_model_path = 'da-to-backlinks.joblib'
linking_domains_model_path = 'da-to-linking-domains.joblib'

# Write both models to disk ...
joblib.dump(rf1, backlinks_model_path)
joblib.dump(rf2, linking_domains_model_path)

# ... then read them back, rebinding rf1/rf2 to the deserialized copies so
# the code below uses the models as restored from the files.
rf1 = joblib.load(backlinks_model_path)
rf2 = joblib.load(linking_domains_model_path)

# Build a one-row frame with the same 'DA' column the models were trained on.
new_data = pd.DataFrame({'DA': [2]})

# predict() returns one value per input row; take the only one.
backlinks_estimates = rf1.predict(new_data)
backlinks_pred = backlinks_estimates[0]
print(f'Predicted Backlinks: {backlinks_pred:.0f}')

linkingdomains_estimates = rf2.predict(new_data)
linkingdomains_pred = linkingdomains_estimates[0]
print(f'Predicted LinkingDomains: {linkingdomains_pred:.0f}')


# Output:
# MSE for Backlinks: 173331255962014.66
# MSE for LinkingDomains: 1115404047.18
# Predicted Backlinks: 8
# Predicted LinkingDomains: 3
