## Regression example using the superconductivity dataset

The dataset can also be found here:

https://archive.ics.uci.edu/dataset/464/superconductivty+data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [None]:
# Load the dataset into a DataFrame. 'train.csv' is a relative path, so the
# file must sit in the notebook's working directory.
import pandas as pd
df = pd.read_csv('train.csv')

In [None]:
# Preview the top rows to check that the file was parsed as expected.
df.head()

In [None]:
# Target: the critical temperature column.
Y = df['critical_temp']

# Features: every other column of the table.
X = df.drop(columns='critical_temp')

In [None]:
# Display the feature table (all columns except 'critical_temp').
X

In [None]:
# Display the target column ('critical_temp').
Y

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the data for evaluation:
#   test_size    - fraction of the full dataset placed in the test set (25%).
#   random_state - seed for the pseudorandom shuffle, so the exact same split
#                  is produced every time the notebook is run.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.25,
    random_state=42,
)

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor

# k-nearest neighbours: predicts from the 5 closest training samples, using
# the Minkowski metric with p=2 (i.e. Euclidean distance).
KNReg = KNeighborsRegressor(n_neighbors=5, metric='minkowski', p=2)

# The tree-based estimators involve randomness while being built. Seeding
# them keeps the reported errors identical from run to run, consistent with
# the seeded train/test split.
DTReg = DecisionTreeRegressor(random_state=42)

# Random forest: an ensemble of 20 decision trees.
RFReg = RandomForestRegressor(n_estimators=20, random_state=42)

In [None]:
# Train the k-NN model on the training split, then predict the critical
# temperature for the held-out test samples. fit() returns the estimator
# itself, so the two steps can be chained.
KNReg_pred = KNReg.fit(X_train, Y_train).predict(X_test)

In [None]:
# Display the k-NN predictions for the test set.
KNReg_pred

## Plotting true versus predicted values of the critical temperature

In [None]:
# One point per test sample: true value on x, k-NN prediction on y.
# The closer the points lie to the diagonal, the better the prediction.
ax = plt.gca()
ax.scatter(Y_test, KNReg_pred, s=5)
ax.set_xlabel("True Critical Temperature (K)", fontsize=16)
ax.set_ylabel("Predicted Critical Temperature (K)", fontsize=16)
# Uncomment to write the figure to disk:
#plt.savefig("critical_temperature.pdf")

### Root Mean Square Error 

The root mean square error (RMSE) is the square root of the average squared difference between the predicted and the observed values.

It is a measure of prediction error, expressed in the same units as the target (here, kelvin), so lower values mean better predictions. RMSE can provide significant insight into the performance and reliability of predictive models.

In [None]:
# Fit every model on the training split and report its root mean square
# error (RMSE) on the held-out test split.
regressors = [KNReg, DTReg, RFReg]

for reg in regressors:
    reg.fit(X_train, Y_train)
    Y_pred = reg.predict(X_test)

    # Take the square root explicitly rather than passing `squared=False`:
    # that argument was deprecated in scikit-learn 1.4 and removed in 1.6,
    # whereas this form works on every version.
    rms = np.sqrt(mean_squared_error(Y_test, Y_pred))

    print(f"root mean square error {rms:.2f}\n")