In [None]:
import pandas as pd
import numpy as np
import os

# Location of the input CSV. The original absolute path is kept as the default so
# existing runs behave the same; set the DATA_CSV_PATH environment variable to run
# the notebook on another machine without editing this cell.
DATA_CSV_PATH = os.environ.get("DATA_CSV_PATH", "C:/Users/kokamoto/Desktop/data.csv")

# Raw data set. Later cells read the "FIM-M", "SI" and "FOIS" columns from it.
# NOTE(review): np.corrcoef / StandardScaler are applied to the whole frame below,
# so every column is presumably numeric -- confirm against the data file.
df = pd.read_csv(DATA_CSV_PATH)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import seaborn as sns
# Use matplotlib's "ggplot" style sheet for the figures drawn in this notebook.
plt.style.use("ggplot")

In [None]:
# Correlation analysis: pairwise correlation coefficients between all columns of
# df, drawn as an annotated heatmap.
column_labels = list(df.columns)

# np.corrcoef treats each row as one variable, hence the transpose so that every
# DataFrame column becomes a row.
corr = np.corrcoef(df.to_numpy().transpose())

hm = sns.heatmap(
    corr,
    annot=True,            # print the coefficient inside each cell
    fmt=".2f",
    annot_kws={"size": 8},
    xticklabels=column_labels,
    yticklabels=column_labels,
)
plt.tight_layout()
plt.show()

In [None]:
# Standardise every column of df with a StandardScaler.
#
# The scaler's mean / standard deviation are estimated from the TRAINING rows only.
# Fitting on the full data set would let statistics of the held-out test rows leak
# into the training features and target.
#
# The training rows are found with the same test_size / random_state that the
# train_test_split call on the standardised data uses later in the notebook. The
# shuffle depends only on the number of rows and the seed, so both calls select the
# same rows. Keep the two calls in sync when changing either of them.
values = np.array(df)
train_rows, test_rows = train_test_split(np.arange(len(values)), test_size = 0.2, random_state = 0)

scaler = StandardScaler()
scaler.fit(values[train_rows])

# Transform all rows (train and test) with the training statistics; df_std keeps
# the column names of df and gets a fresh default index.
df_std = pd.DataFrame(scaler.transform(values), columns = df.columns)

In [None]:
# Predictors ("FIM-M", "SI") and target ("FOIS"), all taken from the standardised
# frame. Double brackets keep each selection a DataFrame rather than a Series.
feature_cols = ["FIM-M", "SI"]
x = df_std[feature_cols]
y = df_std[["FOIS"]]

# Single-predictor frames; not used by the cells visible here.
x1 = x[["FIM-M"]]
x2 = x[["SI"]]

# Hold out 20 % of the rows for testing; the fixed seed makes the split repeatable.
split_parts = train_test_split(x, y, random_state = 0, test_size = 0.2)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# Multiple linear regression on both predictors
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and fitting are chained.
model_multi = LinearRegression().fit(X_train, Y_train)
Y_test_pred_multi = model_multi.predict(X_test)

# R^2 on the held-out test set
r2_multi = r2_score(y_true = Y_test, y_pred = Y_test_pred_multi)
print(r2_multi)

In [None]:
# Decision Tree
from sklearn.tree import DecisionTreeRegressor

model_clf = DecisionTreeRegressor(random_state = 0)

# Hyperparameter candidates: 3 x 3 x 3 = 27 combinations
param_grid_clf = dict(
    max_depth = [1, 3, 10],
    min_samples_split = [2, 4, 12],
    min_samples_leaf = [1, 2, 4],
)

# Exhaustive search over the grid with 5-fold cross-validation on the training
# set, using all available cores (n_jobs = -1).
tuned_model_clf = GridSearchCV(
    model_clf,
    param_grid_clf,
    cv = 5,
    n_jobs = -1,
    verbose = True,
    return_train_score = False,
)
tuned_model_clf.fit(X_train, Y_train)
print(tuned_model_clf.best_estimator_)

# Score the refitted best model on the held-out test set (R^2)
Y_test_pred_clf = tuned_model_clf.predict(X_test)
r2_clf = r2_score(y_true = Y_test, y_pred = Y_test_pred_clf)
print(r2_clf)

In [None]:
# Random Forest
from sklearn.ensemble import RandomForestRegressor

model_gsr = RandomForestRegressor(random_state=0)

n_features = X_train.shape[1]
param_grid_gsr = {
    "n_estimators":[5, 10, 20, 30, 50, 100, 300],
    # range() excludes its stop value, so "+ 1" is needed for the grid to also try
    # max_features == n_features (all predictors considered at every split).
    # Without it, two predictors gave only [1].
    "max_features":list(range(1, n_features + 1)),
    # Each forest is built single-threaded; parallelism comes from GridSearchCV.
    "n_jobs":[1],
    "min_samples_split":[3, 5, 10, 15, 20, 25, 30, 40, 50, 100],
    "max_depth":[3, 5, 10, 15, 20, 25, 30, 40, 50, 100]}

# Exhaustive 5-fold cross-validated search on the training set, using all cores.
# The grid is large (7 * n_features * 10 * 10 candidates), so this cell is slow.
tuned_model_gsr = GridSearchCV(estimator = model_gsr,
                               param_grid = param_grid_gsr,
                               cv = 5,
                               n_jobs = -1,
                               verbose=True,
                               return_train_score = False)

# Y_train is a single-column frame; flatten it to shape (n_samples,) so the forest
# does not raise a DataConversionWarning on every one of the CV fits.
tuned_model_gsr.fit(X_train, np.ravel(Y_train))
print(tuned_model_gsr.best_estimator_)

# Score the refitted best model on the held-out test set (R^2)
Y_test_pred_gsr = tuned_model_gsr.predict(X_test)
r2_gsr = r2_score(Y_test, Y_test_pred_gsr)
print(r2_gsr)

In [None]:
# Support Vector Regression
from sklearn import svm

model_svr = svm.SVR()

# Hyperparameter candidates for the RBF kernel: 1 x 5 x 3 x 2 = 30 combinations
param_grid_svr = {
    "kernel":["rbf"],
    "C":[1, 3, 5, 10, 100],
    "epsilon" : [0.0, 0.1, 0.2],
    "gamma":[1, 0.1]}

# Exhaustive 5-fold cross-validated search on the training set, using all cores.
tuned_model_svr = GridSearchCV(estimator = model_svr,
                               param_grid = param_grid_svr,
                               cv = 5,
                               n_jobs = -1,
                               verbose=True,
                               return_train_score = False)

# SVR expects a 1-D target. Y_train is a single-column frame, so flatten it to
# shape (n_samples,) to avoid a DataConversionWarning on every CV fit.
tuned_model_svr.fit(X_train, np.ravel(Y_train))
print(tuned_model_svr.best_estimator_)

# Score the refitted best model on the held-out test set (R^2)
Y_test_pred_svr = tuned_model_svr.predict(X_test)
r2_svr = r2_score(Y_test, Y_test_pred_svr)
print(r2_svr)