In [None]:
# get rid of unnecessary warnings
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import cross_val_score

# Load the train/test splits from disk.
train_data = pd.read_csv('geckoq_data/train.csv')
test_data = pd.read_csv('geckoq_data/test.csv')

# Target column; it is present in the training file only.
y_train = train_data['log_pSat_Pa']

# Build the feature matrices: drop the identifier (and, for train, the
# target), then expand categorical columns into indicator columns.
# Each split is encoded independently, so the resulting column sets may differ.
X_train = pd.get_dummies(train_data.drop(columns=['ID', 'log_pSat_Pa']))
X_test = pd.get_dummies(test_data.drop(columns=['ID']))

# Align the test features to the training layout in a single step:
#   * columns present in train but absent from test are added, filled with 0
#   * columns present only in test are dropped
#   * column order follows X_train
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

# A depth-limited, seeded random forest, kept as its own module-level name.
rf2 = RandomForestRegressor(
    n_estimators=100,
    criterion='squared_error',
    max_depth=10,
    random_state=0,
    max_features=None,
)

# Candidate regressors as (label, estimator) pairs; the label is what gets
# printed when scores and predictions are reported.
models = [
    ("LinearRegression", LinearRegression()),
    ("SVR", SVR()),
    ("KNeighborsRegressor", KNeighborsRegressor()),
    ("DecisionTreeRegressor", DecisionTreeRegressor()),
    ("RandomForestRegressor", RandomForestRegressor()),
    ("RandomForestRegressor2", rf2),
    ("MLPRegressor", MLPRegressor(solver='lbfgs', random_state=0)),
]

# Score every candidate with 3-fold cross-validation on the training data.
# No `scoring` argument is passed, so each estimator's own default scorer
# is used. `names` and `results` are parallel lists (same order as `models`).
results = []
names = []
for name, model in models:
    names.append(name)
    results.append(cross_val_score(model, X_train, y_train, cv=3))

# Report the mean score across folds for each model.
for name, result in zip(names, results):
    print(name, result.mean())

# Train every candidate on the full training set, then predict on X_test.
# The resulting arrays are stored under the model's label.
predictions = {}
for name, model in models:
    # scikit-learn's fit() returns the estimator itself, so the calls chain.
    predictions[name] = model.fit(X_train, y_train).predict(X_test)

# Show the prediction array produced by each model.
for name in predictions:
    print(f"Predictions for {name}: {predictions[name]}")