In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

from sklearn.svm import SVR
from sklearn.metrics import r2_score

In [2]:
df = pd.read_csv('data/data-ready.csv')

targets = ["z1", "z2", "z3"]

df.drop("DateTime", inplace=True, axis=1)

# split train and test
X = df.drop(columns=targets, axis=1)
y1 = df['z1']
X1_train, X1_test, y1_train, y1_test = train_test_split(X, y1, test_size=0.2, random_state=42)

y2 = df['z2']
X2_train, X2_test, y2_train, y2_test = train_test_split(X, y2, test_size=0.2, random_state=42)

y3 = df['z3']
X3_train, X3_test, y3_train, y3_test = train_test_split(X, y3, test_size=0.2, random_state=42)

In [3]:
X_train = [X1_train, X2_train, X3_train]
X_test = [X1_test, X2_test, X3_test]
y_train = [y1_train, y2_train, y3_train]
y_test = [y1_test, y2_test, y3_test]

### Testing Base SVM Model

In [8]:
models = []
predictions = []
for i in range(3):
    model = SVR()
    model.fit(X_train[i], y_train[i])

    prediction = model.predict(X_test[i])
    score = r2_score(y_test[i], prediction)

    print(f"------ zone {i}: r2 score: {score}")

    models.append(model)
    predictions.append(prediction)

------ zone 0: r2 score: 0.01392089827143772
------ zone 1: r2 score: 0.006481702611940587
------ zone 2: r2 score: -0.05310327894715505


In [4]:
df["total"] = df["z1"] + df["z2"] + df["z3"]

targetT = df["total"]
XT = df.drop(columns=["z1", "z2", "z3", "total"], axis=1)


XT_train, XT_test, yT_train, yT_test = train_test_split(XT, targetT, test_size=0.2, random_state=42)

model = SVR()
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: -0.006123817807192733


In [10]:
from sklearn.model_selection import GridSearchCV

In [11]:
search = [{
    'kernel': ('linear', 'poly', 'rbf', 
               'sigmoid'),
    'degree': [2, 3, 4],
    'C': [1, 10],
}]
""" model = SVR()
clf = GridSearchCV(model, param_grid=search)
clf.fit(XT_train, yT_train)
clf.best_params_ """


' model = SVR()\nclf = GridSearchCV(model, param_grid=search)\nclf.fit(XT_train, yT_train)\nclf.best_params_ '

In [7]:
model = SVR(kernel='linear', C=1)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: 0.6756429969649131


In [5]:
model = SVR(kernel='poly', C=1, degree=3)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: -0.0038710296791468757


In [6]:
model = SVR(kernel='poly', C=10, degree=2)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: -0.004621565929048943


In [8]:
model = SVR(kernel='sigmoid', C=10, degree=2)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: -0.004696756452044815


In [9]:
model = SVR(kernel='linear', C=20)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: 0.7050011615918124


In [10]:
model = SVR(kernel='linear', C=5)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: 0.699936813151984


### Trying PCA

In [11]:
from sklearn.decomposition import PCA

In [12]:
XT_pca = PCA(n_components=0.98).fit_transform(XT)


XT_train, XT_test, yT_train, yT_test = train_test_split(XT_pca, targetT, test_size=0.2, random_state=42)

model = SVR(kernel='linear', C=5)
model.fit(XT_train, yT_train)

prediction = model.predict(XT_test)
score = r2_score(yT_test, prediction)

print(f"------ total: r2 score: {score}")

------ total: r2 score: -0.02877189851316575
