In [183]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import spearmanr
from sklearn.svm import SVR

In [184]:
# Load the training and validation splits from the local datasets folder.
train_dataset, validation_dataset = (
    pd.read_csv(csv_path)
    for csv_path in ("./datasets/train.csv", "./datasets/val.csv")
)

In [185]:
# Preview the first rows of the training frame (columns shown below: id, text, score).
train_dataset.head()

Unnamed: 0,id,text,score
0,196112,A sandwich and french bread sit on a cutting b...,-0.666667
1,514558,A baseball player is standing on the field whi...,0.0
2,54244,A casserole served at a restaurant in a brown ...,1.333333
3,9420,OLD BLACK AND WHITE PHOTO OF AN ALL BOYS SCHOOL,1.666667
4,531512,A woman is milking a cow in the animal stable.,0.25


In [186]:
# Regression targets are taken straight from the "score" column of each split.
y = train_dataset["score"].values
y_validation = validation_dataset["score"].values

# Fit the TF-IDF vectorizer on the training text only, then reuse the fitted
# vectorizer to transform the validation text into the same feature space.
vectorizer = TfidfVectorizer()
train_text = train_dataset["text"].values
validation_text = validation_dataset["text"].values
X = vectorizer.fit_transform(train_text)
X_validation = vectorizer.transform(validation_text)

In [187]:
# Train a support-vector regressor with a polynomial kernel on the TF-IDF features.
# `fit` returns the estimator itself, so construction and training are chained;
# the trailing expression keeps the estimator repr as the cell's output.
model = SVR(kernel="poly").fit(X, y)
model

In [188]:
# Predict scores for the validation features with the fitted model.
y_pred = model.predict(X_validation)

In [189]:
# Score the validation predictions against the true scores:
# mean squared error plus Spearman's rank correlation.
spearman_corr = spearmanr(y_validation, y_pred)
mse = mean_squared_error(y_validation, y_pred)
print(
    f"MSE: {mse:.2f}",
    f"Spearman's correlation: {spearman_corr.statistic:.2f}",
    sep="\n",
)

MSE: 0.46
Spearman's correlation: 0.60


In [190]:
# Load the test split; later cells read its "text" column to produce predictions.
test_dataset = pd.read_csv("./datasets/test.csv")

In [191]:
# Transform the test text with the already-fitted vectorizer, then store the
# model's predictions in a new "score" column on the test frame.
X_test = vectorizer.transform(test_dataset["text"].values)
test_dataset["score"] = model.predict(X_test)

In [192]:
# Remove the raw text so only the remaining columns (including "score") are kept.
# Rebinding instead of `inplace=True`, together with `errors="ignore"`, makes this
# cell idempotent: re-running it after "text" is already gone is a no-op rather
# than a KeyError.
test_dataset = test_dataset.drop(columns=["text"], errors="ignore")

In [193]:
# Write the test frame to the submission CSV; index=False omits the row index.
test_dataset.to_csv("./datasets/submission.csv", index=False)