# In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from scipy import optimize
import math

# Problem 1
# Wind statistics: turn (direction, speed) observations into 2-D velocity
# vectors, then compare the direction of the mean velocity vector with the
# plain arithmetic mean of the direction angles.
# 1. Load the data
# The file is read as semicolon-separated, with a decimal comma, decoded as latin1.
smhi_data = pd.read_csv("data/smhi.csv", delimiter=";", decimal=",", encoding="latin1")
# Column names are Swedish; they are bound here as wind direction and wind speed.
wind_direction = smhi_data['Vindriktning'].to_numpy()
wind_speed = smhi_data['Vindhastighet'].to_numpy()

# 2. Convert wind-direction to unit circle coordinates
# deg2rad is applied, so the direction column is assumed to be in degrees
# (NOTE(review): confirm unit and angle convention against the data source).
wind_direction_rad = np.deg2rad(wind_direction)
wind_direction_x = np.cos(wind_direction_rad)
wind_direction_y = np.sin(wind_direction_rad)
# Velocity vector = speed * unit direction vector, component-wise.
wind_velocity_x = wind_speed * wind_direction_x
wind_velocity_y = wind_speed * wind_direction_y

# 3. Calculate the average wind velocity and compare with average wind direction
average_wind_velocity_x = np.mean(wind_velocity_x)
average_wind_velocity_y = np.mean(wind_velocity_y)
# arctan2 returns radians in [-pi, pi]; convert to degrees and wrap into [0, 360).
average_wind_velocity_angle = (np.arctan2(average_wind_velocity_y, average_wind_velocity_x) * 180 / np.pi) % 360
# Plain arithmetic mean of the raw angles (no wrap-around handling, no speed
# weighting) -- this is the quantity being compared against the vector mean.
average_wind_direction = np.mean(wind_direction)
# True when the two angles agree within 1 degree absolute tolerance
# (np.isclose also adds its default relative tolerance).
same_angle = np.isclose(average_wind_velocity_angle, average_wind_direction, atol=1)

# 4. Calculate empirical covariance matrix
# Stack the components as columns: one row per observation, columns (x, y).
wind_velocity = np.vstack((wind_velocity_x, wind_velocity_y)).T
# rowvar=False tells np.cov that columns are the variables, so the result is
# the covariance matrix of the (x, y) velocity components.
cov_matrix = np.cov(wind_velocity, rowvar=False)

# Problem 1 Output
# The problem1_* names below are aliases of the objects computed above
# (plain assignment, no copies are made).
problem1_wind_direction = wind_direction
problem1_wind_speed = wind_speed
problem1_wind_direction_x_coordinate = wind_direction_x
problem1_wind_direction_y_coordinate = wind_direction_y
problem1_wind_velocity_x_coordinate = wind_velocity_x
problem1_wind_velocity_y_coordinate = wind_velocity_y
problem1_average_wind_velocity_x_coordinate = average_wind_velocity_x
problem1_average_wind_velocity_y_coordinate = average_wind_velocity_y
problem1_average_wind_velocity_angle_degrees = average_wind_velocity_angle
problem1_average_wind_direction_angle_degrees = average_wind_direction
problem1_same_angle = same_angle
problem1_wind_velocity_covariance_matrix = cov_matrix

# Problem 2
# Fit a linear-kernel support vector classifier that maps the three numeric
# columns X, Y, Z to the integer "Location" label.
# 1. Load the data
df_train = pd.read_csv("data/indoor_train.csv")

# 2. Create numpy arrays Xtrain and Ytrain
# Features: columns "X", "Y", "Z" as float64, one row per sample.
Xtrain = df_train[["X", "Y", "Z"]].to_numpy(dtype=np.float64)
# Labels: column "Location" cast to int64.
Ytrain = df_train["Location"].to_numpy(dtype=np.int64)

# 3. Train a Support Vector Classifier
# Only the kernel is set; every other SVC hyper-parameter is left at its default.
svc_train = SVC(kernel="linear")
svc_train.fit(Xtrain, Ytrain)

# Problem 2 Output
# Aliases of the objects above (plain assignment, no copies are made).
df_train_output = df_train
Xtrain_output = Xtrain
Ytrain_output = Ytrain
svc_train_output = svc_train

# Problem 3
# 1. Load and split data
spam_data = pd.read_csv("data/spam.csv")
# Every column except the last is a feature; the last column is the target.
problem3_X = spam_data.iloc[:, :-1].to_numpy()
problem3_Y = spam_data.iloc[:, -1].to_numpy()
# First split: 40% of the rows go to train, the remaining 60% are held back.
problem3_X_train, X_temp, problem3_Y_train, Y_temp = train_test_split(problem3_X, problem3_Y, test_size=0.6, random_state=42)
# Second split of the held-back 60%: one third becomes the calibration set and
# two thirds the test set, i.e. roughly 20% / 40% of the full data (up to
# rounding). random_state is fixed in both calls so the split is reproducible.
problem3_X_calib, problem3_X_test, problem3_Y_calib, problem3_Y_test = train_test_split(X_temp, Y_temp, test_size=2/3, random_state=42)

# 2. Define the ProportionalSpam class
class ProportionalSpam:
    """Logistic-regression style scorer fitted by direct loss minimisation.

    The model is ``P(Y = 1 | x) = sigmoid(coeffs[0] + x . coeffs[1:])``.
    ``fit`` estimates ``coeffs`` by minimising the mean negative
    log-likelihood with SciPy's conjugate-gradient optimiser, and
    ``predict`` returns the fitted probabilities rounded to one decimal.
    """

    def __init__(self):
        # Fitted parameter vector (intercept first, then one weight per
        # feature); None until fit() has been called.
        self.coeffs = None
        # Result object returned by scipy.optimize.minimize in the last
        # fit() call, kept so callers can inspect convergence.
        self.result = None

    def loss(self, X, Y, coeffs):
        """Return the mean negative log-likelihood of ``Y`` given ``X``.

        Args:
            X: 2-D array of features, one row per sample.
            Y: 1-D array of 0/1 targets, one entry per row of ``X``.
            coeffs: 1-D array of length ``X.shape[1] + 1``; element 0 is the
                intercept and the rest are the feature weights.

        Returns:
            The mean of ``-(Y*log(G) + (1-Y)*log(1-G))`` with
            ``G = sigmoid(logits)``.
        """
        logits = coeffs[0] + np.dot(X, coeffs[1:])
        # Evaluate the log-terms without ever forming G:
        #   -log(sigmoid(z))     = log(1 + exp(-z)) = logaddexp(0, -z)
        #   -log(1 - sigmoid(z)) = log(1 + exp(z))  = logaddexp(0,  z)
        # The naive form hits log(0) once the sigmoid saturates, which turns
        # the loss into inf/nan (0 * -inf) and derails the optimiser.
        neg_log_prob_one = np.logaddexp(0, -logits)
        neg_log_prob_zero = np.logaddexp(0, logits)
        return np.mean(Y * neg_log_prob_one + (1 - Y) * neg_log_prob_zero)

    def fit(self, X, Y):
        """Estimate ``coeffs`` by minimising ``loss`` on ``(X, Y)``.

        The optimiser starts from the all-zero vector and uses the
        conjugate-gradient method. The full optimiser result is stored in
        ``self.result`` and its solution vector in ``self.coeffs``.
        """
        def objective(coeffs):
            return self.loss(X, Y, coeffs)

        # One intercept plus one weight per feature column.
        initial_arguments = np.zeros(X.shape[1] + 1)
        self.result = optimize.minimize(objective, initial_arguments, method='cg')
        self.coeffs = self.result.x

    def predict(self, X):
        """Return fitted probabilities for ``X`` rounded to one decimal.

        Returns ``None`` when the model has not been fitted yet.
        """
        if self.coeffs is None:
            return None
        logits = self.coeffs[0] + np.dot(X, self.coeffs[1:])
        # exp(-logit) may overflow to inf for very negative logits; the
        # resulting probability 1 / (1 + inf) = 0 is still correct, so only
        # the overflow warning is suppressed.
        with np.errstate(over="ignore"):
            G = 1 / (1 + np.exp(-logits))
        # Round to the nearest 0.1 so predictions fall on a coarse grid.
        return np.round(10 * G) / 10

# Fit the ProportionalSpam model on the training split only.
problem3_ps = ProportionalSpam()
problem3_ps.fit(problem3_X_train, problem3_Y_train)

# 3. Calibration
# Score the calibration split with the fitted model and reshape the scores to
# a single-column 2-D array, the layout scikit-learn estimators expect for X.
problem3_X_pred = problem3_ps.predict(problem3_X_calib).reshape(-1, 1)
# Regress the observed calibration labels on the model score, so the tree
# learns a mapping from raw score to calibrated value.
problem3_calibrator = DecisionTreeRegressor()
problem3_calibrator.fit(problem3_X_pred, problem3_Y_calib)

# 4. Final predictions
# Test split: raw model score first, then the calibrator applied on top.
problem3_final_predictions = problem3_calibrator.predict(problem3_ps.predict(problem3_X_test).reshape(-1, 1))

# Problem 3 Output
# Aliases of the objects above (plain assignment, no copies are made).
problem3_X_output = problem3_X
problem3_Y_output = problem3_Y
problem3_X_train_output = problem3_X_train
problem3_X_calib_output = problem3_X_calib
problem3_X_test_output = problem3_X_test
problem3_Y_train_output = problem3_Y_train
problem3_Y_calib_output = problem3_Y_calib
problem3_Y_test_output = problem3_Y_test
problem3_ps_output = problem3_ps
problem3_X_pred_output = problem3_X_pred
problem3_calibrator_output = problem3_calibrator
problem3_final_predictions_output = problem3_final_predictions
