In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import MinMaxScaler


# Load the tracks table from disk.
df = pd.read_csv('data/go_track_tracks.csv')

# Prepare the data.
# Only the relevant columns are kept. 'car_or_bus' is the target (the label
# we want to classify).
# 'id', 'id_android' and 'linha' are left out (too many missing values).
features = [
    'speed',
    'time',
    'distance',
    'rating',
    'rating_bus',
    'rating_weather',
]
y = df['car_or_bus']
X = df[features]

# Funktion för att köra experiment
def run_svm_experiment(experiment_name, X_data, y_data, test_size, kernel, scale_data=False, random_state=42):
    """Train an SVC on a single train/test split and print its evaluation.

    Args:
        experiment_name: Label shown in the banner line printed first.
        X_data: Feature data handed to ``train_test_split``.
        y_data: Target labels handed to ``train_test_split``.
        test_size: Forwarded to ``train_test_split``; also used as a fraction
            when printing the train/test percentages.
        kernel: Kernel name forwarded to ``SVC``.
        scale_data: When true, a ``MinMaxScaler`` is fitted on the training
            part only and then applied to both parts.
        random_state: Seed forwarded to ``train_test_split``. The fixed
            default makes runs reproducible and gives experiments that share
            the same data and ``test_size`` the very same split, so their
            results are comparable. Pass ``None`` for a fresh random split.

    Returns:
        A ``(accuracy, confusion_matrix)`` tuple computed on the test part.
    """
    print(f"\n{'='*10} {experiment_name} {'='*10}")

    # Seeded split: without a seed every call draws a different split, so
    # differences between experiments could be caused by the split alone.
    X_train, X_test, y_train, y_test = train_test_split(
        X_data, y_data, test_size=test_size, random_state=random_state
    )

    # Normalisation (scaling)
    if scale_data:
        # Fit on the training part only so no test information leaks into
        # the scaler; the test part is transformed with the same parameters.
        scaler = MinMaxScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)
        print("Data: Normaliserat (MinMax)")
    else:
        print("Data: Icke-normaliserat (Original)")

    print(f"Split: Train={(1-test_size)*100:.0f}%, Test={test_size*100:.0f}%")
    print(f"Kernel: {kernel}")

    # Train the model
    svc = SVC(kernel=kernel)
    svc.fit(X_train, y_train)

    # Predict and evaluate on the held-out part
    y_pred = svc.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f}")
    print("Confusion Matrix:")
    print(cm)
    return acc, cm

# --- EXPERIMENT ---

# a.i: Unscaled data, 75% train / 25% test, RBF kernel (the SVC default)
run_svm_experiment(
    experiment_name="Exp 1: Oskalat Data",
    X_data=X,
    y_data=y,
    test_size=0.25,
    kernel='rbf',
    scale_data=False,
)

# a.ii: Normalised data, 75% train / 25% test, RBF kernel
run_svm_experiment(
    experiment_name="Exp 2: Normaliserat Data",
    X_data=X,
    y_data=y,
    test_size=0.25,
    kernel='rbf',
    scale_data=True,
)

# b.ii: Normalised data, 50% train / 50% test, RBF kernel (different split)
run_svm_experiment(
    experiment_name="Exp 3: Annan Split (50/50)",
    X_data=X,
    y_data=y,
    test_size=0.50,
    kernel='rbf',
    scale_data=True,
)

# c: Normalised data, 75% train / 25% test, linear kernel (different kernel)
# Kept as the final bare expression so a notebook still echoes its result.
run_svm_experiment(
    experiment_name="Exp 4: Linjär Kernel",
    X_data=X,
    y_data=y,
    test_size=0.25,
    kernel='linear',
    scale_data=True,
)


Data: Icke-normaliserat (Original)
Split: Train=75%, Test=25%
Kernel: rbf
Accuracy: 0.6829
Confusion Matrix:
[[17  5]
 [ 8 11]]

Data: Normaliserat (MinMax)
Split: Train=75%, Test=25%
Kernel: rbf
Accuracy: 0.8293
Confusion Matrix:
[[23  1]
 [ 6 11]]

Data: Normaliserat (MinMax)
Split: Train=50%, Test=50%
Kernel: rbf
Accuracy: 0.8537
Confusion Matrix:
[[50  0]
 [12 20]]

Data: Normaliserat (MinMax)
Split: Train=75%, Test=25%
Kernel: linear
Accuracy: 0.8537
Confusion Matrix:
[[22  5]
 [ 1 13]]


(0.8536585365853658,
 array([[22,  5],
        [ 1, 13]]))