In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
data = pd.read_csv(url)

# Define features and target
X = data.drop("medv", axis=1)
y = data["medv"]

# Step 1: Binning the target variable into classes (e.g., Low, Medium, High price)
y_binned = pd.qcut(y, q=3, labels=["Low", "Medium", "High"])

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_binned, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize models with different kernels
kernels = ['linear', 'rbf', 'poly']
results = {}

for kernel in kernels:
    # Step 2: Train SVC with the given kernel
    model = SVC(kernel=kernel)
    model.fit(X_train, y_train)
    
    # Step 3: Make predictions
    y_pred = model.predict(X_test)
    
    # Step 4: Calculate accuracy, confusion matrix, and classification report
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    
    # Store results for each kernel
    results[kernel] = {
        'accuracy': accuracy,
        'confusion_matrix': conf_matrix,
        'classification_report': class_report
    }

# Print results for each kernel
for kernel, metrics in results.items():
    print(f"\nKernel: {kernel}")
    print(f"Accuracy: {metrics['accuracy']}")
    print("Confusion Matrix:\n", metrics['confusion_matrix'])
    print("Classification Report:\n", metrics['classification_report'])


Kernel: linear
Accuracy: 0.7763157894736842
Confusion Matrix:
 [[32  1  9]
 [ 2 51  6]
 [ 7  9 35]]
Classification Report:
               precision    recall  f1-score   support

        High       0.78      0.76      0.77        42
         Low       0.84      0.86      0.85        59
      Medium       0.70      0.69      0.69        51

    accuracy                           0.78       152
   macro avg       0.77      0.77      0.77       152
weighted avg       0.78      0.78      0.78       152


Kernel: rbf
Accuracy: 0.7828947368421053
Confusion Matrix:
 [[32  1  9]
 [ 0 48 11]
 [ 7  5 39]]
Classification Report:
               precision    recall  f1-score   support

        High       0.82      0.76      0.79        42
         Low       0.89      0.81      0.85        59
      Medium       0.66      0.76      0.71        51

    accuracy                           0.78       152
   macro avg       0.79      0.78      0.78       152
weighted avg       0.79      0.78      0.79   

In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load the earthquake dataset
file_path = ('file:///Users/hudaabdul/Library/CloudStorage/OneDrive-Personal/MSc in Data Science/Machine Learning/Datasets/earthquake_data.csv') 
data = pd.read_csv(file_path)

# Define features and target
X = data[['cdi', 'mmi', 'sig', 'nst', 'dmin', 'gap', 'depth', 'latitude', 'longitude']]
y = data['tsunami']  # Using 'tsunami' as the target variable

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Standardize the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize models with different kernels
kernels = ['linear', 'rbf', 'poly']
results = {}

# Train SVC models with each kernel and store results
for kernel in kernels:
    model = SVC(kernel=kernel)
    model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Calculate accuracy, confusion matrix, and classification report
    accuracy = accuracy_score(y_test, y_pred)
    conf_matrix = confusion_matrix(y_test, y_pred)
    class_report = classification_report(y_test, y_pred)
    
    # Store results for each kernel
    results[kernel] = {
        'accuracy': accuracy,
        'confusion_matrix': conf_matrix,
        'classification_report': class_report
    }

# Print results for each kernel
for kernel, metrics in results.items():
    print(f"\nKernel: {kernel}")
    print(f"Accuracy: {metrics['accuracy']}")
    print("Confusion Matrix:\n", metrics['confusion_matrix'])
    print("Classification Report:\n", metrics['classification_report'])


Kernel: linear
Accuracy: 0.8212765957446808
Confusion Matrix:
 [[103  35]
 [  7  90]]
Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.75      0.83       138
           1       0.72      0.93      0.81        97

    accuracy                           0.82       235
   macro avg       0.83      0.84      0.82       235
weighted avg       0.85      0.82      0.82       235


Kernel: rbf
Accuracy: 0.8170212765957446
Confusion Matrix:
 [[112  26]
 [ 17  80]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.81      0.84       138
           1       0.75      0.82      0.79        97

    accuracy                           0.82       235
   macro avg       0.81      0.82      0.81       235
weighted avg       0.82      0.82      0.82       235


Kernel: poly
Accuracy: 0.7829787234042553
Confusion Matrix:
 [[118  20]
 [ 31  66]]
Classification Report:
               prec