In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split

columns = ["OSM_ID", "LONGITUDE", "LATITUDE", "ALTITUDE"]
dataset = pd.read_csv("3d+road+network+north+jutland+denmark/3D_spatial_network.txt", header=None, names=columns, chunksize=20000)


scaler = StandardScaler()
sgd_reg = SGDRegressor(random_state=10)

for chunk in dataset:
    features = chunk[["OSM_ID", "LONGITUDE", "LATITUDE"]]
    target = chunk["ALTITUDE"]

    scaler.partial_fit(features)
    scaled_features = scaler.transform(features)

    sgd_reg.partial_fit(scaled_features, target)

sgd_reg


In [2]:
SGDRegressor(random_state=10)

In [10]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

iris = load_iris()

X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=10)

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((120, 4), (30, 4), (120,), (30,))

In [11]:
from sklearn.preprocessing import Normalizer

normalizer = Normalizer()

X_train_norm = normalizer.fit_transform(X_train)
X_test_norm = normalizer.transform(X_test)

X_train_norm.shape, X_test_norm.shape

((120, 4), (30, 4))

In [12]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

k_values = [2, 3, 4]
scores = {}

for k in k_values:
   
    knn = KNeighborsClassifier(n_neighbors=k)   
    knn.fit(X_train_norm, y_train)    
    y_pred = knn.predict(X_test_norm)   
    score = accuracy_score(y_test, y_pred)    
    scores[k] = score

scores

{2: 0.9666666666666667, 3: 0.9666666666666667, 4: 0.9666666666666667}

In [13]:
from sklearn.metrics import f1_score

knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train_norm, y_train)
y_pred = knn.predict(X_test_norm)
f1 = f1_score(y_test, y_pred, average='weighted')

f1

0.9671111111111111

In [14]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn import svm

iris = datasets.load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

model = svm.SVC(gamma='auto', random_state=0)
param_grid = {'kernel': ['linear', 'rbf'], 'C': [1,15,25]}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=4)
grid_search.fit(X_train, y_train)

best_score = grid_search.best_score_
best_score

0.9807692307692308

In [15]:
from sklearn.svm import SVC

data = pd.read_csv('Social_Network_Ads.csv')
X = data.drop('Purchased', axis=1)
y = data['Purchased']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, y_train)

X_train, X_test, y_train, y_test

(array([[ 0.58164944, -0.88670699],
        [-0.60673761,  1.46173768],
        [-0.01254409, -0.5677824 ],
        [-0.60673761,  1.89663484],
        [ 1.37390747, -1.40858358],
        [ 1.47293972,  0.99784738],
        [ 0.08648817, -0.79972756],
        [-0.01254409, -0.24885782],
        [-0.21060859, -0.5677824 ],
        [-0.21060859, -0.19087153],
        [-0.30964085, -1.29261101],
        [-0.30964085, -0.5677824 ],
        [ 0.38358493,  0.09905991],
        [ 0.8787462 , -0.59677555],
        [ 2.06713324, -1.17663843],
        [ 1.07681071, -0.13288524],
        [ 0.68068169,  1.78066227],
        [-0.70576986,  0.56295021],
        [ 0.77971394,  0.35999821],
        [ 0.8787462 , -0.53878926],
        [-1.20093113, -1.58254245],
        [ 2.1661655 ,  0.93986109],
        [-0.01254409,  1.22979253],
        [ 0.18552042,  1.08482681],
        [ 0.38358493, -0.48080297],
        [-0.30964085, -0.30684411],
        [ 0.97777845, -0.8287207 ],
        [ 0.97777845,  1.867

In [16]:
y_pred = classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy

0.9

In [17]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
cm

array([[66,  2],
       [ 8, 24]], dtype=int64)

In [18]:
from sklearn.datasets import fetch_openml
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report

X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)

X_train, X_test = X[:20000], X[20000:25000]
y_train, y_test = y[:20000], y[20000:25000]

pipe = make_pipeline(
    MinMaxScaler(),
    SVC(kernel='linear', decision_function_shape='ovr', class_weight=None)
)

pipe.fit(X_train, y_train)
y_pred = pipe.predict(X_test)
print(classification_report(y_test, y_pred))


  warn(


              precision    recall  f1-score   support

           0       0.94      0.98      0.96       478
           1       0.95      0.98      0.97       568
           2       0.92      0.92      0.92       521
           3       0.91      0.90      0.90       516
           4       0.91      0.94      0.92       500
           5       0.88      0.88      0.88       460
           6       0.97      0.96      0.96       491
           7       0.91      0.96      0.94       504
           8       0.93      0.85      0.89       466
           9       0.92      0.88      0.90       496

    accuracy                           0.92      5000
   macro avg       0.92      0.92      0.92      5000
weighted avg       0.92      0.92      0.92      5000



In [19]:
import numpy as np
y_pred = pipe.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
sum_diag = np.trace(conf_matrix)
print("Sum of diagonal elements:", sum_diag)

Sum of diagonal elements: 4623
