## Scikit-learn Project using Diabetes data
### Gotten from scikit-learn cheatsheet from scikit-learn platform

### 1. Loading the data

In [2]:
# Classification
from sklearn import datasets
X,y = datasets.load_wine(return_X_y=True)



In [3]:
# Regression
diabetes = datasets.load_diabetes()
X,y = diabetes.data, diabetes.target


### 2. Training and testing data

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

### 3. Preprocessing the data

In [5]:
# Standardization
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.fit_transform(X_test)

In [6]:
# Normalization
from sklearn.preprocessing import Normalizer

norm = Normalizer()
norm_X_train = norm.fit_transform(X_train)
norm_X_test = norm.transform(X_test)

In [7]:
# Binarization
from sklearn.preprocessing import Binarizer

binary = Binarizer(threshold=1.1)
binary_X = binary.fit_transform(X)

In [8]:
# Encoding categorical features

from sklearn.preprocessing import LabelEncoder

lab_enc = LabelEncoder()
y = lab_enc.fit_transform(y)

In [10]:
# Imputer
from sklearn.impute import SimpleImputer
import numpy as np

imp_mean = SimpleImputer(missing_values=np.nan, strategy='mean')
imp_mean.fit_transform(X_train)


array([[ 0.01264814,  0.05068012,  0.00241654, ..., -0.03949338,
         0.00370906,  0.07348023],
       [-0.10722563, -0.04464164, -0.07734155, ..., -0.0763945 ,
        -0.04257085, -0.0052198 ],
       [ 0.02717829,  0.05068012, -0.03530688, ..., -0.00259226,
        -0.01495969, -0.05078298],
       ...,
       [ 0.05987114, -0.04464164, -0.02129532, ...,  0.07120998,
         0.07912244,  0.13561183],
       [-0.07816532, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.01811369, -0.08391984],
       [ 0.04170844,  0.05068012,  0.07139652, ...,  0.03430886,
         0.07340696,  0.08590655]])

### 3. Supervised Learning Model

In [11]:
# Linear Regression
from sklearn.linear_model import LinearRegression
lr = LinearRegression()


In [12]:
# Support Vector Machines
from sklearn.svm import SVC
svm_svc = SVC(kernel='linear', C=1, random_state=42)

In [13]:
# Naive Bayes
from sklearn.naive_bayes import GaussianNB

gbn = GaussianNB()

### 4. Unsupervised Learning Model 

In [14]:

# Principal Component Analysis (PCA)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)

In [15]:
# K-Means Clustering
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=5, random_state=0)

### 5. Model Fitting

In [16]:
# Supervised learning
lr.fit(X_train, y_train)
svm_svc.fit(X_train, y_train)

In [17]:
# Unsupervised learning
model = pca.fit_transform(X_train)
kmeans.fit(X_train)

### 6. Prediction

In [19]:
# Supervised learning
y_pred = lr.predict(X_test)
y_pred = svm_svc.predict(X_test)

In [20]:
# Unsupervised learning
y_pred = kmeans.predict(X_test)

### 6. Evaluation

In [21]:
# Accuracy score
lr.score(X_test, y_test)

from sklearn.metrics import accuracy_score
accuracy_score(y_test, y_pred)

0.0

In [23]:
# Classification report
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00       0.0
         1.0       0.00      0.00      0.00       0.0
         2.0       0.00      0.00      0.00       0.0
         3.0       0.00      0.00      0.00       0.0
         4.0       0.00      0.00      0.00       0.0
        47.0       0.00      0.00      0.00       1.0
        49.0       0.00      0.00      0.00       3.0
        52.0       0.00      0.00      0.00       1.0
        53.0       0.00      0.00      0.00       1.0
        57.0       0.00      0.00      0.00       1.0
        59.0       0.00      0.00      0.00       1.0
        61.0       0.00      0.00      0.00       1.0
        64.0       0.00      0.00      0.00       1.0
        67.0       0.00      0.00      0.00       1.0
        68.0       0.00      0.00      0.00       1.0
        74.0       0.00      0.00      0.00       1.0
        75.0       0.00      0.00      0.00       1.0
        84.0       0.00    

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [24]:
# Mean squared error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)


28309.696629213482

In [25]:
# R2 score 
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

-4.520690225664238

In [26]:
# Adjusted Rand Index 
from sklearn.metrics import adjusted_rand_score 
adjusted_rand_score(y_test, y_pred)

0.0006963858447657236

### 7. Cross-Validation

In [27]:
from sklearn.model_selection import cross_val_score
cross_val_score(lr, X, y, cv=5, scoring='f1_macro')

Traceback (most recent call last):
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 137, in __call__
    score = scorer._score(
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/metrics/_scorer.py", line 350, in _score
    return self._sign * self._score_func(y_true, y_pred, **scoring_kwargs)
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 213, in wrapper
    return func(*args, **kwargs)
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py", line 1271, in f1_score
    return fbeta_score(
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/utils/_param_validation.py", line 186, in wrapper
    return func(*args, **kwargs)
  File "/home/prigii/.local/lib/python3.10/site-packages/sklearn/metrics/_classification.py", line 1463, in fbeta_score
    _, _, f, _ = precision_recall_fscore_support(
  File "/home/prigii/.local/lib/python3.10

array([nan, nan, nan, nan, nan])

### 8. Model Tuning

In [28]:
from sklearn.model_selection import GridSearchCV

parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
model = GridSearchCV(SVC(), parameters)
model.fit(X_train, y_train)
print(model.best_score__)
print(model.best_estimator_)



AttributeError: 'GridSearchCV' object has no attribute 'best_score__'