In [75]:
from sklearn.cluster import DBSCAN
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [76]:
data = pd.read_csv("insurance.csv")
data.head(2)

Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523


In [77]:
#___ "DBSCAN works only with numeric datasets" ___#
data = data.drop('sex', axis = 1)
data = data.drop('smoker', axis = 1)
data = data.drop('region', axis = 1)

In [78]:
data.fillna(method ='ffill', inplace = True)

In [79]:
data.head(2)

Unnamed: 0,age,bmi,children,charges
0,19,27.9,0,16884.924
1,18,33.77,1,1725.5523


# **preprocessing the data**

In [62]:
# Scaling the data to bring all the attributes to a comparable level
from sklearn.preprocessing import StandardScaler, normalize

# Separating out the features
x = data.iloc[:, :-1].values

# Separating out the target
y = data.iloc[:, -1].values

# Standardizing the features
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

# Normalizing the data so that
# the data approximately follows a Gaussian distribution
x_normalized = normalize(x_scaled)

# Converting the numpy array into a pandas DataFrame
x_normalized = pd.DataFrame(x_normalized)

# **DBSCAN**

In [81]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 0)

In [73]:
clc = DBSCAN(eps = 0.5, metric = "euclidean", min_samples = 5, algorithm = "auto")

In [82]:
y_pred_train = clc.fit_predict(x_train)
y_pred_test = clc.fit_predict(x_test)

In [83]:
print(x_train)

[[37.   34.1   4.  ]
 [18.   34.43  0.  ]
 [23.   36.67  2.  ]
 ...
 [40.   25.08  0.  ]
 [19.   35.53  0.  ]
 [33.   18.5   1.  ]]


In [84]:
print(y_train)

[40182.246   1137.4697 38511.6283 ...  5415.6612  1646.4297  4766.022 ]


In [85]:
print(clc.labels_)

[-1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1
 -1 -1 -1 -1]
