### Test of dual_annealing  

#### Import libraries  

In [None]:
import numpy as np
import pandas as pd
from random import uniform
from scipy.optimize import dual_annealing
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import seaborn as sns

### Read csv
Dataset from [Medical Cost Personal Datasets](https://www.kaggle.com/mirichoi0218/insurance)
#### Description
##### Context
Machine Learning with R by Brett Lantz is a book that provides an introduction to machine learning using R. As far as I can tell, Packt Publishing does not make its datasets available online unless you buy the book and create a user account which can be a problem if you are checking the book out from the library or borrowing the book from a friend. All of these datasets are in the public domain but simply needed some cleaning up and recoding to match the format in the book.

In [None]:
# Load the insurance dataset from CSV into a DataFrame.
# NOTE(review): the relative path presumably matches the Kaggle kernel
# layout - confirm before running in another environment.
df = pd.read_csv("../input/insurance.csv")

### Show dataset
#### Columns

- age: age of primary beneficiary

- sex: insurance contractor gender, female, male

- bmi: Body mass index, an objective index of body weight (kg / m ^ 2) computed from weight and height; it indicates whether a weight is relatively high or low for a given height, ideally 18.5 to 24.9

- children: Number of children covered by health insurance / Number of dependents

- smoker: whether the beneficiary smokes (yes / no)

- region: the beneficiary's residential area in the US, northeast, southeast, southwest, northwest.

- charges: Individual medical costs billed by health insurance

In [None]:
# Quick look at the raw data: dimensions, column summary, first rows,
# and the number of records per region.
display(df.shape)
# DataFrame.info() prints its report itself and returns None, so it is
# called directly; wrapping it in display() would also render a stray "None".
df.info()
display(df.head())
display(df["region"].value_counts())

### Show pairplot

In [None]:
# Draw a seaborn pairplot of the raw DataFrame and render the figure.
sns.pairplot(df)
plt.show()

### Get dummy var
The `sex`, `smoker`, and `region` columns are categorical, so they have to be converted to numeric variables: dummy variables for `sex` and `smoker`, integer codes for `region`.  
sex male:1 female:0  
smoker yes:1 no:0  
region northeast:0 northwest:1 southeast:2 southwest:3

In [None]:
# Encode the categorical columns numerically.
# region -> integer category codes, stored in a new "code" column.
df["region"] = pd.Categorical(df["region"])
df["code"] = df["region"].cat.codes
# sex / smoker -> one indicator column each (drop_first=True removes the
# first level, leaving the "male" and "yes" columns used further down).
# dtype=int pins the indicators to integers: recent pandas releases default
# to boolean indicator columns, and mixing those with numeric features gives
# an object-typed design matrix that the regression step cannot consume.
sex_male = pd.get_dummies(df["sex"], drop_first=True, dtype=int)
smoke_yes = pd.get_dummies(df["smoker"], drop_first=True, dtype=int)
# Append the indicator columns to the original frame.
df_concat = pd.concat([df, sex_male, smoke_yes], axis=1)

In [None]:
# Repeat the pairplot on the frame that now includes the encoded columns.
sns.pairplot(df_concat)
plt.show()

### Split dataset into X, y

In [None]:
# Separate the explanatory variables (X) from the target (y).
feature_columns = ["age", "bmi", "children", "male", "yes", "code"]
X = df_concat[feature_columns]
y = df_concat["charges"]

# Show the shape and the first rows of each part, X first and then y.
for part in (X, y):
    display(part.shape)
    display(part.head())

### regression

In [None]:
# Fit an ordinary least squares model of y on X, with a constant column
# added to the features, and show the fit summary.
design_matrix = sm.add_constant(X)
model = sm.OLS(endog=y, exog=design_matrix)
result = model.fit()
display(result.summary())

In [None]:
# Print the fitted coefficients; the cost function reads them by position
# (first entry as the constant term, the rest as per-feature weights).
print(result.params)

#### Cost function  
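The function keeps the name `ackley2d`, but the objective it evaluates is the fitted linear regression prediction (the constant term plus the coefficient-weighted sum of the features), not the Ackley benchmark function.  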

In [None]:
def ackley2d(x):
    """Evaluate the fitted regression's prediction at the point ``x``.

    Despite its name, this is not the Ackley benchmark function: it returns
    the first fitted parameter plus the coefficient-weighted sum of ``x``,
    using the module-level ``result`` produced by the regression step.

    Args:
        x: One-dimensional sequence of feature values. ``x[i]`` is paired
            with ``result.params[i + 1]``.

    Returns:
        The scalar ``params[0] + sum(x[i] * params[i + 1])``.
    """
    # Work on a plain array so coefficients are addressed by position;
    # integer keys on a label-indexed pandas Series are deprecated.
    params = np.asarray(result.params, dtype=float)
    point = np.asarray(x, dtype=float)
    # Only the coefficients that pair with an entry of x take part.
    weights = params[1:len(point) + 1]
    return params[0] + np.dot(point, weights)

#### Set bounds (lower, upper) for each x[i]  

In [None]:
# One (min, max) pair per feature column of X, in column order.
bounds = [(column.min(), column.max()) for _, column in X.items()]

In [None]:
# Echo the bounds so the search box can be inspected.
bounds

#### Run multiple trials and check the obtained minima  

In [None]:
# Number of dual-annealing runs; each run starts from a new random point.
n_trial = 10

In [None]:
# Run dual annealing n_trial times, each time from a random starting point
# drawn inside the bounds, and print the minimum that was found.
for _ in range(n_trial):
    # Initial value: one uniform draw per (lower, upper) pair, so the start
    # vector always has len(bounds) entries instead of a hard-coded size.
    x = np.array([uniform(lower, upper) for lower, upper in bounds])
    print(x)  # debug

    # Dual annealing optimization
    ret = dual_annealing(ackley2d, bounds, x0=x, maxiter=500)
    print('x:', ret.x)
    print('f(x):', ret.fun)